Example #1
# Imports assumed by this snippet: the standard library's os module and
# drizzlepac's astroquery helper (the module providing retrieve_observation).
import os

from drizzlepac.haputils import astroquery_utils as aqutils


def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory):
    # Create a working directory specific to this test
    curdir = tmp_path_factory.mktemp(os.path.basename(__file__))
    os.chdir(curdir)

    # Establish FLC/FLT lists and obtain the requested data
    flc_flag = ""
    flt_flag = ""
    # To obtain individual FLC or FLT images from MAST (if the files do not reside on
    # disk), some of which may be part of an ASN, use only the IPPPSS portion with a
    # wildcard.  The unwanted images have to be removed after the fact.
    for fn in read_csv_for_filenames:
        if fn.lower().endswith("flc.fits") and flc_flag == "":
            flc_flag = fn[0:6] + "*"
        elif fn.lower().endswith("flt.fits") and flt_flag == "":
            flt_flag = fn[0:6] + "*"

        # If both flags have been set, break out of the loop early.  It may be
        # that all files have to be checked, which means the for loop continues
        # to its natural completion.
        if flc_flag and flt_flag:
            break

    # Get test data through astroquery - only retrieve the pipeline-processed FLC and/or FLT
    # files (e.g., j*_flc.fits) as necessary. The logic here and in the for loop above attempts
    # to avoid downloading images which are not needed for processing.
    flcfiles = []
    fltfiles = []
    if flc_flag:
        flcfiles = aqutils.retrieve_observation(flc_flag,
                                                suffix=["FLC"],
                                                product_type="pipeline")
    if flt_flag:
        fltfiles = aqutils.retrieve_observation(flt_flag,
                                                suffix=["FLT"],
                                                product_type="pipeline")

    flcfiles.extend(fltfiles)

    # Keep only the files which exist in BOTH lists for processing
    files_to_process = set(read_csv_for_filenames).intersection(set(flcfiles))

    # Identify unwanted files from the download list and remove from disk
    files_to_remove = set(read_csv_for_filenames).symmetric_difference(
        set(flcfiles))
    try:
        for ftr in files_to_remove:
            os.remove(ftr)
    except Exception as x_cept:
        print("")
        print("Exception encountered: {}.".format(x_cept))
        print("The file {} could not be deleted from disk.".format(ftr))
        print("Remove the files which are not used for processing from disk manually.")

    print("\ngather_data_for_processing. Gathered data: {}".format(
        files_to_process))

    return files_to_process
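
# --- Usage sketch (illustrative addition, not part of the original source) ---
# A minimal, self-contained demonstration of the set logic used above: derive
# the IPPPSS wildcard from the first FLC filename, then split a download list
# into the files to keep (intersection) and the files to delete from disk
# (symmetric difference).  All filenames here are made up for illustration.
wanted = ["j6lq01naq_flc.fits", "j6lq01ndq_flc.fits"]
downloaded = ["j6lq01naq_flc.fits", "j6lq01nbq_flc.fits", "j6lq01ndq_flc.fits"]

flc_flag = next((fn[0:6] + "*" for fn in wanted if fn.lower().endswith("flc.fits")), "")
print(flc_flag)                                       # j6lq01*
print(set(wanted).intersection(downloaded))           # files to process
print(set(wanted).symmetric_difference(downloaded))   # files to remove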
Example #2
    def test_astrometric_singleton(self, dataset_names):
        """ Tests pipeline-style processing of a singleton exposure using runastrodriz.
        """
        # Get sample data through astroquery
        flcfile = aqutils.retrieve_observation(dataset_names,
                                               suffix=['FLC'])[0]
        fltfile = aqutils.retrieve_observation(dataset_names,
                                               suffix=['FLT'])[0]
        rawfile = aqutils.retrieve_observation(dataset_names,
                                               suffix=['RAW'])[0]

        # Retrieve reference files for these as well
        self.get_input_file('', fltfile, docopy=False)

        # Ensure environment variables are set for full processing
        os.environ['ASTROMETRY_STEP_CONTROL'] = 'on'
        os.environ['ASTROMETRY_COMPUTE_APOSTERIORI'] = 'on'
        os.environ['ASTROMETRY_APPLY_APRIORI'] = 'on'

        # Run pipeline processing using runastrodriz
        runastrodriz.process(rawfile, force=True, inmemory=True)

        # Compare the WCSNAME values from the FLT and FLC files
        flc_wcsname = fits.getval(flcfile, 'wcsname', ext=1)
        flt_wcsname = fits.getval(fltfile, 'wcsname', ext=1)

        # Perform comparisons:
        #   - WCSNAME values should contain '-' from either a priori or a posteriori solution
        #   - WCSNAME value should be the same for FLT and FLC images
        assert '-' in flc_wcsname
        assert '-' in flt_wcsname
        assert flc_wcsname == flt_wcsname
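
# --- Usage sketch (illustrative addition, not part of the original source) ---
# The WCSNAME comparison above can be factored into a small standalone helper
# that relies only on astropy.io.fits.getval; the helper name is hypothetical.
from astropy.io import fits

def wcsnames_match(flcfile, fltfile, ext=1):
    """True if both files carry the same, updated ('-') WCSNAME."""
    flc_wcsname = fits.getval(flcfile, 'wcsname', ext=ext)
    flt_wcsname = fits.getval(fltfile, 'wcsname', ext=ext)
    return '-' in flc_wcsname and flc_wcsname == flt_wcsname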
Example #3
    def get_data(self, *args, **kwargs):
        """
        Download `filename` into working directory using
        `artifactory_helpers/get_bigdata()` or 
        `astroquery_utils.retrieve_observation`.  Use of `astroquery_utils`
	will allow getting data directly from MAST via astroquery.

        Returns
        --------
        local_files : list
            This will return a list of all the files downloaded with 
            the full path to the local copy of the file.
        """
        if len(args[0]) == 9:  # Only a rootname provided
            local_files = retrieve_observation(args[0])
        else:
            # If the user has specified a value for docopy, apply it; the
            # default behavior is whatever was defined in the base class.
            docopy = kwargs.get('docopy', self.docopy)
            local_files = get_bigdata(*self.get_input_path(),
                                      *args,
                                      docopy=docopy)
            local_files = [local_files]

        return local_files
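
# --- Usage sketch (illustrative addition, not part of the original source) ---
# The dispatch above keys on the 9-character HST rootname (ipppssoot)
# convention: a bare rootname is fetched from MAST, anything else is pulled
# from Artifactory.  A standalone, slightly stricter version of that test:
def is_ipppssoot(name):
    """True for a bare 9-character HST rootname such as 'j6lq01010'."""
    return len(name) == 9 and '_' not in name and '.' not in name

print(is_ipppssoot("j6lq01010"))           # True  -> retrieve_observation
print(is_ipppssoot("j6lq01naq_flt.fits"))  # False -> get_bigdata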
Example #4
    def download(self):
        """Called if input_uri starts is `astroquery`
        Download any data files for the `ipppssoot`,  issuing start and
        stop divider messages.

        Returns
        -------
        filepaths : sorted list
            Local file system paths of files which were downloaded for `ipppssoot`,
            some of which will be selected for calibration processing.
        """
        self.divider("Retrieving data files for:", self.download_suffixes)
        files = retrieve_observation(self.ipppssoot,
                                     suffix=self.download_suffixes)
        self.divider("Download data complete.")
        return sorted(os.path.abspath(f) for f in files)
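
# --- Usage sketch (illustrative addition, not part of the original source) ---
# The return statement normalizes whatever retrieve_observation hands back into
# sorted absolute paths, which keeps the downstream processing order
# deterministic.  The same idiom in isolation, with made-up filenames:
import os

files = ["./j6lq01ndq_flt.fits", "./j6lq01naq_flt.fits"]
print(sorted(os.path.abspath(f) for f in files))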
Example #5
# Imports assumed by this snippet: standard-library modules, astropy's Table,
# and drizzlepac's astroquery helper.  `log` is the module-level logger.
import glob
import os
import sys
import traceback
from importlib import reload

from astropy.table import Table
from drizzlepac.haputils import astroquery_utils as aqutils


def check_disk_get_data(input_list, **pars):
    """Verify that all specified files are present; if not, retrieve them from MAST.

    Parameters
    ----------
    input_list : list
        List of one or more calibrated FITS images that will be used for catalog generation.

    Returns
    -------
    total_input_list : list
        List of the full filenames of the data on disk.

    """
    reload(aqutils)

    empty_list = []
    retrieve_list = []     # Actual files retrieved via astroquery and resident on disk
    candidate_list = []    # File names gathered from the *_asn.fits file
    ipppssoot_list = []    # ipppssoot names used to avoid duplicate downloads
    total_input_list = []  # Output list of the full filenames of data on disk
    member_suffix = '_flc.fits'

    # Get the suffix values
    suffix_to_check = pars.get("suffix")
    # List set up with FLT before FLC to ensure both are retrieved if they both exist
    suffix_to_retrieve = ["ASN", "FLT", "FLC"]
    if suffix_to_check == "RAW":
        suffix_to_retrieve = ["RAW", "FLT", "FLC"]

    # Loop over the input_list to determine whether each item is a full association file
    # (*_asn.fits), a full individual image file (aka a singleton, *_flt.fits), or a root
    # name specification (association or singleton, ipppssoot).
    for input_item in input_list:
        log.info('Input item: {}'.format(input_item))
        indx = input_item.find('_')

        # Input with a suffix (_xxx.fits)
        if indx != -1:
            lc_input_item = input_item.lower()
            suffix = lc_input_item[indx + 1:indx + 4]
            log.info('file: {}'.format(lc_input_item))
            # For an association, the table must be opened and the image names read, as this
            # could be a custom association.  The assumption is that the file is on local disk
            # when specified in this manner (vs. just the ipppssoot of the association).
            # This "if" block just collects the wanted full file names.
            if suffix == 'asn':
                try:
                    asntab = Table.read(input_item, format='fits')
                except FileNotFoundError:
                    log.error('File {} not found.'.format(input_item))
                    return empty_list
                for row in asntab:
                    if row['MEMTYPE'].startswith('PROD'):
                        continue
                    memname = row['MEMNAME'].lower().strip()
                    # Need to check if the MEMNAME is a full filename or an ipppssoot
                    if memname.find('_') != -1:
                        candidate_list.append(memname)
                    else:
                        # Define suffix for all members based on what files are present
                        if not os.path.exists(memname + member_suffix):
                            member_suffix = '_flt.fits'

                        candidate_list.append(memname + member_suffix)
            elif suffix in ['flc', 'flt']:
                if lc_input_item not in candidate_list:
                    candidate_list.append(lc_input_item)
            else:
                log.error(
                    'Inappropriate file suffix: {}.  Looking for "asn.fits", '
                    '"flc.fits", or "flt.fits".'.format(suffix))
                return empty_list

        # Input is an ipppssoot (association or singleton), nine characters by definition.
        # This "else" block actually downloads the data specified as an ipppssoot.
        elif len(input_item) == 9:
            try:
                if input_item not in ipppssoot_list:
                    # An ipppssoot of an individual file which is part of an association cannot be
                    # retrieved from MAST
                    log.info("Collect data: {} Suffix: {}".format(
                        input_item, suffix_to_retrieve))
                    for filetype in suffix_to_retrieve:
                        retrieve_list += aqutils.retrieve_observation(
                            input_item, suffix=filetype)
                    log.info("Collected data: {}".format(retrieve_list))

                    # If the retrieved list is not empty, add the filename(s) to the
                    # total_input_list.  Also update the ipppssoot_list so we do not try
                    # to download the data again.  This check is needed since retrieve_list
                    # can be empty because (1) the data cannot be acquired (error) or
                    # (2) the data is already on disk (ok).
                    if retrieve_list:
                        total_input_list += retrieve_list
                        ipppssoot_list.append(input_item)
                    else:
                        log.error(
                            'File {} cannot be retrieved from MAST.'.format(
                                input_item))
                        return empty_list
            except Exception:
                log.info("Exception in check_disk_get_data")
                exc_type, exc_value, exc_tb = sys.exc_info()
                traceback.print_exception(exc_type,
                                          exc_value,
                                          exc_tb,
                                          file=sys.stdout)

    # Only the retrieve_list files acquired via astroquery have been put into the
    # total_input_list thus far.  Now check the candidate_list to verify the requested
    # files are present on the local disk.
    for file in candidate_list:
        # If the file is found on disk, add it to the total_input_list
        if glob.glob(file):
            total_input_list.append(file)
        else:
            log.error(
                'File {} cannot be found on the local disk.'.format(file))
            return empty_list

    log.info("TOTAL INPUT LIST: {}".format(total_input_list))
    return total_input_list
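
# --- Usage sketch (illustrative addition, not part of the original source) ---
# check_disk_get_data() accepts three input forms.  A stripped-down classifier
# showing just that triage logic, with no downloads and hypothetical names:
def classify_input(item):
    indx = item.find('_')
    if indx != -1:
        suffix = item.lower()[indx + 1:indx + 4]
        if suffix == 'asn':
            return 'association table (read the members from the FITS table)'
        if suffix in ('flc', 'flt'):
            return 'calibrated image (check the local disk)'
        return 'unsupported suffix'
    if len(item) == 9:
        return 'ipppssoot (download via astroquery)'
    return 'unrecognized input'

for name in ('j6lq01010_asn.fits', 'j6lq01naq_flt.fits', 'j6lq01010'):
    print(name, '->', classify_input(name))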
Example #6
# Imports assumed by this snippet: standard-library modules plus pytest, astropy's
# ascii reader, and drizzlepac's astroquery and single-visit processing helpers.
import datetime
import os
import traceback

import pytest
from astropy.io import ascii
from drizzlepac import runsinglehap
from drizzlepac.haputils import astroquery_utils as aqutils


def test_run_svmpoller(tmpdir, dataset):
    """ Tests to read a series of poller files and process the contents of each as Single Visit Mosaic

        Characteristics of these tests:

        Success Criteria:
            The SVM processing returns a value of 0 (success) or 1 (failure).

        The input svm_list file is a list of poller filenames, one filename per line.
        Each poller file must be obtained from a specified directory and read to obtain the
        names of the data files which need to be processed.

        This test file can be executed in the following manner:
            $ pytest -n # -s --basetemp=/internal/hladata/yourUniqueDirectoryHere --bigdata --slow
              --svm_list svm_input.lst test_run_svmpoller.py >& test_svmpoller_output.txt &
            $ tail -f test_svmpoller_output.txt
          * The `-n #` option can be used to run tests in parallel if `pytest-xdist` has
            been installed, where `#` is the number of CPUs to use. THIS IS NOT ADVISED FOR USE.
          * Note: When running this test, the `--basetemp` directory should be set to a unique
            existing directory to avoid deleting previous test output.
          * A default master list, svm_input.lst, exists in the tests/hla directory and contains 3 datasets.
            This specific list may NOT be the list you want to use, but it shows what this file
            should contain.  Please note the PyTests should be kept to runtimes which are not
            excessive.

    """
    print("TEST_RUN_SVMPOLLER. Dataset: ", dataset)

    current_dt = datetime.datetime.now()
    print(str(current_dt))

    subdir = ""
    prevdir = os.getcwd()

    # Create the working directory specified for the test
    if not tmpdir.ensure(subdir, dir=True):
        curdir = tmpdir.mkdir(subdir).strpath
    else:
        curdir = tmpdir.join(subdir).strpath
    os.chdir(curdir)

    return_value = 1

    try:

        # Read the CSV poller file residing in the tests directory to extract the individual visit FLT/FLC filenames
        path = os.path.join(os.path.dirname(__file__), dataset)
        table = ascii.read(path, format="no_header")
        filename_column = table.colnames[0]
        filenames = list(table[filename_column])
        print("\nread_csv_for_filenames. Filesnames from poller: {}".format(
            filenames))

        # Establish FLC/FLT lists and obtain the requested data
        flc_flag = ""
        flt_flag = ""
        # To obtain individual FLC or FLT images from MAST (if the files do not reside on
        # disk), some of which may be part of an ASN, use only the IPPPSS portion with a
        # wildcard.  The unwanted images have to be removed after the fact.
        for fn in filenames:
            if fn.lower().endswith("flc.fits") and flc_flag == "":
                flc_flag = fn[0:6] + "*"
            elif fn.lower().endswith("flt.fits") and flt_flag == "":
                flt_flag = fn[0:6] + "*"

            # If both flags have been set, break out of the loop early.  It may be
            # that all files have to be checked, which means the for loop continues
            # to its natural completion.
            if flc_flag and flt_flag:
                break

        # Get test data through astroquery - only retrieve the pipeline-processed FLC and/or FLT
        # files (e.g., j*_flc.fits) as necessary. The logic here and in the for loop above attempts
        # to avoid downloading images which are not needed for processing.
        flcfiles = []
        fltfiles = []
        if flc_flag:
            flcfiles = aqutils.retrieve_observation(flc_flag,
                                                    suffix=["FLC"],
                                                    product_type="pipeline")
        if flt_flag:
            fltfiles = aqutils.retrieve_observation(flt_flag,
                                                    suffix=["FLT"],
                                                    product_type="pipeline")

        flcfiles.extend(fltfiles)

        # Keep only the files which exist in BOTH lists for processing
        files_to_process = set(filenames).intersection(set(flcfiles))

        # Identify unwanted files from the download list and remove from disk
        files_to_remove = set(filenames).symmetric_difference(set(flcfiles))

        try:
            for ftr in files_to_remove:
                os.remove(ftr)
        except Exception as x_cept:
            print("")
            print("Exception encountered: {}.".format(x_cept))
            print("The file {} could not be deleted from disk.".format(ftr))
            print("Remove the files which are not used for processing from disk manually.")

        # Run the SVM processing
        path = os.path.join(os.path.dirname(__file__), dataset)

        return_value = runsinglehap.perform(path)

    # Catch anything that happens and report it.  This is meant to catch unexpected errors
    # and generate sufficient exception output so algorithmic problems can be addressed.
    except Exception:
        traceback.print_exc()
        pytest.fail("TEST_RUN_SVMPOLLER. Exception Dataset: {}\n".format(dataset))

    assert return_value == 0

    current_dt = datetime.datetime.now()
    print(str(current_dt))

    # Return to original directory
    os.chdir(prevdir)
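
# --- Usage sketch (illustrative addition, not part of the original source) ---
# The poller file read at the top of the test is a headerless CSV whose first
# column holds the exposure filenames.  Reading one in isolation (the path is
# hypothetical):
from astropy.io import ascii

table = ascii.read("svm_poller_example.out", format="no_header")
filenames = list(table[table.colnames[0]])
print(filenames)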