def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory):
    """Download the FLC/FLT files named in the poller CSV and return the set to process.

    Parameters
    ----------
    read_csv_for_filenames : list
        Filenames (FLC/FLT) read from the poller CSV file.
    tmp_path_factory : pytest.TempPathFactory
        Factory used to create the working directory for the test.

    Returns
    -------
    files_to_process : set
        The subset of requested filenames which were actually retrieved
        from MAST and now reside on disk in the working directory.
    """
    # Create working directory specified for the test
    curdir = tmp_path_factory.mktemp(os.path.basename(__file__))
    os.chdir(curdir)

    # Establish FLC/FLT lists and obtain the requested data
    flc_flag = ""
    flt_flag = ""
    # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which
    # may be part of an ASN, use only IPPPSS with a wildcard.  The unwanted images have to be removed
    # after-the-fact.
    for fn in read_csv_for_filenames:
        if fn.lower().endswith("flc.fits") and flc_flag == "":
            flc_flag = fn[0:6] + "*"
        elif fn.lower().endswith("flt.fits") and flt_flag == "":
            flt_flag = fn[0:6] + "*"

        # If both flags have been set, then break out the loop early.  It may be
        # that all files have to be checked which means the for loop continues
        # until its natural completion.
        if flc_flag and flt_flag:
            break

    # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files
    # (e.g., j*_flc.fits) as necessary.  The logic here and the above for loop is an attempt to
    # avoid downloading too many images which are not needed for processing.
    flcfiles = []
    fltfiles = []
    if flc_flag:
        flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline")
    if flt_flag:
        fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline")

    flcfiles.extend(fltfiles)

    # Keep only the files which exist in BOTH lists for processing
    files_to_process = set(read_csv_for_filenames).intersection(set(flcfiles))

    # Identify unwanted files from the download list and remove from disk.
    # NOTE: the symmetric difference also contains CSV names which were never
    # downloaded (and so are not on disk); originally a single try wrapped the
    # whole loop, so the first such missing file aborted removal of every
    # remaining unwanted file.  Handle each file individually instead.
    files_to_remove = set(read_csv_for_filenames).symmetric_difference(set(flcfiles))
    for ftr in files_to_remove:
        try:
            os.remove(ftr)
        except FileNotFoundError:
            # Name came from the CSV but was never downloaded - nothing to delete.
            pass
        except Exception as x_cept:
            print("")
            print("Exception encountered: {}.".format(x_cept))
            print("The file {} could not be deleted from disk. ".format(ftr))
            print(
                "Remove files which are not used for processing from disk manually."
            )

    print("\ngather_data_for_processing. Gathered data: {}".format(
        files_to_process))

    return files_to_process
def test_astrometric_singleton(self, dataset_names):
    """ Tests pipeline-style processing of a singleton exposure using runastrodriz.
    """
    # Pull the calibrated and raw versions of the exposure from MAST
    retrieved = {
        suffix: aqutils.retrieve_observation(dataset_names, suffix=[suffix])[0]
        for suffix in ('FLC', 'FLT', 'RAW')
    }
    flcfile = retrieved['FLC']
    fltfile = retrieved['FLT']
    rawfile = retrieved['RAW']

    # Retrieve reference files for these as well
    self.get_input_file('', fltfile, docopy=False)

    # Turn on every astrometry step so full processing is exercised
    for env_var in ('ASTROMETRY_STEP_CONTROL',
                    'ASTROMETRY_COMPUTE_APOSTERIORI',
                    'ASTROMETRY_APPLY_APRIORI'):
        os.environ[env_var] = 'on'

    # Run pipeline processing
    runastrodriz.process(rawfile, force=True, inmemory=True)

    # Read back the WCS solution name written to each calibrated product
    flc_wcsname = fits.getval(flcfile, 'wcsname', ext=1)
    flt_wcsname = fits.getval(fltfile, 'wcsname', ext=1)

    # Perform comparisons:
    #  - a '-' in WCSNAME indicates either an a priori or an a posteriori
    #    solution was applied
    #  - both calibrated products (FLT and FLC) must carry the same solution
    assert ('-' in flc_wcsname)
    assert ('-' in flt_wcsname)
    assert (flc_wcsname == flt_wcsname)
def get_data(self, *args, **kwargs):
    """ Download `filename` into working directory using
        `artifactory_helpers/get_bigdata()` or
        `astroquery_utils.retrieve_observation`.

        Use of `astroquery_utils` will allow getting data directly
        from MAST via astroquery.

        Returns
        --------
        local_files : list
            This will return a list of all the files downloaded with
            the full path to the local copy of the file.
    """
    # A bare nine-character rootname (ipppssoot) goes straight to MAST;
    # retrieve_observation already returns a list of local paths.
    if len(args[0]) == 9:
        return retrieve_observation(args[0])

    # Otherwise fetch through Artifactory.  Honor a caller-specified
    # docopy, falling back to the default defined in the base class.
    copy_mode = kwargs.get('docopy', self.docopy)
    downloaded = get_bigdata(*self.get_input_path(), *args, docopy=copy_mode)
    return [downloaded]
def download(self):
    """Called if input_uri starts is `astroquery`
    Download any data files for the `ipppssoot`,  issuing start and
    stop divider messages.

    Returns
    -------
    filepaths : sorted list
        Local file system paths of files which were downloaded for `ipppssoot`,
        some of which will be selected for calibration processing.
    """
    self.divider("Retrieving data files for:", self.download_suffixes)
    files = retrieve_observation(self.ipppssoot, suffix=self.download_suffixes)
    self.divider("Download data complete.")
    # sorted() already returns a list, so the original list(sorted(...))
    # wrapper was redundant.
    return sorted(os.path.abspath(f) for f in files)
def check_disk_get_data(input_list, **pars):
    """Verify that all specified files are present. If not, retrieve them from MAST.

    Parameters
    ----------
    input_list : list
        List of one or more calibrated fits images that will be
        used for catalog generation.

    Returns
    =======
    total_input_list: list
        list of full filenames

    """
    reload(aqutils)

    empty_list = []
    retrieve_list = []  # Actual files retrieved via astroquery and resident on disk
    candidate_list = []  # File names gathered from *_asn.fits file
    ipppssoot_list = []  # ipppssoot names used to avoid duplicate downloads
    total_input_list = []  # Output full filename list of data on disk
    member_suffix = '_flc.fits'

    # Get the suffix values
    suffix_to_check = pars.get("suffix")
    # List set up with FLT before FLC to ensure both are retrieved if they both exist
    suffix_to_retrieve = ["ASN", "FLT", "FLC"]
    if suffix_to_check == "RAW":
        suffix_to_retrieve = ["RAW", "FLT", "FLC"]

    # Loop over the input_list to determine if the item in the input_list is a full association file
    # (*_asn.fits), a full individual image file (aka singleton, *_flt.fits), or a root name specification
    # (association or singleton, ipppssoot).
    for input_item in input_list:
        log.info('Input item: {}'.format(input_item))
        indx = input_item.find('_')

        # Input with a suffix (_xxx.fits)
        if indx != -1:
            lc_input_item = input_item.lower()
            suffix = lc_input_item[indx + 1:indx + 4]
            log.info('file: {}'.format(lc_input_item))
            # For an association, need to open the table and read the image names as this could
            # be a custom association.  The assumption is this file is on local disk when specified
            # in this manner (vs just the ipppssoot of the association).
            # This "if" block just collects the wanted full file names.
            if suffix == 'asn':
                try:
                    asntab = Table.read(input_item, format='fits')
                except FileNotFoundError:
                    log.error('File {} not found.'.format(input_item))
                    return (empty_list)
                for row in asntab:
                    # PROD rows name the output product, not an input member - skip them.
                    if row['MEMTYPE'].startswith('PROD'):
                        continue
                    memname = row['MEMNAME'].lower().strip()
                    # Need to check if the MEMNAME is a full filename or an ipppssoot
                    if memname.find('_') != -1:
                        candidate_list.append(memname)
                    else:
                        # Define suffix for all members based on what files are present
                        # NOTE(review): member_suffix is never reset back to '_flc.fits'
                        # once switched, so a single member lacking an FLC forces the
                        # FLT suffix for all subsequent suffix-less members - confirm
                        # this is the intended behavior.
                        if not os.path.exists(memname + member_suffix):
                            member_suffix = '_flt.fits'

                        candidate_list.append(memname + member_suffix)
            elif suffix in ['flc', 'flt']:
                if lc_input_item not in candidate_list:
                    candidate_list.append(lc_input_item)
            else:
                log.error(
                    'Inappropriate file suffix: {}.  Looking for "asn.fits", '
                    '"flc.fits", or "flt.fits".'.format(suffix))
                return (empty_list)

        # Input is an ipppssoot (association or singleton), nine characters by definition.
        # This "else" block actually downloads the data specified as ipppssoot.
        elif len(input_item) == 9:
            try:
                if input_item not in ipppssoot_list:
                    # An ipppssoot of an individual file which is part of an association cannot be
                    # retrieved from MAST
                    log.info("Collect data: {} Suffix: {}".format(
                        input_item, suffix_to_retrieve))
                    for filetype in suffix_to_retrieve:
                        retrieve_list += aqutils.retrieve_observation(
                            input_item, suffix=filetype)
                    log.info("Collected data: {}".format(retrieve_list))

                    # If the retrieved list is not empty, add filename(s) to the total_input_list.
                    # Also, update the ipppssoot_list so we do not try to download the data again.  Need
                    # to do this since retrieve_list can be empty because (1) data cannot be acquired (error)
                    # or (2) data is already on disk (ok).
                    if retrieve_list:
                        total_input_list += retrieve_list
                        ipppssoot_list.append(input_item)
                    else:
                        log.error(
                            'File {} cannot be retrieved from MAST.'.format(
                                input_item))
                        return (empty_list)
            except Exception:
                # Deliberately best-effort: log the traceback and keep processing
                # the remaining input items rather than aborting the whole run.
                log.info("Exception in check_disk_get_data")
                exc_type, exc_value, exc_tb = sys.exc_info()
                traceback.print_exception(exc_type,
                                          exc_value,
                                          exc_tb,
                                          file=sys.stdout)

    # Only the retrieve_list files via astroquery have been put into the total_input_list thus far.
    # Now check candidate_list to detect or acquire the requested files from MAST via astroquery.
    for file in candidate_list:
        # If the file is found on disk, add it to the total_input_list and continue
        if glob.glob(file):
            total_input_list.append(file)
            continue
        else:
            log.error(
                'File {} cannot be found on the local disk.'.format(file))
            return (empty_list)

    log.info("TOTAL INPUT LIST: {}".format(total_input_list))
    return (total_input_list)
def test_run_svmpoller(tmpdir, dataset):
    """ Tests to read a series of poller files and process the contents of each as Single Visit Mosaic

        Characteristics of these tests:

        Success Criteria:
            The SVM processing returns a value of 0: Success or 1: Failure

        The input svm_list file is a list of poller filenames, one filename per line.
        Each poller file must be obtained from a specified directory and read to obtain the
        names of the data files which need to be processed.

        This test file can be executed in the following manner:
            $ pytest -n # -s --basetemp=/internal/hladata/yourUniqueDirectoryHere --bigdata --slow
              --svm_list svm_input.lst test_run_svmpoller.py >& test_svmpoller_output.txt &
            $ tail -f test_svmpoller_output.txt
          * The `-n #` option can be used to run tests in parallel if `pytest-xdist` has
            been installed where `#` is the number of cpus to use. THIS IS NOT ADVISED FOR USE.
          * Note: When running this test, the `--basetemp` directory should be set to a unique
            existing directory to avoid deleting previous test output.
          * A default master list, svm_input.lst, exists in the tests/hla directory and contains 3 datasets.
            This specific list may NOT the list you want to use, but it allows you to see what this file should contain.

        Please note the PyTests should be kept to runtimes which are not excessive.

    """
    print("TEST_RUN_SVMPOLLER. Dataset: ", dataset)
    current_dt = datetime.datetime.now()
    print(str(current_dt))

    subdir = ""
    prevdir = os.getcwd()

    # create working directory specified for the test
    if not tmpdir.ensure(subdir, dir=True):
        curdir = tmpdir.mkdir(subdir).strpath
    else:
        curdir = tmpdir.join(subdir).strpath
    os.chdir(curdir)

    return_value = 1

    try:
        # Read the CSV poller file residing in the tests directory to extract the individual visit FLT/FLC filenames
        path = os.path.join(os.path.dirname(__file__), dataset)
        table = ascii.read(path, format="no_header")
        filename_column = table.colnames[0]
        filenames = list(table[filename_column])
        print("\nread_csv_for_filenames. Filesnames from poller: {}".format(
            filenames))

        # Establish FLC/FLT lists and obtain the requested data
        flc_flag = ""
        flt_flag = ""
        # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which
        # may be part of an ASN, use only IPPPSS with a wildcard.  The unwanted images have to be removed
        # after-the-fact.
        for fn in filenames:
            if fn.lower().endswith("flc.fits") and flc_flag == "":
                flc_flag = fn[0:6] + "*"
            elif fn.lower().endswith("flt.fits") and flt_flag == "":
                flt_flag = fn[0:6] + "*"

            # If both flags have been set, then break out the loop early.  It may be
            # that all files have to be checked which means the for loop continues
            # until its natural completion.
            if flc_flag and flt_flag:
                break

        # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files
        # (e.g., j*_flc.fits) as necessary.  The logic here and the above for loop is an attempt to
        # avoid downloading too many images which are not needed for processing.
        flcfiles = []
        fltfiles = []
        if flc_flag:
            flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline")
        if flt_flag:
            fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline")

        flcfiles.extend(fltfiles)

        # Keep only the files which exist in BOTH lists for processing
        files_to_process = set(filenames).intersection(set(flcfiles))

        # Identify unwanted files from the download list and remove from disk.
        # Handle each file individually so one missing file (e.g. a CSV-only
        # name which was never downloaded) does not abort removal of the rest.
        files_to_remove = set(filenames).symmetric_difference(set(flcfiles))
        for ftr in files_to_remove:
            try:
                os.remove(ftr)
            except FileNotFoundError:
                # Nothing on disk to delete for this name.
                pass
            except Exception as x_cept:
                print("")
                print("Exception encountered: {}.".format(x_cept))
                print("The file {} could not be deleted from disk. ".format(ftr))
                print(
                    "Remove files which are not used for processing from disk manually."
                )

        # Run the SVM processing
        path = os.path.join(os.path.dirname(__file__), dataset)

        return_value = runsinglehap.perform(path)

    # Catch anything that happens and report it.  This is meant to catch unexpected errors and
    # generate sufficient output exception information so algorithmic problems can be addressed.
    except Exception as except_details:
        traceback.print_exc()
        # BUG FIX: pytest.fail(reason, pytrace) takes the already-formatted
        # message as its first argument; the original passed the template and
        # dataset as two positional arguments, so the message was never
        # formatted and dataset was misinterpreted as the pytrace flag.
        pytest.fail("TEST_RUN_SVMPOLLER. Exception Dataset: {}\n".format(dataset))

    assert return_value == 0

    current_dt = datetime.datetime.now()
    print(str(current_dt))

    # Return to original directory
    os.chdir(prevdir)