def test_filename_parser_whole_filesystem():
    """Run ``filename_parser`` over every FITS file in the filesystem.

    Fails if any filename cannot be parsed, listing the offending
    basenames in the assertion message.
    """
    # Collect every .fits file beneath the configured filesystem root
    root_dir = get_config()['filesystem']
    fits_files = [
        os.path.join(dir_name, entry)
        for dir_name, _, entries in os.walk(root_dir)
        for entry in entries
        if entry.endswith('.fits')
    ]

    # Record the basenames the parser rejects
    unparseable = []
    for path in fits_files:
        try:
            filename_parser(path)
        except ValueError:
            unparseable.append(os.path.basename(path))

    failure_msg = '{} files could not be successfully parsed: \n - {}'.\
        format(len(unparseable), '\n - '.join(unparseable))

    # The test passes only when every file parsed cleanly
    assert not unparseable, failure_msg
def test_filename_parser_nonJWST():
    """Attempt to parse a file that is not formatted in the JWST naming
    convention and ensure ``ValueError`` is raised.
    """
    filename = 'not_a_jwst_file.fits'
    with pytest.raises(ValueError):
        filename_parser(filename)
def test_filename_parser_nonJWST():
    """Attempt to parse a file that is not formatted in the JWST naming
    convention and ensure ``ValueError`` is raised with the expected
    explanatory message.
    """
    filename = 'not_a_jwst_file.fits'
    expected_message = (
        r'Provided file .+ does not follow JWST naming conventions '
        r'\(jw<PPPPP><OOO><VVV>_<GGSAA>_<EEEEE>_<detector>_<suffix>\.fits\)'
    )
    with pytest.raises(ValueError, match=expected_message):
        filename_parser(filename)
def gather_statistics(general_results_dict, instrument_results_dict):
    """Walks the filesystem to gather various statistics to eventually
    store in the database

    Parameters
    ----------
    general_results_dict : dict
        A dictionary for the ``filesystem_general`` database table
    instrument_results_dict : dict
        A dictionary for the ``filesystem_instrument`` database table

    Returns
    -------
    general_results_dict : dict
        A dictionary for the ``filesystem_general`` database table
    instrument_results_dict : dict
        A dictionary for the ``filesystem_instrument`` database table
    """
    logging.info('Gathering stats for filesystem')

    for filesystem in [PROPRIETARY_FILESYSTEM, PUBLIC_FILESYSTEM]:
        # BUGFIX: walk the filesystem selected by the loop variable. The
        # previous code walked the constant FILESYSTEM on both iterations,
        # double-counting one tree and never visiting the other.
        for dirpath, _, files in os.walk(filesystem):
            general_results_dict['total_file_count'] += len(files)
            for filename in files:
                file_path = os.path.join(dirpath, filename)
                general_results_dict['total_file_size'] += os.path.getsize(file_path)

                if filename.endswith(".fits"):

                    # Parse out filename information. BUGFIX: skip only the
                    # unparseable file; ``break`` here wrongly skipped every
                    # remaining file in the directory.
                    try:
                        filename_dict = filename_parser(filename)
                    except ValueError:
                        continue

                    filetype = filename_dict['suffix']
                    instrument = filename_dict['instrument']

                    # Populate general stats
                    general_results_dict['fits_file_count'] += 1
                    general_results_dict['fits_file_size'] += os.path.getsize(file_path)

                    # Populate instrument-specific stats, creating the
                    # nested dicts on first encounter
                    if instrument not in instrument_results_dict:
                        instrument_results_dict[instrument] = {}
                    if filetype not in instrument_results_dict[instrument]:
                        instrument_results_dict[instrument][filetype] = {}
                        instrument_results_dict[instrument][filetype]['count'] = 0
                        instrument_results_dict[instrument][filetype]['size'] = 0
                    instrument_results_dict[instrument][filetype]['count'] += 1
                    # Sizes are accumulated in terabytes
                    instrument_results_dict[instrument][filetype]['size'] += os.path.getsize(file_path) / (2**40)

    # Convert file sizes to terabytes
    general_results_dict['total_file_size'] = general_results_dict['total_file_size'] / (2**40)
    general_results_dict['fits_file_size'] = general_results_dict['fits_file_size'] / (2**40)

    logging.info('\t{} fits files found in filesystem'.format(general_results_dict['fits_file_count']))

    return general_results_dict, instrument_results_dict
def clean_search(self):
    """Validate the "search" field.

    Check that the input is either a proposal or fileroot, and one
    that matches files in the filesystem.

    Returns
    -------
    str
        The cleaned data input into the "search" field
    """
    search = self.cleaned_data['search']

    # Classify the search term: a five-digit number is a proposal,
    # anything that parses as a JWST filename root is a fileroot
    if len(search) == 5 and search.isnumeric():
        self.search_type = 'proposal'
    elif self._search_is_fileroot(search):
        self.search_type = 'fileroot'
    else:
        raise forms.ValidationError('Invalid search term {}. Please provide proposal number '
                                    'or file root.'.format(search))

    if self.search_type == 'proposal':
        # Find files belonging to the proposal and determine which
        # instrument(s) produced them
        pattern = os.path.join(FILESYSTEM_DIR, 'jw{}'.format(search),
                               '*{}*.fits'.format(search))
        matching_files = glob.glob(pattern)
        if not matching_files:
            raise forms.ValidationError('Proposal {} not in the filesystem.'.format(search))

        all_instruments = [filename_parser(file)['instrument'] for file in matching_files]
        if len(set(all_instruments)) > 1:
            raise forms.ValidationError('Cannot return result for proposal with multiple '
                                        'instruments.')
        self.instrument = all_instruments[0]

    elif self.search_type == 'fileroot':
        # Verify the fileroot matches at least one file on disk
        pattern = os.path.join(FILESYSTEM_DIR, search[:7], '{}*.fits'.format(search))
        if not glob.glob(pattern):
            raise forms.ValidationError('Fileroot {} not in the filesystem.'.format(search))

        # The three-letter shorthand embedded in the detector field
        # identifies the instrument
        shorthand = search.split('_')[-1][:3]
        self.instrument = JWST_INSTRUMENT_NAMES_SHORTHAND[shorthand]

    return self.cleaned_data['search']
def test_filename_parser(filename, solution):
    """Check that ``filename_parser`` produces the expected dictionary.

    Parameters
    ----------
    filename : str
        The filename to test (e.g. ``jw00327001001_02101_00002_nrca1_rate.fits``)
    solution : dict
        A dictionary of the expected result
    """
    parsed = filename_parser(filename)
    assert parsed == solution
def create_dummy_filename(filelist):
    """Create a dummy filename indicating the detectors used to create
    the mosaic. Check the list of detectors used to determine the
    proper text to substitute into the initial filename.

    Parameters
    ----------
    filelist : list
        List of filenames containing the data used to create the mosaic.
        It is assumed these filenames follow JWST filenaming conventions.

    Returns
    -------
    dummy_name : str
        The first filename in ``filelist`` is modified, such that the
        detector name is replaced with text indicating the source of the
        mosaic data.

    Raises
    ------
    ValueError
        If the detector names identify neither NIRCam module A nor B.
    """
    det_string_list = []
    modules = []
    for filename in filelist:
        indir, infile = os.path.split(filename)
        det_string = filename_parser(infile)['detector']
        det_string_list.append(det_string)
        modules.append(det_string[3].upper())

    # Previous sorting means that either all of the
    # input files are LW, or all are SW. So we can check any
    # file to determine LW vs SW
    if '5' in det_string_list[0]:
        suffix = "NRC_LW_MOSAIC"
    else:
        moda = modules.count('A')
        modb = modules.count('B')
        if moda > 0 and modb > 0:
            suffix = "NRC_SWALL_MOSAIC"
        elif moda > 0:
            suffix = "NRC_SWA_MOSAIC"
        elif modb > 0:
            suffix = "NRC_SWB_MOSAIC"
        else:
            # BUGFIX: previously this case fell through with ``suffix``
            # unassigned, raising an opaque UnboundLocalError below
            raise ValueError('No NIRCam module (A or B) found in detector '
                             'names: {}'.format(det_string_list))

    dummy_name = filelist[0].replace(det_string_list[0], suffix)

    return dummy_name
def check_existence(file_list, outdir):
    """Given a list of fits files, determine if a preview image has
    already been created in ``outdir``.

    Parameters
    ----------
    file_list : list
        List of fits filenames from which preview image will be generated
    outdir : str
        Directory that will contain the preview image if it exists

    Returns
    -------
    exists : bool
        ``True`` if preview image exists, ``False`` if it does not
    """
    if len(file_list) == 1:
        # A single input file: the preview jpg is named after the file
        # itself plus a detector-dependent piece, so glob on the base name
        base = os.path.split(file_list[0])[1]
        search_string = base.split('.fits')[0] + '_*.jpg'
    else:
        # Multiple input files: look for the mosaic jpg, whose name
        # depends on whether the detectors are NIRCam shortwave or
        # longwave
        file_parts = filename_parser(file_list[0])

        if file_parts['detector'].upper() in NIRCAM_SHORTWAVE_DETECTORS:
            mosaic_str = "NRC_SW*_MOSAIC_"
        elif file_parts['detector'].upper() in NIRCAM_LONGWAVE_DETECTORS:
            mosaic_str = "NRC_LW*_MOSAIC_"

        search_string = 'jw{}{}{}_{}{}{}_{}_{}{}*.jpg'.format(
            file_parts['program_id'], file_parts['observation'],
            file_parts['visit'], file_parts['visit_group'],
            file_parts['parallel_seq_id'], file_parts['activity'],
            file_parts['exposure_id'], mosaic_str, file_parts['suffix'])

    matches = glob.glob(os.path.join(outdir, search_string))
    return len(matches) > 0
def _search_is_fileroot(self, search):
    """Determine if a search value is formatted like a fileroot.

    Parameters
    ----------
    search : str
        The search term input by the user.

    Returns
    -------
    bool
        Is the search term formatted like a fileroot?
    """
    # A fileroot is anything the filename parser accepts; keep the
    # parsed pieces on the form for later use
    try:
        parsed = filename_parser(search)
    except ValueError:
        return False
    self.fileroot_dict = parsed
    return True
def test_filename_parser_filename():
    """Generate a dictionary with parameters from a JWST filename.
    Assert that the dictionary matches what is expected.
    """
    expected = {
        'activity': '01',
        'detector': 'nrca1',
        'exposure_id': '00002',
        'observation': '001',
        'parallel_seq_id': '1',
        'program_id': '00327',
        'suffix': 'rate',
        'visit': '001',
        'visit_group': '02',
    }
    assert filename_parser('jw00327001001_02101_00002_nrca1_rate.fits') == expected
def test_filename_parser_filepath():
    """Generate a dictionary with parameters from a JWST filepath
    (not just the basename). Assert that the dictionary matches what
    is expected.
    """
    expected = {
        'activity': '02',
        'detector': 'nis',
        'exposure_id': '00001',
        'observation': '001',
        'parallel_seq_id': '1',
        'program_id': '90002',
        'suffix': 'rateints',
        'visit': '001',
        'visit_group': '02',
    }
    path = '/test/dir/to/the/file/jw90002/jw90002001001_02102_00001_nis_rateints.fits'
    assert filename_parser(path) == expected
def revert_filesystem():
    """Perform the opposite of ``organize_filesystem`` -- this function
    will move files from a MAST-data-cache-like organization to the
    previous organization.

    For example, the file
    ``<filesystem>/jw00312/jw00312002001/jw00312002001_02102_00001_nrcb4_rateints.fits``
    will be placed in the directory ``<old_filesystem>/jw00312/``.
    """
    # Walk through list of files to process
    for directory, _, files in os.walk(SETTINGS['filesystem']):
        print('Processing {}'.format(directory))
        for filename in files:

            # Parse the filename for metadata
            # NOTE(review): filename_parser raises ValueError for
            # non-JWST names, which is not caught here -- confirm all
            # files in this tree follow the naming convention
            src = os.path.join(directory, filename)
            filename_dict = filename_parser(src)

            # Build destination path for those filenames that can be parsed
            try:
                destination_directory = os.path.join(
                    SETTINGS['old_filesystem'],
                    'jw{}'.format(filename_dict['program_id']))
            except KeyError:
                # Some filenames do not have a program_id/observation/visit
                # structure. BUGFIX: skip just this file -- the previous
                # ``break`` wrongly skipped every remaining file in the
                # directory.
                continue

            # Build complete destination location
            dst = os.path.join(destination_directory, os.path.basename(src))

            # Create parent directories if necessary
            #ensure_dir_exists(destination_directory)

            # Move the file over
            #shutil.move(src, dst)
            print('\tMoved {} to {}'.format(src, dst))
def organize_filesystem():
    """The main function of the ``organize_filesystem`` module.

    See module docstrings for further details.
    """
    # Walk through list of files to process
    for directory, _, files in os.walk(SETTINGS['old_filesystem']):
        print('Processing {}'.format(directory))
        for filename in files:

            # Parse the filename for metadata
            src = os.path.join(directory, filename)
            filename_dict = filename_parser(src)

            # Build destination path for those filenames that can be parsed
            try:
                destination_directory = os.path.join(
                    SETTINGS['filesystem'],
                    'jw{}'.format(filename_dict['program_id']),
                    'jw{}{}{}'.format(filename_dict['program_id'],
                                      filename_dict['observation'],
                                      filename_dict['visit']))
            except KeyError:
                # Some filenames do not have a program_id/observation/visit
                # structure. BUGFIX: skip just this file -- the previous
                # ``break`` wrongly skipped every remaining file in the
                # directory.
                continue

            # Build complete destination location
            dst = os.path.join(destination_directory, os.path.basename(src))

            # Create parent directories if necessary
            #ensure_dir_exists(destination_directory)

            # Move the file over
            #shutil.move(src, dst)
            print('\tMoved {} to {}'.format(src, dst))
def thumbnails(inst, proposal=None):
    """Generate a page showing thumbnail images corresponding to
    activities, from a given ``proposal``

    Parameters
    ----------
    inst : str
        Name of JWST instrument
    proposal : str (optional)
        Number of APT proposal to filter

    Returns
    -------
    dict_to_render : dict
        Dictionary of parameters for the thumbnails
    """
    filepaths = get_filenames_by_instrument(inst)  # JUST FOR DEVELOPMENT

    # Split files into "archived" and "unlooked"
    if proposal is not None:
        page_type = 'archive'
    else:
        page_type = 'unlooked'
    filepaths = split_files(filepaths, page_type)

    # Determine file ID (everything except suffix)
    # e.g. jw00327001001_02101_00002_nrca1
    full_ids = set(
        ['_'.join(f.split('/')[-1].split('_')[:-1]) for f in filepaths])

    # If the proposal is specified (i.e. if the page being loaded is
    # an archive page), only collect data for given proposal
    # (characters 2:7 of the rootname are the program ID)
    if proposal is not None:
        full_ids = [f for f in full_ids if f[2:7] == proposal]

    # Group files by ID
    file_data = []
    detectors = []
    proposals = []
    for i, file_id in enumerate(full_ids):
        suffixes = []
        count = 0
        for file in filepaths:
            if '_'.join(file.split('/')[-1].split('_')[:-1]) == file_id:
                count += 1

                # Parse filename
                try:
                    file_dict = filename_parser(file)
                except ValueError:
                    # Temporary workaround for noncompliant files in filesystem:
                    # slice the metadata fields out of the rootname by position
                    file_dict = {
                        'activity': file_id[17:19],
                        'detector': file_id[26:],
                        'exposure_id': file_id[20:25],
                        'observation': file_id[7:10],
                        'parallel_seq_id': file_id[16],
                        'program_id': file_id[2:7],
                        'suffix': file.split('/')[-1].split('.')[0].split('_')[-1],
                        'visit': file_id[10:13],
                        'visit_group': file_id[14:16]
                    }

                # Determine suffix
                suffix = file_dict['suffix']
                suffixes.append(suffix)
                hdr = fits.getheader(file, ext=0)
                exp_start = hdr['EXPSTART']

        suffixes = list(set(suffixes))

        # Add parameters to sort by
        # NOTE(review): ``file_dict`` and ``exp_start`` are the values from
        # the LAST matching file of the inner loop, and would be unbound if
        # no file matched the id -- confirm every id always has a match
        if file_dict['detector'] not in detectors and \
                not file_dict['detector'].startswith('f'):
            detectors.append(file_dict['detector'])
        if file_dict['program_id'] not in proposals:
            proposals.append(file_dict['program_id'])

        file_dict['exp_start'] = exp_start
        file_dict['suffixes'] = suffixes
        file_dict['file_count'] = count
        file_dict['file_root'] = file_id

        file_data.append(file_dict)
    file_indices = np.arange(len(file_data))

    # Extract information for sorting with dropdown menus
    # (Don't include the proposal as a sorting parameter if the
    # proposal has already been specified)
    if proposal is not None:
        dropdown_menus = {'detector': detectors}
    else:
        dropdown_menus = {'detector': detectors, 'proposal': proposals}

    dict_to_render = {
        'inst': inst,
        'all_filenames': [os.path.basename(f) for f in filepaths],
        'tools': MONITORS,
        'thumbnail_zipped_list': zip(file_indices, file_data),
        'dropdown_menus': dropdown_menus,
        'n_fileids': len(file_data),
        'prop': proposal
    }

    return dict_to_render
def monitor_template_main(): """ The main function of the ``monitor_template`` module.""" # Example of logging my_variable = 'foo' logging.info('Some useful information: {}'.format(my_variable)) # Example of querying for a dataset via MAST API service = "Mast.Jwst.Filtered.Niriss" params = { "columns": "filename", "filters": [{ "paramName": "filter", "values": ['F430M'] }] } response = Mast.service_request_async(service, params) result = response[0].json()['data'] filename_of_interest = result[0][ 'filename'] # jw00304002001_02102_00001_nis_uncal.fits # Example of parsing a filename filename_dict = filename_parser(filename_of_interest) # Contents of filename_dict: # {'program_id': '00304', # 'observation': '002', # 'visit': '001', # 'visit_group': '02', # 'parallel_seq_id': '1', # 'activity': '02', # 'exposure_id': '00001', # 'detector': 'nis', # 'suffix': 'uncal'} # Example of locating a dataset in the filesystem filesystem = get_config()['filesystem'] dataset = os.path.join(filesystem, 'jw{}'.format(filename_dict['program_id']), filename_of_interest) # Example of reading in dataset using jwst.datamodels im = datamodels.open(dataset) # Now have access to: # im.data # Data array # im.err # ERR array # im.meta # Metadata such as header keywords # Example of saving a file and setting permissions im.save('some_filename.fits') set_permissions('some_filename.fits') # Example of creating and exporting a Bokeh plot plt = Donut(im.data, plot_width=600, plot_height=600) plt.sizing_mode = 'stretch_both' # Necessary for responsive sizing on web app script, div = components(plt) plot_output_dir = get_config()['outputs'] div_outfile = os.path.join(plot_output_dir, 'monitor_name', filename_of_interest + "_component.html") script_outfile = os.path.join(plot_output_dir, 'monitor_name', filename_of_interest + "_component.js") for outfile, component in zip([div_outfile, script_outfile], [div, script]): with open(outfile, 'w') as f: f.write(component) f.close() 
set_permissions(outfile) # Perform any other necessary code well_named_variable = "Function does something." result_of_second_function = second_function(well_named_variable)
def monitor_filesystem():
    """Tabulates the inventory of the JWST filesystem, saving
    statistics to files, and generates plots.
    """

    # Begin logging
    logging.info('Beginning filesystem monitoring.')

    # Get path, directories and files in system and count files in all directories
    settings = get_config()
    filesystem = settings['filesystem']
    outputs_dir = os.path.join(settings['outputs'], 'monitor_filesystem')

    # Set up dictionaries for output; defaultdicts let us increment
    # per-suffix and per-instrument keys without pre-declaring them
    results_dict = defaultdict(int)
    size_dict = defaultdict(float)

    # Walk through all directories recursively and count files
    # NOTE(review): filename_parser raises ValueError for noncompliant
    # names and that is not caught here -- confirm all .fits files in the
    # tree follow JWST naming conventions
    logging.info('Searching filesystem...')
    for dirpath, dirs, files in os.walk(filesystem):
        results_dict['file_count'] += len(files)  # find number of all files
        for filename in files:
            file_path = os.path.join(dirpath, filename)
            if filename.endswith(".fits"):  # find total number of fits files
                results_dict['fits_files'] += 1
                size_dict['size_fits'] += os.path.getsize(file_path)
                suffix = filename_parser(filename)['suffix']
                results_dict[suffix] += 1
                size_dict[suffix] += os.path.getsize(file_path)
                detector = filename_parser(filename)['detector']
                instrument = detector[0:3]  # first three characters of detector specify instrument
                results_dict[instrument] += 1
                size_dict[instrument] += os.path.getsize(file_path)
    logging.info('{} files found in filesystem'.format(
        results_dict['fits_files']))

    # Get df style stats on file system
    # NOTE(review): parsing positional fields of ``df`` output is
    # platform-dependent -- confirm the deployment platform's df format
    out = subprocess.check_output('df {}'.format(filesystem), shell=True)
    outstring = out.decode("utf-8")  # put into string for parsing from byte format
    parsed = outstring.split(sep=None)

    # Select desired elements from parsed string
    total = int(parsed[8])  # in blocks of 512 bytes
    used = int(parsed[9])
    available = int(parsed[10])
    percent_used = parsed[11]

    # Save stats for plotting over time
    now = datetime.datetime.now().isoformat(
        sep='T', timespec='auto')  # get date of stats

    # Set up output file and write stats
    statsfile = os.path.join(outputs_dir, 'statsfile.txt')
    with open(statsfile, "a+") as f:
        f.write("{0} {1:15d} {2:15d} {3:15d} {4:15d} {5}\n".format(
            now, results_dict['file_count'], total, available, used,
            percent_used))
    set_permissions(statsfile)
    logging.info('Saved file statistics to: {}'.format(statsfile))

    # Set up and read out stats on files by type
    filesbytype = os.path.join(outputs_dir, 'filesbytype.txt')
    with open(filesbytype, "a+") as f2:
        f2.write("{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10}\n".format(
            results_dict['fits_files'], results_dict['uncal'],
            results_dict['cal'], results_dict['rate'],
            results_dict['rateints'], results_dict['i2d'],
            results_dict['nrc'], results_dict['nrs'], results_dict['nis'],
            results_dict['mir'], results_dict['gui']))
    set_permissions(filesbytype, verbose=False)
    logging.info('Saved file statistics by type to {}'.format(filesbytype))

    # Set up the file-size-by-type output file
    sizebytype = os.path.join(outputs_dir, 'sizebytype.txt')
    with open(sizebytype, "a+") as f3:
        f3.write("{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10}\n".format(
            size_dict['size_fits'], size_dict['uncal'], size_dict['cal'],
            size_dict['rate'], size_dict['rateints'], size_dict['i2d'],
            size_dict['nrc'], size_dict['nrs'], size_dict['nis'],
            size_dict['mir'], size_dict['gui']))
    set_permissions(sizebytype, verbose=False)
    logging.info('Saved file sizes by type to {}'.format(sizebytype))

    logging.info('Filesystem statistics calculation complete.')

    # Create the plots
    plot_system_stats(statsfile, filesbytype, sizebytype)
def process_program(program):
    """Generate preview images and thumbnails for the given program.

    Parameters
    ----------
    program : str
        The program identifier (e.g. ``88600``)
    """

    # Group together common exposures
    filenames = glob.glob(
        os.path.join(get_config()['filesystem'], program, '*.fits'))
    grouped_filenames = group_filenames(filenames)
    logging.info('Found {} filenames'.format(len(filenames)))

    for file_list in grouped_filenames:
        filename = file_list[0]

        # Determine the save location; fall back to the bare basename
        # for files that do not follow JWST naming conventions
        try:
            identifier = 'jw{}'.format(filename_parser(filename)['program_id'])
        except ValueError:
            identifier = os.path.basename(filename).split('.fits')[0]
        preview_output_directory = os.path.join(
            get_config()['preview_image_filesystem'], identifier)
        thumbnail_output_directory = os.path.join(
            get_config()['thumbnail_filesystem'], identifier)

        # Check to see if the preview images already exist and skip if they do
        file_exists = check_existence(file_list, preview_output_directory)
        if file_exists:
            logging.info(
                "JPG already exists for {}, skipping.".format(filename))
            continue

        # Create the output directories if necessary
        if not os.path.exists(preview_output_directory):
            os.makedirs(preview_output_directory)
            permissions.set_permissions(preview_output_directory)
            logging.info(
                'Created directory {}'.format(preview_output_directory))
        if not os.path.exists(thumbnail_output_directory):
            os.makedirs(thumbnail_output_directory)
            permissions.set_permissions(thumbnail_output_directory)
            logging.info(
                'Created directory {}'.format(thumbnail_output_directory))

        # If the exposure contains more than one file (because more
        # than one detector was used), then create a mosaic
        max_size = 8
        numfiles = len(file_list)
        if numfiles > 1:
            try:
                mosaic_image, mosaic_dq = create_mosaic(file_list)
                logging.info('Created mosiac for:')
                for item in file_list:
                    logging.info('\t{}'.format(item))
            except (ValueError, FileNotFoundError) as error:
                logging.error(error)
            # NOTE(review): if create_mosaic raised above, mosaic_image /
            # mosaic_dq are unbound and the ``im.data = mosaic_image``
            # below will raise NameError (not caught as ValueError) --
            # confirm intended behavior for failed mosaics
            dummy_file = create_dummy_filename(file_list)
            # Larger mosaics get a larger maximum preview size
            if numfiles in [2, 4]:
                max_size = 16
            elif numfiles in [8]:
                max_size = 32

        # Create the nominal preview image and thumbnail
        try:
            im = PreviewImage(filename, "SCI")
            im.clip_percent = 0.01
            im.scaling = 'log'
            im.cmap = 'viridis'
            im.output_format = 'jpg'
            im.preview_output_directory = preview_output_directory
            im.thumbnail_output_directory = thumbnail_output_directory

            # If a mosaic was made from more than one file
            # insert it and it's associated DQ array into the
            # instance of PreviewImage. Also set the input
            # filename to indicate that we have mosaicked data
            if numfiles != 1:
                im.data = mosaic_image
                im.dq = mosaic_dq
                im.file = dummy_file

            im.make_image(max_img_size=max_size)
            logging.info(
                'Created preview image and thumbnail for: {}'.format(filename))
        except ValueError as error:
            logging.warning(error)
def group_filenames(filenames):
    """Given a list of JWST filenames, group together files from the
    same exposure.

    These files will share the same ``program_id``, ``observation``,
    ``visit``, ``visit_group``, ``parallel_seq_id``, ``activity``,
    ``exposure``, and ``suffix``. Only the ``detector`` will be
    different. Currently only NIRCam files for a given exposure will be
    grouped together. For other instruments multiple files for a given
    exposure will be kept separate from one another and no mosaic will
    be made. Stage 3 files will remain as individual files, and will
    not be grouped together with any other files.

    Parameters
    ----------
    filenames : list
        list of filenames

    Returns
    -------
    grouped : list
        grouped list of filenames where each element is a list and
        contains the names of filenames with matching exposure
        information.
    """
    # Some initializations
    grouped, matched_names = [], []
    filenames.sort()

    # Loop over each file in the list of good files
    for filename in filenames:

        # Holds list of matching files for exposure
        subgroup = []

        # Generate string to be matched with other filenames
        filename_dict = filename_parser(os.path.basename(filename))

        # If the filename was already involved in a match, then skip
        if filename not in matched_names:

            # For stage 3 filenames, treat individually
            if 'stage_3' in filename_dict['filename_type']:
                matched_names.append(filename)
                subgroup.append(filename)

            # Group together stage 1 and 2 filenames
            elif filename_dict['filename_type'] == 'stage_1_and_2':

                # Determine detector naming convention: NIRCam detectors
                # are matched as a character class so files from all
                # detectors of the same channel group together
                if filename_dict['detector'].upper() in NIRCAM_SHORTWAVE_DETECTORS:
                    detector_str = 'NRC[AB][1234]'
                elif filename_dict['detector'].upper() in NIRCAM_LONGWAVE_DETECTORS:
                    detector_str = 'NRC[AB]5'
                else:  # non-NIRCam detectors: exact match only, so no grouping
                    detector_str = filename_dict['detector'].upper()

                # Build pattern to match against
                base_output_name = get_base_output_name(filename_dict)
                match_str = '{}{}_{}.fits'.format(
                    base_output_name, detector_str, filename_dict['suffix'])
                match_str = os.path.join(os.path.dirname(filename), match_str)
                pattern = re.compile(match_str, re.IGNORECASE)

                # Try to match the substring to each good file
                for file_to_match in filenames:
                    if pattern.match(file_to_match) is not None:
                        matched_names.append(file_to_match)
                        subgroup.append(file_to_match)

        if len(subgroup) > 0:
            grouped.append(subgroup)

    return grouped
def create_mosaic(filenames):
    """If an exposure comprises data from multiple detectors read in
    all the appropriate files and create a mosaic so that the preview
    image will show all the data together.

    Parameters
    ----------
    filenames : list
        List of filenames to be combined into a mosaic

    Returns
    -------
    mosaic_filename : str
        Name of fits file containing the mosaicked data
    """
    # Use preview_image to load data and create difference image
    # for each detector. Save in a list
    data = []
    detector = []
    data_lower_left = []
    for filename in filenames:
        image = PreviewImage(filename, "SCI")  # Now have image.data, image.dq
        data_dim = len(image.data.shape)
        # 4D data (integrations x groups x y x x) is collapsed to a
        # difference image; lower-dimensional data is used as-is
        if data_dim == 4:
            diff_im = image.difference_image(image.data)
        else:
            diff_im = image.data
        data.append(diff_im)
        detector.append(filename_parser(filename)['detector'].upper())
        data_lower_left.append((image.xstart, image.ystart))

    # Make sure SW and LW data are not being mixed. Create the
    # appropriately sized numpy array to hold all the data based
    # on the channel, module, and subarray size
    mosaic_channel = find_data_channel(detector)
    full_xdim, full_ydim, full_lower_left = array_coordinates(
        mosaic_channel, detector, data_lower_left)

    # Create the array to hold all the data, NaN-filled so that gaps
    # between detectors are distinguishable from real pixels
    datashape = data[0].shape
    datadim = len(datashape)
    if datadim == 2:
        full_array = np.zeros((1, full_ydim, full_xdim)) * np.nan
    elif datadim == 3:
        full_array = np.zeros((datashape[0], full_ydim, full_xdim)) * np.nan
    else:
        raise ValueError(
            ('Difference image for {} must be either 2D or 3D.'.format(
                filenames[0])))

    # Place the data from the individual detectors in the appropriate
    # places in the final image
    for pixdata, detect in zip(data, detector):
        x0, y0 = full_lower_left[detect]
        if datadim == 2:
            yd, xd = pixdata.shape
            full_array[0, y0:y0 + yd, x0:x0 + xd] = pixdata
        elif datadim == 3:
            ints, yd, xd = pixdata.shape
            full_array[:, y0:y0 + yd, x0:x0 + xd] = pixdata

    # Create associated DQ array and set unpopulated pixels to be
    # skipped in preview image scaling
    full_dq = create_dq_array(full_xdim, full_ydim,
                              full_array[0, :, :], mosaic_channel)

    return full_array, full_dq
def thumbnails_query_ajax(rootnames, insts):
    """Generate a page that provides data necessary to render the
    ``thumbnails`` template.

    Parameters
    ----------
    rootnames : list of strings
        File rootnames to gather data for
    insts : list of strings
        Name of JWST instrument

    Returns
    -------
    data_dict : dict
        Dictionary of data needed for the ``thumbnails`` template
    """
    # Initialize dictionary that will contain all needed data
    data_dict = {}

    # Dummy variable for view_image when thumbnail is selected
    data_dict['inst'] = "all"
    data_dict['file_data'] = {}

    # Gather data for each rootname
    for rootname in rootnames:

        # Fit expected format for get_filenames_by_rootname():
        # keep only the first four underscore-separated fields
        rootname = rootname.split("_")[0] + '_' + rootname.split("_")[
            1] + '_' + rootname.split("_")[2] + '_' + rootname.split("_")[3]

        # Parse filename
        try:
            filename_dict = filename_parser(rootname)
        except ValueError:
            # Temporary workaround for noncompliant files in filesystem:
            # slice the metadata fields out of the rootname by position
            filename_dict = {
                'activity': rootname[17:19],
                'detector': rootname[26:],
                'exposure_id': rootname[20:25],
                'observation': rootname[7:10],
                'parallel_seq_id': rootname[16],
                'program_id': rootname[2:7],
                'visit': rootname[10:13],
                'visit_group': rootname[14:16]
            }

        # Get list of available filenames
        available_files = get_filenames_by_rootname(rootname)

        # Add data to dictionary
        data_dict['file_data'][rootname] = {}
        # Characters 26:29 of the rootname hold the instrument shorthand;
        # fall back to MIRI when the shorthand is not recognized
        try:
            data_dict['file_data'][rootname][
                'inst'] = JWST_INSTRUMENT_NAMES_MIXEDCASE[
                    JWST_INSTRUMENT_NAMES_SHORTHAND[rootname[26:29]]]
        except KeyError:
            data_dict['file_data'][rootname]['inst'] = "MIRI"
            print("Warning: assuming instrument is MIRI")
        data_dict['file_data'][rootname]['filename_dict'] = filename_dict
        data_dict['file_data'][rootname]['available_files'] = available_files
        data_dict['file_data'][rootname]['expstart'] = get_expstart(rootname)
        data_dict['file_data'][rootname]['suffixes'] = [
            filename_parser(filename)['suffix']
            for filename in available_files
        ]
        data_dict['file_data'][rootname]['prop'] = rootname[2:7]

    # Extract information for sorting with dropdown menus
    detectors = [
        data_dict['file_data'][rootname]['filename_dict']['detector']
        for rootname in list(data_dict['file_data'].keys())
    ]
    instruments = [
        data_dict['file_data'][rootname]['inst'].lower()
        for rootname in list(data_dict['file_data'].keys())
    ]
    proposals = [
        data_dict['file_data'][rootname]['filename_dict']['program_id']
        for rootname in list(data_dict['file_data'].keys())
    ]

    dropdown_menus = {
        'instrument': instruments,
        'detector': detectors,
        'proposal': proposals
    }

    data_dict['tools'] = MONITORS
    data_dict['dropdown_menus'] = dropdown_menus

    return data_dict
def thumbnails_ajax(inst, proposal=None):
    """Assemble the data needed to render the ``thumbnails`` template
    for a given instrument, optionally restricted to one proposal.

    Parameters
    ----------
    inst : str
        Name of JWST instrument
    proposal : str (optional)
        Number of APT proposal to filter

    Returns
    -------
    data_dict : dict
        Dictionary of data needed for the ``thumbnails`` template
    """
    # Unique rootnames (filename minus the suffix) for this instrument
    filepaths = get_filenames_by_instrument(inst)
    rootnames = set('_'.join(path.split('/')[-1].split('_')[:-1])
                    for path in filepaths)

    # If the proposal is specified (i.e. if the page being loaded is
    # an archive page), only collect data for given proposal
    if proposal is not None:
        proposal_string = '{:05d}'.format(int(proposal))
        rootnames = [name for name in rootnames
                     if name[2:7] == proposal_string]

    # Initialize dictionary that will contain all needed data
    data_dict = {'inst': inst, 'file_data': {}}

    # Gather data for each rootname
    for rootname in rootnames:
        # Parse filename
        try:
            filename_dict = filename_parser(rootname)
        except ValueError:
            # Temporary workaround for noncompliant files in filesystem:
            # fall back to fixed-position slicing of the rootname
            filename_dict = {
                'activity': rootname[17:19],
                'detector': rootname[26:],
                'exposure_id': rootname[20:25],
                'observation': rootname[7:10],
                'parallel_seq_id': rootname[16],
                'program_id': rootname[2:7],
                'visit': rootname[10:13],
                'visit_group': rootname[14:16]}

        # Get list of available filenames
        available_files = get_filenames_by_rootname(rootname)

        # Add data to dictionary
        data_dict['file_data'][rootname] = {
            'filename_dict': filename_dict,
            'available_files': available_files,
            'expstart': get_expstart(rootname),
            'suffixes': [filename_parser(filename)['suffix']
                         for filename in available_files]}

    # Extract information for sorting with dropdown menus
    # (Don't include the proposal as a sorting parameter if the
    # proposal has already been specified)
    file_data = data_dict['file_data']
    detectors = [file_data[name]['filename_dict']['detector']
                 for name in file_data]
    proposals = [file_data[name]['filename_dict']['program_id']
                 for name in file_data]
    if proposal is not None:
        dropdown_menus = {'detector': detectors}
    else:
        dropdown_menus = {'detector': detectors, 'proposal': proposals}

    data_dict['tools'] = MONITORS
    data_dict['dropdown_menus'] = dropdown_menus
    data_dict['prop'] = proposal

    return data_dict
def group_filenames(input_files):
    """Given a list of JWST filenames, group together files from the
    same exposure.

    Grouped files share the same ``program_id``, ``observation``,
    ``visit``, ``visit_group``, ``parallel_seq_id``, ``activity``,
    ``exposure``, and ``suffix``; only the ``detector`` differs.
    Currently only NIRCam files for a given exposure are grouped
    together. For other instruments multiple files for a given exposure
    are kept separate from one another and no mosaic is made.

    Parameters
    ----------
    input_files : list
        list of filenames

    Returns
    -------
    grouped : list
        grouped list of filenames where each element is a non-empty
        list containing the filenames with matching exposure
        information.
    """
    grouped = []

    # Sort files first so members of an exposure are adjacent
    input_files.sort()

    goodindex = np.arange(len(input_files))
    input_files = np.array(input_files)

    # Loop over each file in the list of good files
    for index, full_filename in enumerate(input_files):

        # Bug fix: skip files that were already grouped with an earlier
        # file. Previously these were reprocessed, matched nothing
        # (their index was gone from goodindex), and appended empty
        # lists to the output, which crashes downstream consumers that
        # do ``file_list[0]``.
        if index not in goodindex:
            continue

        file_directory, filename = os.path.split(full_filename)

        # Generate string to be matched with other filenames
        filename_parts = filename_parser(filename)
        program = filename_parts['program_id']
        observation = filename_parts['observation']
        visit = filename_parts['visit']
        visit_group = filename_parts['visit_group']
        parallel = filename_parts['parallel_seq_id']
        activity = filename_parts['activity']
        exposure = filename_parts['exposure_id']
        detector = filename_parts['detector'].upper()
        suffix = filename_parts['suffix']
        observation_base = f'jw{program}{observation}{visit}_{visit_group}{parallel}{activity}_{exposure}_'

        # Only NIRCam exposures are mosaicked: match any detector from
        # the same NIRCam channel
        if detector in NIRCAM_SHORTWAVE_DETECTORS:
            detector_str = 'NRC[AB][1234]'
        elif detector in NIRCAM_LONGWAVE_DETECTORS:
            detector_str = 'NRC[AB]5'
        else:
            # non-NIRCam detectors: the pattern only matches this file
            detector_str = detector
        match_str = f'{observation_base}{detector_str}_{suffix}.fits'
        match_str = os.path.join(file_directory, match_str)
        pattern = re.compile(match_str, re.IGNORECASE)

        # Try to match the substring to each good file
        matches = []
        matched_name = []
        for index2, file2match in enumerate(input_files[goodindex]):
            match = pattern.match(file2match)

            # Add any files that match the string
            if match is not None:
                matched_name.append(file2match)
                matches.append(goodindex[index2])

        # For any matched files, remove from goodindex so we don't
        # use them as a basis for matching later
        all_locs = []
        for num in matches:
            loc = np.where(goodindex == num)
            all_locs.append(loc[0][0])
        if len(all_locs) != 0:
            # Delete matched file indexes from the list of files to search
            goodindex = np.delete(goodindex, all_locs)

        # Add the list of matched files to the overall list of files
        grouped.append(matched_name)

    return grouped
def monitor(self):
    """Inventory the JWST filesystem, save file statistics, and
    generate plots.

    Walks ``self.filesystem`` counting files and accumulating sizes by
    suffix and by instrument, queries ``df`` for disk usage, appends
    the results to ``self.results``, ``self.sizes`` and
    ``self.statistics``, writes three stats files under
    ``self.outputs_dir``, and finally refreshes the plots.
    """
    # Begin logging
    logging.info('Beginning filesystem monitoring.')

    # re-initialize dictionaries for output; defaultdicts so unseen
    # suffix/instrument keys start at 0 / 0.0
    results_dict = defaultdict(int)
    size_dict = defaultdict(float)

    # Walk through all directories recursively and count files
    logging.info('Searching filesystem...')
    for dirpath, dirs, files in os.walk(self.filesystem):
        results_dict['file_count'] += len(files)  # find number of all files
        for filename in files:
            file_path = os.path.join(dirpath, filename)
            if filename.endswith(".fits"):  # find total number of fits files
                results_dict['fits_files'] += 1
                size_dict['size_fits'] += os.path.getsize(file_path)
                # NOTE(review): filename_parser raises ValueError on
                # noncompliant names and it is not caught here — a
                # single bad file aborts the walk; confirm intended.
                suffix = filename_parser(filename)['suffix']
                results_dict[suffix] += 1
                size_dict[suffix] += os.path.getsize(file_path)
                detector = filename_parser(filename)['detector']
                instrument = detector[0:3]  # first three characters of detector specify instrument
                results_dict[instrument] += 1
                size_dict[instrument] += os.path.getsize(file_path)
    logging.info('{} files found in filesystem'.format(results_dict['fits_files']))

    # Get df style stats on file system
    out = subprocess.check_output('df {}'.format(self.filesystem), shell=True)
    outstring = out.decode("utf-8")  # put into string for parsing from byte format
    parsed = outstring.split(sep=None)

    # Select desired elements from parsed string.
    # NOTE(review): indices 8-11 assume the standard two-line df header
    # layout (7 header tokens + device name); verify on target platform.
    stats = {
        'total': int(parsed[8]),  # in blocks of 512 bytes
        'used': int(parsed[9]),
        'available': int(parsed[10]),
        'percent_used': parsed[11],
        'file_count': results_dict.pop('file_count'),
        'timestamp': datetime.datetime.now().isoformat(sep='T', timespec='auto')  # get date of stats
    }

    # store results & sizes in the appropriate dictionaries
    for key, val in results_dict.items():
        self.results[key].append(val)
    for key, val in size_dict.items():
        self.sizes[key].append(val)
    for key, val in stats.items():
        self.statistics[key].append(val)

    # set up output file and write stats (append mode: one row per run)
    statsfile = os.path.join(self.outputs_dir, 'statsfile.txt')
    with open(statsfile, "a+") as f:
        f.write("{timestamp} {file_count:15d} {total:15d} {available:15d} {used:15d} {percent_used}\n".format(**stats))
    set_permissions(statsfile)
    logging.info('Saved file statistics to: {}'.format(statsfile))

    # Shared row format for the two per-type files below; relies on the
    # walk having populated each of these keys in the defaultdicts
    output_stub = "{fits_files} {uncal} {cal} {rate} {rateints} {i2d} {nrc} {nrs} {nis} {mir} {gui}\n"

    # set up and read out stats on files by type
    filesbytype = os.path.join(self.outputs_dir, 'filesbytype.txt')
    with open(filesbytype, "a+") as f2:
        f2.write(output_stub.format(**results_dict))
    set_permissions(filesbytype, verbose=False)
    logging.info('Saved file statistics by type to {}'.format(filesbytype))

    # set up file size by type file
    sizebytype = os.path.join(self.outputs_dir, 'sizebytype.txt')
    with open(sizebytype, "a+") as f3:
        f3.write(output_stub.format(**size_dict))
    set_permissions(sizebytype, verbose=False)
    logging.info('Saved file sizes by type to {}'.format(sizebytype))

    logging.info('Filesystem statistics calculation complete.')

    # Update the plots based on new information
    self.update_plots()
def thumbnails(inst, proposal=None):
    """Generate a page showing thumbnail images corresponding to
    activities, from a given ``proposal``.

    Parameters
    ----------
    inst : str
        Name of JWST instrument
    proposal : str (optional)
        Number of APT proposal to filter

    Returns
    -------
    dict_to_render : dict
        Dictionary of parameters for the thumbnails
    """
    filepaths = get_filenames_by_instrument(inst)

    # JUST FOR DEVELOPMENT
    # Split files into "archived" and "unlooked"
    if proposal is not None:
        page_type = 'archive'
    else:
        page_type = 'unlooked'
    filepaths = split_files(filepaths, page_type)

    # Determine file ID (everything except suffix)
    # e.g. jw00327001001_02101_00002_nrca1
    full_ids = set(['_'.join(f.split('/')[-1].split('_')[:-1]) for f in filepaths])

    # If the proposal is specified (i.e. if the page being loaded is
    # an archive page), only collect data for given proposal
    if proposal is not None:
        proposal_string = '{:05d}'.format(int(proposal))
        full_ids = [f for f in full_ids if f[2:7] == proposal_string]

    detectors = []
    proposals = []
    for i, file_id in enumerate(full_ids):
        for file in filepaths:
            if '_'.join(file.split('/')[-1].split('_')[:-1]) == file_id:

                # Parse filename once to get program_id and detector
                try:
                    filename_dict = filename_parser(file)
                    program_id = filename_dict['program_id']
                    detector = filename_dict['detector']
                except ValueError:
                    # Temporary workaround for noncompliant files in
                    # the filesystem: slice the ID at fixed positions.
                    # (Bug fix: this branch previously referenced the
                    # undefined name ``nfile_id``, raising NameError.)
                    program_id = file_id[2:7]
                    detector = file_id[26:]

                # Add parameters to sort by
                if detector not in detectors and not detector.startswith('f'):
                    detectors.append(detector)
                if program_id not in proposals:
                    proposals.append(program_id)

    # Extract information for sorting with dropdown menus
    # (Don't include the proposal as a sorting parameter if the
    # proposal has already been specified)
    if proposal is not None:
        dropdown_menus = {'detector': detectors}
    else:
        dropdown_menus = {'detector': detectors, 'proposal': proposals}

    dict_to_render = {
        'inst': inst,
        'tools': MONITORS,
        'dropdown_menus': dropdown_menus,
        'prop': proposal
    }

    return dict_to_render
def get_thumbnails_all_instruments(parameters):
    """Return a list of thumbnails available in the filesystem for all
    instruments given requested MAST parameters and queried anomalies.

    Parameters
    ----------
    parameters: dict
        A dictionary containing the following keys, some of which are
        dictionaries keyed by lowercase instrument name:
            instruments
            apertures
            filters
            detector
            effexptm_min
            effexptm_max
            anomalies

    Returns
    -------
    thumbnails : list
        A list of thumbnails available in the filesystem for the
        given instrument.
    """
    anomalies = parameters['anomalies']

    filenames = []
    for inst in parameters['instruments']:
        # Make sure instruments are of the proper format (e.g. "Nircam")
        instrument = inst[0].upper() + inst[1:].lower()

        # Query MAST for all rootnames for the instrument
        service = "Mast.Jwst.Filtered.{}".format(instrument)

        # With no constraints at all, request every column unfiltered;
        # otherwise build one MAST filter entry per non-empty constraint
        if (parameters['apertures'][inst.lower()] == []) and (parameters['detectors'][inst.lower()] == []) \
                and (parameters['filters'][inst.lower()] == []) and (parameters['exposure_types'][inst.lower()] == []) \
                and (parameters['read_patterns'][inst.lower()] == []):
            params = {"columns": "*", "filters": []}
        else:
            query_filters = []
            if (parameters['apertures'][inst.lower()] != []):
                # NIRCam uses a different aperture column name than the
                # other instruments
                if instrument != "Nircam":
                    query_filters.append({"paramName": "pps_aper", "values": parameters['apertures'][inst.lower()]})
                if instrument == "Nircam":
                    query_filters.append({"paramName": "apername", "values": parameters['apertures'][inst.lower()]})
            if (parameters['detectors'][inst.lower()] != []):
                query_filters.append({"paramName": "detector", "values": parameters['detectors'][inst.lower()]})
            if (parameters['filters'][inst.lower()] != []):
                query_filters.append({"paramName": "filter", "values": parameters['filters'][inst.lower()]})
            if (parameters['exposure_types'][inst.lower()] != []):
                query_filters.append({"paramName": "exp_type", "values": parameters['exposure_types'][inst.lower()]})
            if (parameters['read_patterns'][inst.lower()] != []):
                query_filters.append({"paramName": "readpatt", "values": parameters['read_patterns'][inst.lower()]})
            params = {"columns": "*", "filters": query_filters}

        response = Mast.service_request_async(service, params)
        results = response[0].json()['data']

        # Keep rootnames only (drop the .fits extension)
        inst_filenames = [result['filename'].split('.')[0] for result in results]
        filenames.extend(inst_filenames)

    # Get list of all thumbnails
    thumbnail_list = glob.glob(os.path.join(THUMBNAIL_FILESYSTEM, '*', '*.thumb'))

    # Get subset of preview images that match the filenames
    thumbnails_subset = [os.path.basename(item) for item in thumbnail_list if
                         os.path.basename(item).split('_integ')[0] in filenames]

    # Eliminate any duplicates
    thumbnails_subset = list(set(thumbnails_subset))

    # Determine whether or not queried anomalies are flagged
    final_subset = []

    if anomalies != {'miri': [], 'nirspec': [], 'niriss': [], 'nircam': [], 'fgs': []}:
        for thumbnail in thumbnails_subset:
            # Rebuild the rootname from the first four underscore-
            # separated components of the thumbnail name
            components = thumbnail.split('_')
            rootname = ''.join((components[0], '_', components[1], '_', components[2], '_', components[3]))
            try:
                # NOTE(review): filename_parser raises ValueError, not
                # KeyError, on bad names — a malformed thumbnail name
                # would escape this handler; confirm intended.
                instrument = filename_parser(thumbnail)['instrument']
                thumbnail_anomalies = get_current_flagged_anomalies(rootname, instrument)
                if thumbnail_anomalies:
                    for anomaly in anomalies[instrument.lower()]:
                        if anomaly.lower() in thumbnail_anomalies:
                            # thumbnail contains an anomaly selected in the query
                            final_subset.append(thumbnail)
            except KeyError:
                print("Error with thumbnail: ", thumbnail)
    else:
        # if no anomalies are flagged, return all thumbnails from query
        final_subset = thumbnails_subset

    return list(set(final_subset))
def clean_search(self):
    """Validate the "search" field.

    Check that the input is either a proposal or fileroot, and one
    that matches files in the filesystem. As a side effect, sets
    ``self.search_type`` and ``self.instrument``.

    Returns
    -------
    str
        The cleaned data input into the "search" field

    Raises
    ------
    forms.ValidationError
        If the search term is neither a proposal nor a fileroot, or
        matches nothing (or multiple instruments) in the filesystem.
    """
    # Get the cleaned search data
    search = self.cleaned_data['search']

    # Make sure the search is either a proposal or fileroot
    if search.isnumeric() and 1 < int(search) < 99999:
        self.search_type = 'proposal'
    elif self._search_is_fileroot(search):
        self.search_type = 'fileroot'
    else:
        raise forms.ValidationError('Invalid search term {}. Please provide proposal number '
                                    'or file root.'.format(search))

    # If they searched for a proposal...
    if self.search_type == 'proposal':
        # See if there are any matching proposals and, if so, what
        # instrument they are for. Both the public and proprietary
        # trees are searched.
        proposal_string = '{:05d}'.format(int(search))
        search_string_public = os.path.join(get_config()['filesystem'], 'public', 'jw{}'.format(proposal_string), '*', '*{}*.fits'.format(proposal_string))
        search_string_proprietary = os.path.join(get_config()['filesystem'], 'proprietary', 'jw{}'.format(proposal_string), '*', '*{}*.fits'.format(proposal_string))
        all_files = glob.glob(search_string_public)
        all_files.extend(glob.glob(search_string_proprietary))

        # Ignore "original" files
        all_files = [filename for filename in all_files if 'original' not in filename]

        if len(all_files) > 0:
            all_instruments = []
            for file in all_files:
                instrument = filename_parser(file)['instrument']
                all_instruments.append(instrument)
            # A proposal spanning multiple instruments is ambiguous
            if len(set(all_instruments)) > 1:
                raise forms.ValidationError('Cannot return result for proposal with multiple instruments ({}).'.format(', '.join(set(all_instruments))))
            self.instrument = all_instruments[0]
        else:
            raise forms.ValidationError('Proposal {} not in the filesystem.'.format(search))

    # If they searched for a fileroot...
    elif self.search_type == 'fileroot':
        # See if there are any matching fileroots and, if so, what instrument they are for.
        # search[:7] is the jw<PPPPP> program directory, search[:13]
        # the observation directory.
        search_string_public = os.path.join(get_config()['filesystem'], 'public', search[:7], search[:13], '{}*.fits'.format(search))
        search_string_proprietary = os.path.join(get_config()['filesystem'], 'proprietary', search[:7], search[:13], '{}*.fits'.format(search))
        all_files = glob.glob(search_string_public)
        all_files.extend(glob.glob(search_string_proprietary))

        # Ignore "original" files
        all_files = [filename for filename in all_files if 'original' not in filename]

        if len(all_files) == 0:
            raise forms.ValidationError('Fileroot {} not in the filesystem.'.format(search))

        # Map the 3-letter detector shorthand in the fileroot to the
        # full instrument name
        instrument = search.split('_')[-1][:3]
        self.instrument = JWST_INSTRUMENT_NAMES_SHORTHAND[instrument]

    return self.cleaned_data['search']
def generate_preview_images():
    """The main function of the ``generate_preview_image`` module.

    Walks the configured filesystem, groups multi-detector exposures,
    and produces a preview image and thumbnail for each group,
    mosaicking NIRCam exposures when possible.
    """
    # Begin logging
    logging.info("Beginning the script run")

    filesystem = get_config()['filesystem']
    preview_image_filesystem = get_config()['preview_image_filesystem']
    thumbnail_filesystem = get_config()['thumbnail_filesystem']

    filenames = glob(os.path.join(filesystem, '*/*.fits'))
    grouped_filenames = group_filenames(filenames)
    logging.info(f"Found {len(filenames)} filenames")

    for file_list in grouped_filenames:
        filename = file_list[0]

        # Determine the save location
        try:
            identifier = 'jw{}'.format(filename_parser(filename)['program_id'])
        except ValueError:
            # Noncompliant name: fall back to the bare filename
            identifier = os.path.basename(filename).split('.fits')[0]

        preview_output_directory = os.path.join(preview_image_filesystem, identifier)
        thumbnail_output_directory = os.path.join(thumbnail_filesystem, identifier)

        # Check to see if the preview images already exist and skip
        # if they do
        file_exists = check_existence(file_list, preview_output_directory)
        if file_exists:
            logging.info("JPG already exists for {}, skipping.".format(filename))
            continue

        # Create the output directories if necessary
        for directory in (preview_output_directory, thumbnail_output_directory):
            if not os.path.exists(directory):
                os.makedirs(directory)
                permissions.set_permissions(directory)
                logging.info(f'Created directory {directory}')

        # If the exposure contains more than one file (because more
        # than one detector was used), then create a mosaic
        max_size = 8
        numfiles = len(file_list)
        mosaic_created = False
        if numfiles != 1:
            try:
                mosaic_image, mosaic_dq = create_mosaic(file_list)
                mosaic_created = True
                logging.info('Created mosaic for:')
                for item in file_list:
                    logging.info(f'\t{item}')
            except (ValueError, FileNotFoundError) as error:
                # Bug fix: previously a failed mosaic still fell through
                # to the insertion branch below and raised NameError on
                # the unbound ``mosaic_image``. Now we log the error and
                # fall back to a single-file preview.
                logging.error(error)
            if numfiles in [2, 4]:
                max_size = 16
            elif numfiles in [8]:
                max_size = 32

        # Create the nominal preview image and thumbnail
        try:
            im = PreviewImage(filename, "SCI")
            im.clip_percent = 0.01
            im.scaling = 'log'
            im.cmap = 'viridis'
            im.output_format = 'jpg'
            im.preview_output_directory = preview_output_directory
            im.thumbnail_output_directory = thumbnail_output_directory

            # If a mosaic was made from more than one file, insert it
            # and its associated DQ array into the instance of
            # PreviewImage. Also set the input filename to indicate
            # that we have mosaicked data.
            if mosaic_created:
                im.data = mosaic_image
                im.dq = mosaic_dq
                im.file = create_dummy_filename(file_list)

            im.make_image(max_img_size=max_size)
        except ValueError as error:
            logging.warning(error)

    # Complete logging:
    logging.info("Completed.")