def test_filename_parser_whole_filesystem():
    """Run ``filename_parser`` over every FITS file in the filesystem.

    Fails if any filename cannot be parsed, listing the offending
    basenames in the assertion message.
    """
    # Collect every .fits file beneath the configured filesystem root
    root_dir = get_config()['filesystem']
    fits_files = [
        os.path.join(dir_name, entry)
        for dir_name, _, entries in os.walk(root_dir)
        for entry in entries
        if entry.endswith('.fits')
    ]

    # Record the basenames the parser rejects
    unparseable = []
    for path in fits_files:
        try:
            filename_parser(path)
        except ValueError:
            unparseable.append(os.path.basename(path))

    failure_msg = '{} files could not be successfully parsed: \n - {}'.\
        format(len(unparseable), '\n - '.join(unparseable))

    # The test passes only when every file parsed cleanly
    assert not unparseable, failure_msg
def test_filename_parser_nonJWST():
    """Attempt to parse a file that is not formatted in the JWST naming
    convention and ensure ``ValueError`` is raised.
    """
    filename = 'not_a_jwst_file.fits'
    with pytest.raises(ValueError):
        filename_parser(filename)
def test_filename_parser_nonJWST():
    """Attempt to parse a file that is not formatted in the JWST naming
    convention and ensure ``ValueError`` is raised with the expected
    explanatory message.
    """
    filename = 'not_a_jwst_file.fits'
    expected_message = (
        r'Provided file .+ does not follow JWST naming conventions '
        r'\(jw<PPPPP><OOO><VVV>_<GGSAA>_<EEEEE>_<detector>_<suffix>\.fits\)'
    )
    with pytest.raises(ValueError, match=expected_message):
        filename_parser(filename)
def gather_statistics(general_results_dict, instrument_results_dict):
    """Walks the filesystem to gather various statistics to eventually
    store in the database

    Parameters
    ----------
    general_results_dict : dict
        A dictionary for the ``filesystem_general`` database table
    instrument_results_dict : dict
        A dictionary for the ``filesystem_instrument`` database table

    Returns
    -------
    general_results_dict : dict
        A dictionary for the ``filesystem_general`` database table
    instrument_results_dict : dict
        A dictionary for the ``filesystem_instrument`` database table
    """
    logging.info('Gathering stats for filesystem')

    for filesystem in [PROPRIETARY_FILESYSTEM, PUBLIC_FILESYSTEM]:
        # BUGFIX: walk the filesystem selected by the loop variable. The
        # previous code walked the constant FILESYSTEM on both iterations,
        # double-counting one tree and never visiting the other.
        for dirpath, _, files in os.walk(filesystem):
            general_results_dict['total_file_count'] += len(files)
            for filename in files:
                file_path = os.path.join(dirpath, filename)
                general_results_dict['total_file_size'] += os.path.getsize(file_path)

                if filename.endswith(".fits"):

                    # Parse out filename information. BUGFIX: skip only the
                    # unparseable file; ``break`` here wrongly skipped every
                    # remaining file in the directory.
                    try:
                        filename_dict = filename_parser(filename)
                    except ValueError:
                        continue

                    filetype = filename_dict['suffix']
                    instrument = filename_dict['instrument']

                    # Populate general stats
                    general_results_dict['fits_file_count'] += 1
                    general_results_dict['fits_file_size'] += os.path.getsize(file_path)

                    # Populate instrument-specific stats, creating the
                    # nested dicts on first encounter
                    if instrument not in instrument_results_dict:
                        instrument_results_dict[instrument] = {}
                    if filetype not in instrument_results_dict[instrument]:
                        instrument_results_dict[instrument][filetype] = {}
                        instrument_results_dict[instrument][filetype]['count'] = 0
                        instrument_results_dict[instrument][filetype]['size'] = 0
                    instrument_results_dict[instrument][filetype]['count'] += 1
                    # Sizes are accumulated in terabytes
                    instrument_results_dict[instrument][filetype]['size'] += os.path.getsize(file_path) / (2**40)

    # Convert file sizes to terabytes
    general_results_dict['total_file_size'] = general_results_dict['total_file_size'] / (2**40)
    general_results_dict['fits_file_size'] = general_results_dict['fits_file_size'] / (2**40)

    logging.info('\t{} fits files found in filesystem'.format(general_results_dict['fits_file_count']))

    return general_results_dict, instrument_results_dict
def clean_search(self):
    """Validate the "search" field.

    Check that the input is either a proposal or fileroot, and one
    that matches files in the filesystem.

    Returns
    -------
    str
        The cleaned data input into the "search" field
    """
    search = self.cleaned_data['search']

    # Classify the search term: a five-digit number is a proposal,
    # anything that parses as a JWST filename root is a fileroot
    if len(search) == 5 and search.isnumeric():
        self.search_type = 'proposal'
    elif self._search_is_fileroot(search):
        self.search_type = 'fileroot'
    else:
        raise forms.ValidationError('Invalid search term {}. Please provide proposal number '
                                    'or file root.'.format(search))

    if self.search_type == 'proposal':
        # Find files belonging to the proposal and determine which
        # instrument(s) produced them
        pattern = os.path.join(FILESYSTEM_DIR, 'jw{}'.format(search),
                               '*{}*.fits'.format(search))
        matching_files = glob.glob(pattern)
        if not matching_files:
            raise forms.ValidationError('Proposal {} not in the filesystem.'.format(search))

        all_instruments = [filename_parser(file)['instrument'] for file in matching_files]
        if len(set(all_instruments)) > 1:
            raise forms.ValidationError('Cannot return result for proposal with multiple '
                                        'instruments.')
        self.instrument = all_instruments[0]

    elif self.search_type == 'fileroot':
        # Verify the fileroot matches at least one file on disk
        pattern = os.path.join(FILESYSTEM_DIR, search[:7], '{}*.fits'.format(search))
        if not glob.glob(pattern):
            raise forms.ValidationError('Fileroot {} not in the filesystem.'.format(search))

        # The three-letter shorthand embedded in the detector field
        # identifies the instrument
        shorthand = search.split('_')[-1][:3]
        self.instrument = JWST_INSTRUMENT_NAMES_SHORTHAND[shorthand]

    return self.cleaned_data['search']
def test_filename_parser(filename, solution):
    """Check that ``filename_parser`` produces the expected dictionary.

    Parameters
    ----------
    filename : str
        The filename to test (e.g. ``jw00327001001_02101_00002_nrca1_rate.fits``)
    solution : dict
        A dictionary of the expected result
    """
    parsed = filename_parser(filename)
    assert parsed == solution
def create_dummy_filename(filelist):
    """Create a dummy filename indicating the detectors used to create
    the mosaic. Check the list of detectors used to determine the
    proper text to substitute into the initial filename.

    Parameters
    ----------
    filelist : list
        List of filenames containing the data used to create the mosaic.
        It is assumed these filenames follow JWST filenaming conventions.

    Returns
    -------
    dummy_name : str
        The first filename in ``filelist`` is modified, such that the
        detector name is replaced with text indicating the source of the
        mosaic data.

    Raises
    ------
    ValueError
        If the detector names identify neither NIRCam module A nor B.
    """
    det_string_list = []
    modules = []
    for filename in filelist:
        indir, infile = os.path.split(filename)
        det_string = filename_parser(infile)['detector']
        det_string_list.append(det_string)
        modules.append(det_string[3].upper())

    # Previous sorting means that either all of the
    # input files are LW, or all are SW. So we can check any
    # file to determine LW vs SW
    if '5' in det_string_list[0]:
        suffix = "NRC_LW_MOSAIC"
    else:
        moda = modules.count('A')
        modb = modules.count('B')
        if moda > 0 and modb > 0:
            suffix = "NRC_SWALL_MOSAIC"
        elif moda > 0:
            suffix = "NRC_SWA_MOSAIC"
        elif modb > 0:
            suffix = "NRC_SWB_MOSAIC"
        else:
            # BUGFIX: previously this case fell through with ``suffix``
            # unassigned, raising an opaque UnboundLocalError below
            raise ValueError('No NIRCam module (A or B) found in detector '
                             'names: {}'.format(det_string_list))

    dummy_name = filelist[0].replace(det_string_list[0], suffix)

    return dummy_name
def check_existence(file_list, outdir):
    """Given a list of fits files, determine if a preview image has
    already been created in ``outdir``.

    Parameters
    ----------
    file_list : list
        List of fits filenames from which preview image will be generated
    outdir : str
        Directory that will contain the preview image if it exists

    Returns
    -------
    exists : bool
        ``True`` if preview image exists, ``False`` if it does not
    """
    if len(file_list) == 1:
        # A single input file: the preview jpg is named after the file
        # itself plus a detector-dependent piece, so glob on the base name
        base = os.path.split(file_list[0])[1]
        search_string = base.split('.fits')[0] + '_*.jpg'
    else:
        # Multiple input files: look for the mosaic jpg, whose name
        # depends on whether the detectors are NIRCam shortwave or
        # longwave
        file_parts = filename_parser(file_list[0])

        if file_parts['detector'].upper() in NIRCAM_SHORTWAVE_DETECTORS:
            mosaic_str = "NRC_SW*_MOSAIC_"
        elif file_parts['detector'].upper() in NIRCAM_LONGWAVE_DETECTORS:
            mosaic_str = "NRC_LW*_MOSAIC_"

        search_string = 'jw{}{}{}_{}{}{}_{}_{}{}*.jpg'.format(
            file_parts['program_id'], file_parts['observation'],
            file_parts['visit'], file_parts['visit_group'],
            file_parts['parallel_seq_id'], file_parts['activity'],
            file_parts['exposure_id'], mosaic_str, file_parts['suffix'])

    matches = glob.glob(os.path.join(outdir, search_string))
    return len(matches) > 0
def _search_is_fileroot(self, search):
    """Determine if a search value is formatted like a fileroot.

    Parameters
    ----------
    search : str
        The search term input by the user.

    Returns
    -------
    bool
        Is the search term formatted like a fileroot?
    """
    # A fileroot is anything the filename parser accepts; keep the
    # parsed pieces on the form for later use
    try:
        parsed = filename_parser(search)
    except ValueError:
        return False
    self.fileroot_dict = parsed
    return True
def test_filename_parser_filename():
    """Generate a dictionary with parameters from a JWST filename.
    Assert that the dictionary matches what is expected.
    """
    expected = {
        'activity': '01',
        'detector': 'nrca1',
        'exposure_id': '00002',
        'observation': '001',
        'parallel_seq_id': '1',
        'program_id': '00327',
        'suffix': 'rate',
        'visit': '001',
        'visit_group': '02',
    }
    assert filename_parser('jw00327001001_02101_00002_nrca1_rate.fits') == expected
def test_filename_parser_filepath():
    """Generate a dictionary with parameters from a JWST filepath
    (not just the basename). Assert that the dictionary matches what
    is expected.
    """
    expected = {
        'activity': '02',
        'detector': 'nis',
        'exposure_id': '00001',
        'observation': '001',
        'parallel_seq_id': '1',
        'program_id': '90002',
        'suffix': 'rateints',
        'visit': '001',
        'visit_group': '02',
    }
    path = '/test/dir/to/the/file/jw90002/jw90002001001_02102_00001_nis_rateints.fits'
    assert filename_parser(path) == expected
def revert_filesystem():
    """Perform the opposite of ``organize_filesystem`` -- this function
    will move files from a MAST-data-cache-like organization to the
    previous organization.

    For example, the file
    ``<filesystem>/jw00312/jw00312002001/jw00312002001_02102_00001_nrcb4_rateints.fits``
    will be placed in the directory ``<old_filesystem>/jw00312/``.
    """
    # Walk through list of files to process
    for directory, _, files in os.walk(SETTINGS['filesystem']):
        print('Processing {}'.format(directory))
        for filename in files:

            # Parse the filename for metadata
            # NOTE(review): filename_parser raises ValueError for
            # non-JWST names, which is not caught here -- confirm all
            # files in this tree follow the naming convention
            src = os.path.join(directory, filename)
            filename_dict = filename_parser(src)

            # Build destination path for those filenames that can be parsed
            try:
                destination_directory = os.path.join(
                    SETTINGS['old_filesystem'],
                    'jw{}'.format(filename_dict['program_id']))
            except KeyError:
                # Some filenames do not have a program_id/observation/visit
                # structure. BUGFIX: skip just this file -- the previous
                # ``break`` wrongly skipped every remaining file in the
                # directory.
                continue

            # Build complete destination location
            dst = os.path.join(destination_directory, os.path.basename(src))

            # Create parent directories if necessary
            #ensure_dir_exists(destination_directory)

            # Move the file over
            #shutil.move(src, dst)
            print('\tMoved {} to {}'.format(src, dst))
def organize_filesystem():
    """The main function of the ``organize_filesystem`` module.

    See module docstrings for further details.
    """
    # Walk through list of files to process
    for directory, _, files in os.walk(SETTINGS['old_filesystem']):
        print('Processing {}'.format(directory))
        for filename in files:

            # Parse the filename for metadata
            src = os.path.join(directory, filename)
            filename_dict = filename_parser(src)

            # Build destination path for those filenames that can be parsed
            try:
                destination_directory = os.path.join(
                    SETTINGS['filesystem'],
                    'jw{}'.format(filename_dict['program_id']),
                    'jw{}{}{}'.format(filename_dict['program_id'],
                                      filename_dict['observation'],
                                      filename_dict['visit']))
            except KeyError:
                # Some filenames do not have a program_id/observation/visit
                # structure. BUGFIX: skip just this file -- the previous
                # ``break`` wrongly skipped every remaining file in the
                # directory.
                continue

            # Build complete destination location
            dst = os.path.join(destination_directory, os.path.basename(src))

            # Create parent directories if necessary
            #ensure_dir_exists(destination_directory)

            # Move the file over
            #shutil.move(src, dst)
            print('\tMoved {} to {}'.format(src, dst))
def thumbnails(inst, proposal=None):
    """Generate a page showing thumbnail images corresponding to
    activities, from a given ``proposal``

    Parameters
    ----------
    inst : str
        Name of JWST instrument
    proposal : str (optional)
        Number of APT proposal to filter

    Returns
    -------
    dict_to_render : dict
        Dictionary of parameters for the thumbnails
    """
    filepaths = get_filenames_by_instrument(inst)  # JUST FOR DEVELOPMENT

    # Split files into "archived" and "unlooked"
    if proposal is not None:
        page_type = 'archive'
    else:
        page_type = 'unlooked'
    filepaths = split_files(filepaths, page_type)

    # Determine file ID (everything except suffix)
    # e.g. jw00327001001_02101_00002_nrca1
    full_ids = set(
        ['_'.join(f.split('/')[-1].split('_')[:-1]) for f in filepaths])

    # If the proposal is specified (i.e. if the page being loaded is
    # an archive page), only collect data for given proposal
    # (characters 2:7 of the rootname are the program ID)
    if proposal is not None:
        full_ids = [f for f in full_ids if f[2:7] == proposal]

    # Group files by ID
    file_data = []
    detectors = []
    proposals = []
    for i, file_id in enumerate(full_ids):
        suffixes = []
        count = 0
        for file in filepaths:
            if '_'.join(file.split('/')[-1].split('_')[:-1]) == file_id:
                count += 1

                # Parse filename
                try:
                    file_dict = filename_parser(file)
                except ValueError:
                    # Temporary workaround for noncompliant files in filesystem:
                    # slice the metadata fields out of the rootname by position
                    file_dict = {
                        'activity': file_id[17:19],
                        'detector': file_id[26:],
                        'exposure_id': file_id[20:25],
                        'observation': file_id[7:10],
                        'parallel_seq_id': file_id[16],
                        'program_id': file_id[2:7],
                        'suffix': file.split('/')[-1].split('.')[0].split('_')[-1],
                        'visit': file_id[10:13],
                        'visit_group': file_id[14:16]
                    }

                # Determine suffix
                suffix = file_dict['suffix']
                suffixes.append(suffix)
                hdr = fits.getheader(file, ext=0)
                exp_start = hdr['EXPSTART']

        suffixes = list(set(suffixes))

        # Add parameters to sort by
        # NOTE(review): ``file_dict`` and ``exp_start`` are the values from
        # the LAST matching file of the inner loop, and would be unbound if
        # no file matched the id -- confirm every id always has a match
        if file_dict['detector'] not in detectors and \
                not file_dict['detector'].startswith('f'):
            detectors.append(file_dict['detector'])
        if file_dict['program_id'] not in proposals:
            proposals.append(file_dict['program_id'])

        file_dict['exp_start'] = exp_start
        file_dict['suffixes'] = suffixes
        file_dict['file_count'] = count
        file_dict['file_root'] = file_id

        file_data.append(file_dict)
    file_indices = np.arange(len(file_data))

    # Extract information for sorting with dropdown menus
    # (Don't include the proposal as a sorting parameter if the
    # proposal has already been specified)
    if proposal is not None:
        dropdown_menus = {'detector': detectors}
    else:
        dropdown_menus = {'detector': detectors, 'proposal': proposals}

    dict_to_render = {
        'inst': inst,
        'all_filenames': [os.path.basename(f) for f in filepaths],
        'tools': MONITORS,
        'thumbnail_zipped_list': zip(file_indices, file_data),
        'dropdown_menus': dropdown_menus,
        'n_fileids': len(file_data),
        'prop': proposal
    }

    return dict_to_render
def monitor_template_main(): """ The main function of the ``monitor_template`` module.""" # Example of logging my_variable = 'foo' logging.info('Some useful information: {}'.format(my_variable)) # Example of querying for a dataset via MAST API service = "Mast.Jwst.Filtered.Niriss" params = { "columns": "filename", "filters": [{ "paramName": "filter", "values": ['F430M'] }] } response = Mast.service_request_async(service, params) result = response[0].json()['data'] filename_of_interest = result[0][ 'filename'] # jw00304002001_02102_00001_nis_uncal.fits # Example of parsing a filename filename_dict = filename_parser(filename_of_interest) # Contents of filename_dict: # {'program_id': '00304', # 'observation': '002', # 'visit': '001', # 'visit_group': '02', # 'parallel_seq_id': '1', # 'activity': '02', # 'exposure_id': '00001', # 'detector': 'nis', # 'suffix': 'uncal'} # Example of locating a dataset in the filesystem filesystem = get_config()['filesystem'] dataset = os.path.join(filesystem, 'jw{}'.format(filename_dict['program_id']), filename_of_interest) # Example of reading in dataset using jwst.datamodels im = datamodels.open(dataset) # Now have access to: # im.data # Data array # im.err # ERR array # im.meta # Metadata such as header keywords # Example of saving a file and setting permissions im.save('some_filename.fits') set_permissions('some_filename.fits') # Example of creating and exporting a Bokeh plot plt = Donut(im.data, plot_width=600, plot_height=600) plt.sizing_mode = 'stretch_both' # Necessary for responsive sizing on web app script, div = components(plt) plot_output_dir = get_config()['outputs'] div_outfile = os.path.join(plot_output_dir, 'monitor_name', filename_of_interest + "_component.html") script_outfile = os.path.join(plot_output_dir, 'monitor_name', filename_of_interest + "_component.js") for outfile, component in zip([div_outfile, script_outfile], [div, script]): with open(outfile, 'w') as f: f.write(component) f.close() 
set_permissions(outfile) # Perform any other necessary code well_named_variable = "Function does something." result_of_second_function = second_function(well_named_variable)
def monitor_filesystem():
    """Tabulates the inventory of the JWST filesystem, saving
    statistics to files, and generates plots.
    """

    # Begin logging
    logging.info('Beginning filesystem monitoring.')

    # Get path, directories and files in system and count files in all directories
    settings = get_config()
    filesystem = settings['filesystem']
    outputs_dir = os.path.join(settings['outputs'], 'monitor_filesystem')

    # Set up dictionaries for output; defaultdicts let us increment
    # per-suffix and per-instrument keys without pre-declaring them
    results_dict = defaultdict(int)
    size_dict = defaultdict(float)

    # Walk through all directories recursively and count files
    # NOTE(review): filename_parser raises ValueError for noncompliant
    # names and that is not caught here -- confirm all .fits files in the
    # tree follow JWST naming conventions
    logging.info('Searching filesystem...')
    for dirpath, dirs, files in os.walk(filesystem):
        results_dict['file_count'] += len(files)  # find number of all files
        for filename in files:
            file_path = os.path.join(dirpath, filename)
            if filename.endswith(".fits"):  # find total number of fits files
                results_dict['fits_files'] += 1
                size_dict['size_fits'] += os.path.getsize(file_path)
                suffix = filename_parser(filename)['suffix']
                results_dict[suffix] += 1
                size_dict[suffix] += os.path.getsize(file_path)
                detector = filename_parser(filename)['detector']
                instrument = detector[0:3]  # first three characters of detector specify instrument
                results_dict[instrument] += 1
                size_dict[instrument] += os.path.getsize(file_path)
    logging.info('{} files found in filesystem'.format(
        results_dict['fits_files']))

    # Get df style stats on file system
    # NOTE(review): parsing positional fields of ``df`` output is
    # platform-dependent -- confirm the deployment platform's df format
    out = subprocess.check_output('df {}'.format(filesystem), shell=True)
    outstring = out.decode("utf-8")  # put into string for parsing from byte format
    parsed = outstring.split(sep=None)

    # Select desired elements from parsed string
    total = int(parsed[8])  # in blocks of 512 bytes
    used = int(parsed[9])
    available = int(parsed[10])
    percent_used = parsed[11]

    # Save stats for plotting over time
    now = datetime.datetime.now().isoformat(
        sep='T', timespec='auto')  # get date of stats

    # Set up output file and write stats
    statsfile = os.path.join(outputs_dir, 'statsfile.txt')
    with open(statsfile, "a+") as f:
        f.write("{0} {1:15d} {2:15d} {3:15d} {4:15d} {5}\n".format(
            now, results_dict['file_count'], total, available, used,
            percent_used))
    set_permissions(statsfile)
    logging.info('Saved file statistics to: {}'.format(statsfile))

    # Set up and read out stats on files by type
    filesbytype = os.path.join(outputs_dir, 'filesbytype.txt')
    with open(filesbytype, "a+") as f2:
        f2.write("{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10}\n".format(
            results_dict['fits_files'], results_dict['uncal'],
            results_dict['cal'], results_dict['rate'],
            results_dict['rateints'], results_dict['i2d'],
            results_dict['nrc'], results_dict['nrs'], results_dict['nis'],
            results_dict['mir'], results_dict['gui']))
    set_permissions(filesbytype, verbose=False)
    logging.info('Saved file statistics by type to {}'.format(filesbytype))

    # Set up the file-size-by-type output file
    sizebytype = os.path.join(outputs_dir, 'sizebytype.txt')
    with open(sizebytype, "a+") as f3:
        f3.write("{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10}\n".format(
            size_dict['size_fits'], size_dict['uncal'], size_dict['cal'],
            size_dict['rate'], size_dict['rateints'], size_dict['i2d'],
            size_dict['nrc'], size_dict['nrs'], size_dict['nis'],
            size_dict['mir'], size_dict['gui']))
    set_permissions(sizebytype, verbose=False)
    logging.info('Saved file sizes by type to {}'.format(sizebytype))

    logging.info('Filesystem statistics calculation complete.')

    # Create the plots
    plot_system_stats(statsfile, filesbytype, sizebytype)
def process_program(program):
    """Generate preview images and thumbnails for the given program.

    Parameters
    ----------
    program : str
        The program identifier (e.g. ``88600``)
    """

    # Group together common exposures
    filenames = glob.glob(
        os.path.join(get_config()['filesystem'], program, '*.fits'))
    grouped_filenames = group_filenames(filenames)
    logging.info('Found {} filenames'.format(len(filenames)))

    for file_list in grouped_filenames:
        filename = file_list[0]

        # Determine the save location; fall back to the bare basename
        # for files that do not follow JWST naming conventions
        try:
            identifier = 'jw{}'.format(filename_parser(filename)['program_id'])
        except ValueError:
            identifier = os.path.basename(filename).split('.fits')[0]
        preview_output_directory = os.path.join(
            get_config()['preview_image_filesystem'], identifier)
        thumbnail_output_directory = os.path.join(
            get_config()['thumbnail_filesystem'], identifier)

        # Check to see if the preview images already exist and skip if they do
        file_exists = check_existence(file_list, preview_output_directory)
        if file_exists:
            logging.info(
                "JPG already exists for {}, skipping.".format(filename))
            continue

        # Create the output directories if necessary
        if not os.path.exists(preview_output_directory):
            os.makedirs(preview_output_directory)
            permissions.set_permissions(preview_output_directory)
            logging.info(
                'Created directory {}'.format(preview_output_directory))
        if not os.path.exists(thumbnail_output_directory):
            os.makedirs(thumbnail_output_directory)
            permissions.set_permissions(thumbnail_output_directory)
            logging.info(
                'Created directory {}'.format(thumbnail_output_directory))

        # If the exposure contains more than one file (because more
        # than one detector was used), then create a mosaic
        max_size = 8
        numfiles = len(file_list)
        if numfiles > 1:
            try:
                mosaic_image, mosaic_dq = create_mosaic(file_list)
                logging.info('Created mosiac for:')
                for item in file_list:
                    logging.info('\t{}'.format(item))
            except (ValueError, FileNotFoundError) as error:
                logging.error(error)
            # NOTE(review): if create_mosaic raised above, mosaic_image /
            # mosaic_dq are unbound and the ``im.data = mosaic_image``
            # below will raise NameError (not caught as ValueError) --
            # confirm intended behavior for failed mosaics
            dummy_file = create_dummy_filename(file_list)
            # Larger mosaics get a larger maximum preview size
            if numfiles in [2, 4]:
                max_size = 16
            elif numfiles in [8]:
                max_size = 32

        # Create the nominal preview image and thumbnail
        try:
            im = PreviewImage(filename, "SCI")
            im.clip_percent = 0.01
            im.scaling = 'log'
            im.cmap = 'viridis'
            im.output_format = 'jpg'
            im.preview_output_directory = preview_output_directory
            im.thumbnail_output_directory = thumbnail_output_directory

            # If a mosaic was made from more than one file
            # insert it and it's associated DQ array into the
            # instance of PreviewImage. Also set the input
            # filename to indicate that we have mosaicked data
            if numfiles != 1:
                im.data = mosaic_image
                im.dq = mosaic_dq
                im.file = dummy_file

            im.make_image(max_img_size=max_size)
            logging.info(
                'Created preview image and thumbnail for: {}'.format(filename))
        except ValueError as error:
            logging.warning(error)
def group_filenames(filenames):
    """Given a list of JWST filenames, group together files from the
    same exposure.

    These files will share the same ``program_id``, ``observation``,
    ``visit``, ``visit_group``, ``parallel_seq_id``, ``activity``,
    ``exposure``, and ``suffix``. Only the ``detector`` will be
    different. Currently only NIRCam files for a given exposure will be
    grouped together. For other instruments multiple files for a given
    exposure will be kept separate from one another and no mosaic will
    be made. Stage 3 files will remain as individual files, and will
    not be grouped together with any other files.

    Parameters
    ----------
    filenames : list
        list of filenames

    Returns
    -------
    grouped : list
        grouped list of filenames where each element is a list and
        contains the names of filenames with matching exposure
        information.
    """
    # Some initializations
    grouped, matched_names = [], []
    filenames.sort()

    # Loop over each file in the list of good files
    for filename in filenames:

        # Holds list of matching files for exposure
        subgroup = []

        # Generate string to be matched with other filenames
        filename_dict = filename_parser(os.path.basename(filename))

        # If the filename was already involved in a match, then skip
        if filename not in matched_names:

            # For stage 3 filenames, treat individually
            if 'stage_3' in filename_dict['filename_type']:
                matched_names.append(filename)
                subgroup.append(filename)

            # Group together stage 1 and 2 filenames
            elif filename_dict['filename_type'] == 'stage_1_and_2':

                # Determine detector naming convention: NIRCam detectors
                # are matched as a character class so files from all
                # detectors of the same channel group together
                if filename_dict['detector'].upper() in NIRCAM_SHORTWAVE_DETECTORS:
                    detector_str = 'NRC[AB][1234]'
                elif filename_dict['detector'].upper() in NIRCAM_LONGWAVE_DETECTORS:
                    detector_str = 'NRC[AB]5'
                else:  # non-NIRCam detectors: exact match only, so no grouping
                    detector_str = filename_dict['detector'].upper()

                # Build pattern to match against
                base_output_name = get_base_output_name(filename_dict)
                match_str = '{}{}_{}.fits'.format(
                    base_output_name, detector_str, filename_dict['suffix'])
                match_str = os.path.join(os.path.dirname(filename), match_str)
                pattern = re.compile(match_str, re.IGNORECASE)

                # Try to match the substring to each good file
                for file_to_match in filenames:
                    if pattern.match(file_to_match) is not None:
                        matched_names.append(file_to_match)
                        subgroup.append(file_to_match)

        if len(subgroup) > 0:
            grouped.append(subgroup)

    return grouped
def create_mosaic(filenames):
    """If an exposure comprises data from multiple detectors read in
    all the appropriate files and create a mosaic so that the preview
    image will show all the data together.

    Parameters
    ----------
    filenames : list
        List of filenames to be combined into a mosaic

    Returns
    -------
    mosaic_filename : str
        Name of fits file containing the mosaicked data
    """
    # Use preview_image to load data and create difference image
    # for each detector. Save in a list
    data = []
    detector = []
    data_lower_left = []
    for filename in filenames:
        image = PreviewImage(filename, "SCI")  # Now have image.data, image.dq
        data_dim = len(image.data.shape)
        # 4D data (integrations x groups x y x x) is collapsed to a
        # difference image; lower-dimensional data is used as-is
        if data_dim == 4:
            diff_im = image.difference_image(image.data)
        else:
            diff_im = image.data
        data.append(diff_im)
        detector.append(filename_parser(filename)['detector'].upper())
        data_lower_left.append((image.xstart, image.ystart))

    # Make sure SW and LW data are not being mixed. Create the
    # appropriately sized numpy array to hold all the data based
    # on the channel, module, and subarray size
    mosaic_channel = find_data_channel(detector)
    full_xdim, full_ydim, full_lower_left = array_coordinates(
        mosaic_channel, detector, data_lower_left)

    # Create the array to hold all the data, NaN-filled so that gaps
    # between detectors are distinguishable from real pixels
    datashape = data[0].shape
    datadim = len(datashape)
    if datadim == 2:
        full_array = np.zeros((1, full_ydim, full_xdim)) * np.nan
    elif datadim == 3:
        full_array = np.zeros((datashape[0], full_ydim, full_xdim)) * np.nan
    else:
        raise ValueError(
            ('Difference image for {} must be either 2D or 3D.'.format(
                filenames[0])))

    # Place the data from the individual detectors in the appropriate
    # places in the final image
    for pixdata, detect in zip(data, detector):
        x0, y0 = full_lower_left[detect]
        if datadim == 2:
            yd, xd = pixdata.shape
            full_array[0, y0:y0 + yd, x0:x0 + xd] = pixdata
        elif datadim == 3:
            ints, yd, xd = pixdata.shape
            full_array[:, y0:y0 + yd, x0:x0 + xd] = pixdata

    # Create associated DQ array and set unpopulated pixels to be
    # skipped in preview image scaling
    full_dq = create_dq_array(full_xdim, full_ydim,
                              full_array[0, :, :], mosaic_channel)

    return full_array, full_dq
def thumbnails_query_ajax(rootnames, insts):
    """Generate a page that provides data necessary to render the
    ``thumbnails`` template.

    Parameters
    ----------
    rootnames : list of strings
        File rootnames to gather data for
    insts : list of strings
        Name of JWST instrument

    Returns
    -------
    data_dict : dict
        Dictionary of data needed for the ``thumbnails`` template
    """
    # Initialize dictionary that will contain all needed data
    data_dict = {}

    # Dummy variable for view_image when thumbnail is selected
    data_dict['inst'] = "all"
    data_dict['file_data'] = {}

    # Gather data for each rootname
    for rootname in rootnames:

        # Fit expected format for get_filenames_by_rootname():
        # keep only the first four underscore-separated fields
        rootname = rootname.split("_")[0] + '_' + rootname.split("_")[
            1] + '_' + rootname.split("_")[2] + '_' + rootname.split("_")[3]

        # Parse filename
        try:
            filename_dict = filename_parser(rootname)
        except ValueError:
            # Temporary workaround for noncompliant files in filesystem:
            # slice the metadata fields out of the rootname by position
            filename_dict = {
                'activity': rootname[17:19],
                'detector': rootname[26:],
                'exposure_id': rootname[20:25],
                'observation': rootname[7:10],
                'parallel_seq_id': rootname[16],
                'program_id': rootname[2:7],
                'visit': rootname[10:13],
                'visit_group': rootname[14:16]
            }

        # Get list of available filenames
        available_files = get_filenames_by_rootname(rootname)

        # Add data to dictionary
        data_dict['file_data'][rootname] = {}
        # Characters 26:29 of the rootname hold the instrument shorthand;
        # fall back to MIRI when the shorthand is not recognized
        try:
            data_dict['file_data'][rootname][
                'inst'] = JWST_INSTRUMENT_NAMES_MIXEDCASE[
                    JWST_INSTRUMENT_NAMES_SHORTHAND[rootname[26:29]]]
        except KeyError:
            data_dict['file_data'][rootname]['inst'] = "MIRI"
            print("Warning: assuming instrument is MIRI")
        data_dict['file_data'][rootname]['filename_dict'] = filename_dict
        data_dict['file_data'][rootname]['available_files'] = available_files
        data_dict['file_data'][rootname]['expstart'] = get_expstart(rootname)
        data_dict['file_data'][rootname]['suffixes'] = [
            filename_parser(filename)['suffix']
            for filename in available_files
        ]
        data_dict['file_data'][rootname]['prop'] = rootname[2:7]

    # Extract information for sorting with dropdown menus
    detectors = [
        data_dict['file_data'][rootname]['filename_dict']['detector']
        for rootname in list(data_dict['file_data'].keys())
    ]
    instruments = [
        data_dict['file_data'][rootname]['inst'].lower()
        for rootname in list(data_dict['file_data'].keys())
    ]
    proposals = [
        data_dict['file_data'][rootname]['filename_dict']['program_id']
        for rootname in list(data_dict['file_data'].keys())
    ]

    dropdown_menus = {
        'instrument': instruments,
        'detector': detectors,
        'proposal': proposals
    }

    data_dict['tools'] = MONITORS
    data_dict['dropdown_menus'] = dropdown_menus

    return data_dict
def thumbnails_ajax(inst, proposal=None):
    """Assemble the data needed to render the ``thumbnails`` template
    for a given instrument, optionally restricted to one proposal.

    Parameters
    ----------
    inst : str
        Name of JWST instrument
    proposal : str (optional)
        Number of APT proposal to filter

    Returns
    -------
    data_dict : dict
        Dictionary of data needed for the ``thumbnails`` template
    """
    # Unique rootnames (filename minus the suffix) for this instrument
    filepaths = get_filenames_by_instrument(inst)
    rootnames = set('_'.join(path.split('/')[-1].split('_')[:-1])
                    for path in filepaths)

    # If the proposal is specified (i.e. if the page being loaded is
    # an archive page), only collect data for given proposal
    if proposal is not None:
        proposal_string = '{:05d}'.format(int(proposal))
        rootnames = [name for name in rootnames
                     if name[2:7] == proposal_string]

    # Initialize dictionary that will contain all needed data
    data_dict = {'inst': inst, 'file_data': {}}

    # Gather data for each rootname
    for rootname in rootnames:
        # Parse filename
        try:
            filename_dict = filename_parser(rootname)
        except ValueError:
            # Temporary workaround for noncompliant files in filesystem:
            # fall back to fixed-position slicing of the rootname
            filename_dict = {
                'activity': rootname[17:19],
                'detector': rootname[26:],
                'exposure_id': rootname[20:25],
                'observation': rootname[7:10],
                'parallel_seq_id': rootname[16],
                'program_id': rootname[2:7],
                'visit': rootname[10:13],
                'visit_group': rootname[14:16]}

        # Get list of available filenames
        available_files = get_filenames_by_rootname(rootname)

        # Add data to dictionary
        data_dict['file_data'][rootname] = {
            'filename_dict': filename_dict,
            'available_files': available_files,
            'expstart': get_expstart(rootname),
            'suffixes': [filename_parser(filename)['suffix']
                         for filename in available_files]}

    # Extract information for sorting with dropdown menus
    # (Don't include the proposal as a sorting parameter if the
    # proposal has already been specified)
    file_data = data_dict['file_data']
    detectors = [file_data[name]['filename_dict']['detector']
                 for name in file_data]
    proposals = [file_data[name]['filename_dict']['program_id']
                 for name in file_data]
    if proposal is not None:
        dropdown_menus = {'detector': detectors}
    else:
        dropdown_menus = {'detector': detectors, 'proposal': proposals}

    data_dict['tools'] = MONITORS
    data_dict['dropdown_menus'] = dropdown_menus
    data_dict['prop'] = proposal

    return data_dict
def group_filenames(input_files):
    """Given a list of JWST filenames, group together files from the
    same exposure.

    Grouped files share the same ``program_id``, ``observation``,
    ``visit``, ``visit_group``, ``parallel_seq_id``, ``activity``,
    ``exposure``, and ``suffix``; only the ``detector`` differs.
    Currently only NIRCam files for a given exposure are grouped
    together. For other instruments multiple files for a given exposure
    are kept separate from one another and no mosaic is made.

    Parameters
    ----------
    input_files : list
        list of filenames

    Returns
    -------
    grouped : list
        grouped list of filenames where each element is a non-empty
        list containing the filenames with matching exposure
        information.
    """
    grouped = []

    # Sort files first so members of an exposure are adjacent
    input_files.sort()

    goodindex = np.arange(len(input_files))
    input_files = np.array(input_files)

    # Loop over each file in the list of good files
    for index, full_filename in enumerate(input_files):

        # Bug fix: skip files that were already grouped with an earlier
        # file. Previously these were reprocessed, matched nothing
        # (their index was gone from goodindex), and appended empty
        # lists to the output, which crashes downstream consumers that
        # do ``file_list[0]``.
        if index not in goodindex:
            continue

        file_directory, filename = os.path.split(full_filename)

        # Generate string to be matched with other filenames
        filename_parts = filename_parser(filename)
        program = filename_parts['program_id']
        observation = filename_parts['observation']
        visit = filename_parts['visit']
        visit_group = filename_parts['visit_group']
        parallel = filename_parts['parallel_seq_id']
        activity = filename_parts['activity']
        exposure = filename_parts['exposure_id']
        detector = filename_parts['detector'].upper()
        suffix = filename_parts['suffix']
        observation_base = f'jw{program}{observation}{visit}_{visit_group}{parallel}{activity}_{exposure}_'

        # Only NIRCam exposures are mosaicked: match any detector from
        # the same NIRCam channel
        if detector in NIRCAM_SHORTWAVE_DETECTORS:
            detector_str = 'NRC[AB][1234]'
        elif detector in NIRCAM_LONGWAVE_DETECTORS:
            detector_str = 'NRC[AB]5'
        else:
            # non-NIRCam detectors: the pattern only matches this file
            detector_str = detector
        match_str = f'{observation_base}{detector_str}_{suffix}.fits'
        match_str = os.path.join(file_directory, match_str)
        pattern = re.compile(match_str, re.IGNORECASE)

        # Try to match the substring to each good file
        matches = []
        matched_name = []
        for index2, file2match in enumerate(input_files[goodindex]):
            match = pattern.match(file2match)

            # Add any files that match the string
            if match is not None:
                matched_name.append(file2match)
                matches.append(goodindex[index2])

        # For any matched files, remove from goodindex so we don't
        # use them as a basis for matching later
        all_locs = []
        for num in matches:
            loc = np.where(goodindex == num)
            all_locs.append(loc[0][0])
        if len(all_locs) != 0:
            # Delete matched file indexes from the list of files to search
            goodindex = np.delete(goodindex, all_locs)

        # Add the list of matched files to the overall list of files
        grouped.append(matched_name)

    return grouped
def monitor(self):
    """Inventory the JWST filesystem, save file statistics, and
    generate plots.

    Walks ``self.filesystem`` counting files and accumulating sizes by
    suffix and by instrument, queries ``df`` for disk usage, appends
    the results to ``self.results``, ``self.sizes`` and
    ``self.statistics``, writes three stats files under
    ``self.outputs_dir``, and finally refreshes the plots.
    """
    # Begin logging
    logging.info('Beginning filesystem monitoring.')

    # re-initialize dictionaries for output; defaultdicts so unseen
    # suffix/instrument keys start at 0 / 0.0
    results_dict = defaultdict(int)
    size_dict = defaultdict(float)

    # Walk through all directories recursively and count files
    logging.info('Searching filesystem...')
    for dirpath, dirs, files in os.walk(self.filesystem):
        results_dict['file_count'] += len(files)  # find number of all files
        for filename in files:
            file_path = os.path.join(dirpath, filename)
            if filename.endswith(".fits"):  # find total number of fits files
                results_dict['fits_files'] += 1
                size_dict['size_fits'] += os.path.getsize(file_path)
                # NOTE(review): filename_parser raises ValueError on
                # noncompliant names and it is not caught here — a
                # single bad file aborts the walk; confirm intended.
                suffix = filename_parser(filename)['suffix']
                results_dict[suffix] += 1
                size_dict[suffix] += os.path.getsize(file_path)
                detector = filename_parser(filename)['detector']
                instrument = detector[0:3]  # first three characters of detector specify instrument
                results_dict[instrument] += 1
                size_dict[instrument] += os.path.getsize(file_path)
    logging.info('{} files found in filesystem'.format(results_dict['fits_files']))

    # Get df style stats on file system
    out = subprocess.check_output('df {}'.format(self.filesystem), shell=True)
    outstring = out.decode("utf-8")  # put into string for parsing from byte format
    parsed = outstring.split(sep=None)

    # Select desired elements from parsed string.
    # NOTE(review): indices 8-11 assume the standard two-line df header
    # layout (7 header tokens + device name); verify on target platform.
    stats = {
        'total': int(parsed[8]),  # in blocks of 512 bytes
        'used': int(parsed[9]),
        'available': int(parsed[10]),
        'percent_used': parsed[11],
        'file_count': results_dict.pop('file_count'),
        'timestamp': datetime.datetime.now().isoformat(sep='T', timespec='auto')  # get date of stats
    }

    # store results & sizes in the appropriate dictionaries
    for key, val in results_dict.items():
        self.results[key].append(val)
    for key, val in size_dict.items():
        self.sizes[key].append(val)
    for key, val in stats.items():
        self.statistics[key].append(val)

    # set up output file and write stats (append mode: one row per run)
    statsfile = os.path.join(self.outputs_dir, 'statsfile.txt')
    with open(statsfile, "a+") as f:
        f.write("{timestamp} {file_count:15d} {total:15d} {available:15d} {used:15d} {percent_used}\n".format(**stats))
    set_permissions(statsfile)
    logging.info('Saved file statistics to: {}'.format(statsfile))

    # Shared row format for the two per-type files below; relies on the
    # walk having populated each of these keys in the defaultdicts
    output_stub = "{fits_files} {uncal} {cal} {rate} {rateints} {i2d} {nrc} {nrs} {nis} {mir} {gui}\n"

    # set up and read out stats on files by type
    filesbytype = os.path.join(self.outputs_dir, 'filesbytype.txt')
    with open(filesbytype, "a+") as f2:
        f2.write(output_stub.format(**results_dict))
    set_permissions(filesbytype, verbose=False)
    logging.info('Saved file statistics by type to {}'.format(filesbytype))

    # set up file size by type file
    sizebytype = os.path.join(self.outputs_dir, 'sizebytype.txt')
    with open(sizebytype, "a+") as f3:
        f3.write(output_stub.format(**size_dict))
    set_permissions(sizebytype, verbose=False)
    logging.info('Saved file sizes by type to {}'.format(sizebytype))

    logging.info('Filesystem statistics calculation complete.')

    # Update the plots based on new information
    self.update_plots()
def thumbnails(inst, proposal=None):
    """Generate a page showing thumbnail images corresponding to
    activities, from a given ``proposal``.

    Parameters
    ----------
    inst : str
        Name of JWST instrument
    proposal : str (optional)
        Number of APT proposal to filter

    Returns
    -------
    dict_to_render : dict
        Dictionary of parameters for the thumbnails
    """
    filepaths = get_filenames_by_instrument(inst)

    # JUST FOR DEVELOPMENT
    # Split files into "archived" and "unlooked"
    if proposal is not None:
        page_type = 'archive'
    else:
        page_type = 'unlooked'
    filepaths = split_files(filepaths, page_type)

    # Determine file ID (everything except suffix)
    # e.g. jw00327001001_02101_00002_nrca1
    full_ids = set(['_'.join(f.split('/')[-1].split('_')[:-1]) for f in filepaths])

    # If the proposal is specified (i.e. if the page being loaded is
    # an archive page), only collect data for given proposal
    if proposal is not None:
        proposal_string = '{:05d}'.format(int(proposal))
        full_ids = [f for f in full_ids if f[2:7] == proposal_string]

    detectors = []
    proposals = []
    for i, file_id in enumerate(full_ids):
        for file in filepaths:
            if '_'.join(file.split('/')[-1].split('_')[:-1]) == file_id:

                # Parse filename once to get program_id and detector
                try:
                    filename_dict = filename_parser(file)
                    program_id = filename_dict['program_id']
                    detector = filename_dict['detector']
                except ValueError:
                    # Temporary workaround for noncompliant files in
                    # the filesystem: slice the ID at fixed positions.
                    # (Bug fix: this branch previously referenced the
                    # undefined name ``nfile_id``, raising NameError.)
                    program_id = file_id[2:7]
                    detector = file_id[26:]

                # Add parameters to sort by
                if detector not in detectors and not detector.startswith('f'):
                    detectors.append(detector)
                if program_id not in proposals:
                    proposals.append(program_id)

    # Extract information for sorting with dropdown menus
    # (Don't include the proposal as a sorting parameter if the
    # proposal has already been specified)
    if proposal is not None:
        dropdown_menus = {'detector': detectors}
    else:
        dropdown_menus = {'detector': detectors, 'proposal': proposals}

    dict_to_render = {
        'inst': inst,
        'tools': MONITORS,
        'dropdown_menus': dropdown_menus,
        'prop': proposal
    }

    return dict_to_render
def get_thumbnails_all_instruments(parameters):
    """Return a list of thumbnails available in the filesystem for all
    instruments given requested MAST parameters and queried anomalies.

    Parameters
    ----------
    parameters: dict
        A dictionary containing the following keys, some of which are
        dictionaries keyed by lowercase instrument name:
            instruments
            apertures
            filters
            detector
            effexptm_min
            effexptm_max
            anomalies

    Returns
    -------
    thumbnails : list
        A list of thumbnails available in the filesystem for the
        given instrument.
    """
    anomalies = parameters['anomalies']

    filenames = []
    for inst in parameters['instruments']:
        # Make sure instruments are of the proper format (e.g. "Nircam")
        instrument = inst[0].upper() + inst[1:].lower()

        # Query MAST for all rootnames for the instrument
        service = "Mast.Jwst.Filtered.{}".format(instrument)

        # With no constraints at all, request every column unfiltered;
        # otherwise build one MAST filter entry per non-empty constraint
        if (parameters['apertures'][inst.lower()] == []) and (parameters['detectors'][inst.lower()] == []) \
                and (parameters['filters'][inst.lower()] == []) and (parameters['exposure_types'][inst.lower()] == []) \
                and (parameters['read_patterns'][inst.lower()] == []):
            params = {"columns": "*", "filters": []}
        else:
            query_filters = []
            if (parameters['apertures'][inst.lower()] != []):
                # NIRCam uses a different aperture column name than the
                # other instruments
                if instrument != "Nircam":
                    query_filters.append({"paramName": "pps_aper", "values": parameters['apertures'][inst.lower()]})
                if instrument == "Nircam":
                    query_filters.append({"paramName": "apername", "values": parameters['apertures'][inst.lower()]})
            if (parameters['detectors'][inst.lower()] != []):
                query_filters.append({"paramName": "detector", "values": parameters['detectors'][inst.lower()]})
            if (parameters['filters'][inst.lower()] != []):
                query_filters.append({"paramName": "filter", "values": parameters['filters'][inst.lower()]})
            if (parameters['exposure_types'][inst.lower()] != []):
                query_filters.append({"paramName": "exp_type", "values": parameters['exposure_types'][inst.lower()]})
            if (parameters['read_patterns'][inst.lower()] != []):
                query_filters.append({"paramName": "readpatt", "values": parameters['read_patterns'][inst.lower()]})
            params = {"columns": "*", "filters": query_filters}

        response = Mast.service_request_async(service, params)
        results = response[0].json()['data']

        # Keep rootnames only (drop the .fits extension)
        inst_filenames = [result['filename'].split('.')[0] for result in results]
        filenames.extend(inst_filenames)

    # Get list of all thumbnails
    thumbnail_list = glob.glob(os.path.join(THUMBNAIL_FILESYSTEM, '*', '*.thumb'))

    # Get subset of preview images that match the filenames
    thumbnails_subset = [os.path.basename(item) for item in thumbnail_list if
                         os.path.basename(item).split('_integ')[0] in filenames]

    # Eliminate any duplicates
    thumbnails_subset = list(set(thumbnails_subset))

    # Determine whether or not queried anomalies are flagged
    final_subset = []

    if anomalies != {'miri': [], 'nirspec': [], 'niriss': [], 'nircam': [], 'fgs': []}:
        for thumbnail in thumbnails_subset:
            # Rebuild the rootname from the first four underscore-
            # separated components of the thumbnail name
            components = thumbnail.split('_')
            rootname = ''.join((components[0], '_', components[1], '_', components[2], '_', components[3]))
            try:
                # NOTE(review): filename_parser raises ValueError, not
                # KeyError, on bad names — a malformed thumbnail name
                # would escape this handler; confirm intended.
                instrument = filename_parser(thumbnail)['instrument']
                thumbnail_anomalies = get_current_flagged_anomalies(rootname, instrument)
                if thumbnail_anomalies:
                    for anomaly in anomalies[instrument.lower()]:
                        if anomaly.lower() in thumbnail_anomalies:
                            # thumbnail contains an anomaly selected in the query
                            final_subset.append(thumbnail)
            except KeyError:
                print("Error with thumbnail: ", thumbnail)
    else:
        # if no anomalies are flagged, return all thumbnails from query
        final_subset = thumbnails_subset

    return list(set(final_subset))
def clean_search(self):
    """Validate the "search" field.

    Check that the input is either a proposal or fileroot, and one
    that matches files in the filesystem. As a side effect, sets
    ``self.search_type`` and ``self.instrument``.

    Returns
    -------
    str
        The cleaned data input into the "search" field

    Raises
    ------
    forms.ValidationError
        If the search term is neither a proposal nor a fileroot, or
        matches nothing (or multiple instruments) in the filesystem.
    """
    # Get the cleaned search data
    search = self.cleaned_data['search']

    # Make sure the search is either a proposal or fileroot
    if search.isnumeric() and 1 < int(search) < 99999:
        self.search_type = 'proposal'
    elif self._search_is_fileroot(search):
        self.search_type = 'fileroot'
    else:
        raise forms.ValidationError('Invalid search term {}. Please provide proposal number '
                                    'or file root.'.format(search))

    # If they searched for a proposal...
    if self.search_type == 'proposal':
        # See if there are any matching proposals and, if so, what
        # instrument they are for. Both the public and proprietary
        # trees are searched.
        proposal_string = '{:05d}'.format(int(search))
        search_string_public = os.path.join(get_config()['filesystem'], 'public', 'jw{}'.format(proposal_string), '*', '*{}*.fits'.format(proposal_string))
        search_string_proprietary = os.path.join(get_config()['filesystem'], 'proprietary', 'jw{}'.format(proposal_string), '*', '*{}*.fits'.format(proposal_string))
        all_files = glob.glob(search_string_public)
        all_files.extend(glob.glob(search_string_proprietary))

        # Ignore "original" files
        all_files = [filename for filename in all_files if 'original' not in filename]

        if len(all_files) > 0:
            all_instruments = []
            for file in all_files:
                instrument = filename_parser(file)['instrument']
                all_instruments.append(instrument)
            # A proposal spanning multiple instruments is ambiguous
            if len(set(all_instruments)) > 1:
                raise forms.ValidationError('Cannot return result for proposal with multiple instruments ({}).'.format(', '.join(set(all_instruments))))
            self.instrument = all_instruments[0]
        else:
            raise forms.ValidationError('Proposal {} not in the filesystem.'.format(search))

    # If they searched for a fileroot...
    elif self.search_type == 'fileroot':
        # See if there are any matching fileroots and, if so, what instrument they are for.
        # search[:7] is the jw<PPPPP> program directory, search[:13]
        # the observation directory.
        search_string_public = os.path.join(get_config()['filesystem'], 'public', search[:7], search[:13], '{}*.fits'.format(search))
        search_string_proprietary = os.path.join(get_config()['filesystem'], 'proprietary', search[:7], search[:13], '{}*.fits'.format(search))
        all_files = glob.glob(search_string_public)
        all_files.extend(glob.glob(search_string_proprietary))

        # Ignore "original" files
        all_files = [filename for filename in all_files if 'original' not in filename]

        if len(all_files) == 0:
            raise forms.ValidationError('Fileroot {} not in the filesystem.'.format(search))

        # Map the 3-letter detector shorthand in the fileroot to the
        # full instrument name
        instrument = search.split('_')[-1][:3]
        self.instrument = JWST_INSTRUMENT_NAMES_SHORTHAND[instrument]

    return self.cleaned_data['search']
def generate_preview_images():
    """The main function of the ``generate_preview_image`` module.

    Walks the configured filesystem, groups multi-detector exposures,
    and produces a preview image and thumbnail for each group,
    mosaicking NIRCam exposures when possible.
    """
    # Begin logging
    logging.info("Beginning the script run")

    filesystem = get_config()['filesystem']
    preview_image_filesystem = get_config()['preview_image_filesystem']
    thumbnail_filesystem = get_config()['thumbnail_filesystem']

    filenames = glob(os.path.join(filesystem, '*/*.fits'))
    grouped_filenames = group_filenames(filenames)
    logging.info(f"Found {len(filenames)} filenames")

    for file_list in grouped_filenames:
        filename = file_list[0]

        # Determine the save location
        try:
            identifier = 'jw{}'.format(filename_parser(filename)['program_id'])
        except ValueError:
            # Noncompliant name: fall back to the bare filename
            identifier = os.path.basename(filename).split('.fits')[0]

        preview_output_directory = os.path.join(preview_image_filesystem, identifier)
        thumbnail_output_directory = os.path.join(thumbnail_filesystem, identifier)

        # Check to see if the preview images already exist and skip
        # if they do
        file_exists = check_existence(file_list, preview_output_directory)
        if file_exists:
            logging.info("JPG already exists for {}, skipping.".format(filename))
            continue

        # Create the output directories if necessary
        for directory in (preview_output_directory, thumbnail_output_directory):
            if not os.path.exists(directory):
                os.makedirs(directory)
                permissions.set_permissions(directory)
                logging.info(f'Created directory {directory}')

        # If the exposure contains more than one file (because more
        # than one detector was used), then create a mosaic
        max_size = 8
        numfiles = len(file_list)
        mosaic_created = False
        if numfiles != 1:
            try:
                mosaic_image, mosaic_dq = create_mosaic(file_list)
                mosaic_created = True
                logging.info('Created mosaic for:')
                for item in file_list:
                    logging.info(f'\t{item}')
            except (ValueError, FileNotFoundError) as error:
                # Bug fix: previously a failed mosaic still fell through
                # to the insertion branch below and raised NameError on
                # the unbound ``mosaic_image``. Now we log the error and
                # fall back to a single-file preview.
                logging.error(error)
            if numfiles in [2, 4]:
                max_size = 16
            elif numfiles in [8]:
                max_size = 32

        # Create the nominal preview image and thumbnail
        try:
            im = PreviewImage(filename, "SCI")
            im.clip_percent = 0.01
            im.scaling = 'log'
            im.cmap = 'viridis'
            im.output_format = 'jpg'
            im.preview_output_directory = preview_output_directory
            im.thumbnail_output_directory = thumbnail_output_directory

            # If a mosaic was made from more than one file, insert it
            # and its associated DQ array into the instance of
            # PreviewImage. Also set the input filename to indicate
            # that we have mosaicked data.
            if mosaic_created:
                im.data = mosaic_image
                im.dq = mosaic_dq
                im.file = create_dummy_filename(file_list)

            im.make_image(max_img_size=max_size)
        except ValueError as error:
            logging.warning(error)

    # Complete logging:
    logging.info("Completed.")