Python pathjoin Exemples, desispec.workflow.utils.pathjoin Python Exemples

Exemple #1

0

Afficher le fichier

def get_exposure_table_path(night=None, usespecprod=True):
    """
    Build the default directory in which an exposure table is stored.

    When a night is supplied, the table lives in a per-month subdirectory so that a large production
    directory does not accumulate too many files in one place.

    Args:
        night, int or str or None. The night corresponding to the exposure table. If None, no monthly
                                   subdirectory is appended.
        usespecprod, bool. If True, the root is built from the DESI_SPECTRO_REDUX and SPECPROD environment
                           variable names; otherwise from DESI_SPECTRO_LOG (the exposure table repository).

    Returns:
         str. The full path to the directory where the exposure table should be written (or is already
              written). This does not include the filename.
    """
    if usespecprod:
        # Production-specific location: <redux>/<specprod>
        redux_root = define_variable_from_environment(env_name='DESI_SPECTRO_REDUX',
                                                      var_descr="The specprod path")
        prod_name = define_variable_from_environment(env_name='SPECPROD',
                                                     var_descr="Use SPECPROD for unique exposure table directories")
        root = pathjoin(redux_root, prod_name)
    else:
        # Master copy kept in the exposure table repository
        root = define_variable_from_environment(env_name='DESI_SPECTRO_LOG',
                                                var_descr="The exposure table repository path")

    # Collect the path components and join them once at the end
    components = [root, 'exposure_tables']
    if night is not None:
        components.append(night_to_month(night))
    return pathjoin(*components)

Exemple #2

0

Afficher le fichier

Fichier : tableio.py Projet : dmargala/desispec

def translate_type_to_pathname(tabletype, use_specprod=True):
    """
    Given the type of table, return the proper file pathname.

    Args:
        tabletype, str. Allows for a flexible number of input options, but should refer to either the 'exposure',
                         'processing', or 'unprocessed' table types.
        use_specprod, bool. Only used for the exposure table: if True, the table is looked for in the SPECPROD
                            rather than the exptab repository. Default is True.

    Returns:
         tablename, str. Full pathname including extension of the table type. Uses environment variables to determine
                         the location.

    Raises:
        ValueError: if the standardized tabletype is not one of 'exptable', 'proctable', or 'unproctable'.
                    (Previously this case fell through and failed with an UnboundLocalError on return.)
    """
    # NOTE(review): these imports are function-scoped in the original, presumably to avoid a circular
    # import between the table modules -- confirm before moving them to module level.
    from desispec.workflow.exptable import get_exposure_table_pathname
    from desispec.workflow.proctable import get_processing_table_path, get_processing_table_pathname, get_processing_table_name
    tabletype = standardize_tabletype(tabletype)
    if tabletype == 'exptable':
        tablename = get_exposure_table_pathname(night=None, usespecprod=use_specprod)
    elif tabletype == 'proctable':
        tablename = get_processing_table_pathname()
    elif tabletype == 'unproctable':
        # The unprocessed table sits next to the processing table, with 'processing' swapped in the filename
        tablepath = get_processing_table_path()
        tablename = get_processing_table_name().replace("processing", 'unprocessed')
        tablename = pathjoin(tablepath, tablename)
    else:
        raise ValueError(f"Unrecognized tabletype '{tabletype}'; expected a name that standardizes to "
                         "'exptable', 'proctable', or 'unproctable'.")
    return tablename

Exemple #3

0

Afficher le fichier

def get_exposure_table_pathname(night, usespecprod=True, extension='csv'):
    """
    Build the default full pathname (directory plus filename) of an exposure table.

    Args:
        night, int or str or None. The night corresponding to the exposure table.
        usespecprod, bool. Whether to use the master version or the version in a specprod.
        extension, str. Passed to get_exposure_table_name() to build the filename. Default is 'csv'.

    Returns:
         str. The full pathname where the exposure table should be written (or is already written). This
              includes the filename.
    """
    # Directory first, then filename, joined in a single expression
    return pathjoin(get_exposure_table_path(night, usespecprod=usespecprod),
                    get_exposure_table_name(night, extension))

Exemple #4

0

Afficher le fichier

Fichier : proctable.py Projet : dmargala/desispec

def get_processing_table_path(specprod=None):
    """
    Build the default directory in which a processing table is stored.

    Args:
        specprod, str or None. The name of the production. If None, it is obtained through
                               define_variable_from_environment() using the 'SPECPROD' environment variable name.

    Returns:
         str. The full path to the directory where the processing table should be written (or is already
              written). This does not include the filename.
    """
    # Fall back on the environment when the caller does not name a production
    prod_name = specprod
    if prod_name is None:
        prod_name = define_variable_from_environment(env_name='SPECPROD',
                                                     var_descr="Use SPECPROD for unique processing table directories")

    redux_root = define_variable_from_environment(env_name='DESI_SPECTRO_REDUX',
                                                  var_descr="The specprod path")
    return pathjoin(redux_root, prod_name, 'processing_tables')

Exemple #5

0

Afficher le fichier

Fichier : proctable.py Projet : dmargala/desispec

def get_processing_table_pathname(specprod=None, prodmod=None, extension='csv'):
    """
    Build the default full pathname (directory plus filename) of a processing table.

    Args:
        specprod, str or None. The name of the production. If None, it is obtained through
                               define_variable_from_environment() using the 'SPECPROD' environment variable name.
        prodmod, str. Additional str that can be added to the production table name to further differentiate it.
                      Used in daily workflow to add the night to the name and make it unique from other nightly tables.
        extension, str. The extension (and therefore data format) without a leading period of the saved table.
                        Default is 'csv'.

    Returns:
         str. The full pathname where the processing table should be written (or is already written). This
              includes the filename.
    """
    # Resolve the production name once so the directory and the filename agree
    prod_name = specprod
    if prod_name is None:
        prod_name = define_variable_from_environment(env_name='SPECPROD',
                                                     var_descr="Use SPECPROD for unique processing table directories")

    return pathjoin(get_processing_table_path(prod_name),
                    get_processing_table_name(prod_name, prodmod, extension))

Exemple #6

0

Afficher le fichier

def daily_processing_manager(specprod=None,
                             exp_table_path=None,
                             proc_table_path=None,
                             path_to_data=None,
                             expobstypes=None,
                             procobstypes=None,
                             camword=None,
                             badcamword=None,
                             badamps=None,
                             override_night=None,
                             tab_filetype='csv',
                             queue='realtime',
                             exps_to_ignore=None,
                             data_cadence_time=30,
                             queue_cadence_time=1800,
                             dry_run=False,
                             continue_looping_debug=False,
                             dont_check_job_outputs=False,
                             dont_resubmit_partial_jobs=False,
                             verbose=False):
    """
    Watches the raw data directory of a night for new exposures, records each one in the exposure table,
    submits processing jobs for the relevant ones, and keeps the processing and unprocessed tables up to date.

    Args:
        specprod: str. The name of the current production. If used, this will overwrite the SPECPROD environment variable.
        exp_table_path: str. Full path to where to exposure tables are stored, WITHOUT the monthly directory included.
        proc_table_path: str. Full path to where to processing tables to be written.
        path_to_data: str. Path to the raw data.
        expobstypes: str or comma separated list of strings. The exposure OBSTYPE's that you want to include in the exposure table.
                     Any type listed in procobstypes is added automatically. A list passed in is not modified.
        procobstypes: str or comma separated list of strings. The exposure OBSTYPE's that you want to include in the processing table.
        camword: str. Camword that, if set, alters the set of cameras that will be set for processing.
                      Examples: a0123456789, a1, a2b3r3, a2b3r4z3.
        badcamword: str. Camword that, if set, will be removed from the camword defined in camword if given, or the camword
                         inferred from the data if camword is not given.
        badamps: str. Comma separated list of bad amplifiers that should not be processed. Should be of the
                      form "{camera}{petal}{amp}", i.e. "[brz][0-9][ABCD]". Example: 'b7D,z8A'
        override_night: str or int. 8 digit night, e.g. 20200314, of data to run on. If None, it runs on the current night.
        tab_filetype: str. The file extension (without the '.') of the exposure and processing tables.
        queue: str. The name of the queue to submit the jobs to. Default is "realtime".
        exps_to_ignore: list. A list of exposure id's that should not be processed. Each should be an integer.
        data_cadence_time: int. Wait time in seconds between loops in looking for new data. Default is 30 seconds.
        queue_cadence_time: int. Wait time in seconds between loops in checking queue statuses and resubmitting failures.
                            Default is 1800s. Currently only referenced by the disabled (commented-out) queue cleanup
                            section at the end of this function.
        dry_run: boolean. If true, no scripts are written and no scripts are submitted. The tables are still generated
                 and written, however. The timing is accelerated. This option is most useful for testing and simulating a run.
        continue_looping_debug: bool. FOR DEBUG purposes only. Will continue looping in search of new data until the process
                                 is terminated. Default is False.
        dont_check_job_outputs: bool. Default is False. If False, the code checks for the existence of the expected final
                                 data products for the script being submitted. If all files exist and this is False,
                                 then the script will not be submitted. If some files exist and this is False, only the
                                 subset of the cameras without the final data products will be generated and submitted.
        dont_resubmit_partial_jobs: bool. Default is False. Must be used with dont_check_job_outputs=False. If this flag is
                                          False, jobs with some prior data are pruned using PROCCAMWORD to only process the
                                          remaining cameras not found to exist.
        verbose: bool. True if you want more verbose output, false otherwise. Currently not propagated to lower code,
                       so it is only used in the main daily_processing script itself.

    Returns: Nothing

    Notes:
        Generates both exposure table and processing tables 'on the fly' and saves them at various checkpoints. These
        should be capable of being reloaded in case of interruption or accidental termination of the manager's process.

        Calls sys.exit(0) immediately if it is not during operating hours and override_night is None.
    """
    ## If not being done during operating hours, and we're not simulating data or running a catchup run, exit.
    if not during_operating_hours(dry_run=dry_run) and override_night is None:
        print(
            "Not during operating hours, and not asked to perform a dry run or run on historic data. Exiting."
        )
        sys.exit(0)

    ## What night are we running on?
    true_night = what_night_is_it()
    if override_night is not None:
        night = int(override_night)
        print(f"True night is {true_night}, but running for night={night}")
    else:
        night = true_night

    if continue_looping_debug:
        print(
            "continue_looping_debug is set. Will continue looking for new data and needs to be terminated by the user."
        )

    ## Recast booleans from double negative
    check_for_outputs = (not dont_check_job_outputs)
    resubmit_partial_complete = (not dont_resubmit_partial_jobs)

    ## Define the obstypes to process
    if procobstypes is None:
        procobstypes = default_exptypes_for_proctable()
    elif isinstance(procobstypes, str):
        procobstypes = procobstypes.split(',')

    ## Define the obstypes to save information for in the exposure table
    if expobstypes is None:
        expobstypes = default_exptypes_for_exptable()
    elif isinstance(expobstypes, str):
        expobstypes = expobstypes.split(',')

    ## expobstypes must contain all the types used in processing.
    ## Work on a copy so that a list handed in by the caller is never modified in place.
    expobstypes = list(expobstypes)
    for typ in procobstypes:
        if typ not in expobstypes:
            expobstypes.append(typ)

    ## Warn people if changing camword
    finalcamword = 'a0123456789'
    if camword is not None and badcamword is None:
        badcamword = difference_camwords(finalcamword, camword)
        finalcamword = camword
    elif camword is not None and badcamword is not None:
        finalcamword = difference_camwords(camword, badcamword)
        badcamword = difference_camwords('a0123456789', finalcamword)
    elif badcamword is not None:
        finalcamword = difference_camwords(finalcamword, badcamword)
    else:
        badcamword = ''

    if badcamword != '':
        ## Inform the user what will be done with it.
        print(f"Modifying camword of data to be processed with badcamword: {badcamword}. "
              f"Camword to be processed: {finalcamword}")

    ## Make sure badamps is formatted properly
    if badamps is None:
        badamps = ''
    else:
        badamps = validate_badamps(badamps)

    ## Define the set of exposures to ignore
    if exps_to_ignore is None:
        exps_to_ignore = set()
    else:
        exps_to_ignore = np.sort(np.array(exps_to_ignore).astype(int))
        print(f"\nReceived exposures to ignore: {exps_to_ignore}")
        exps_to_ignore = set(exps_to_ignore)

    ## Adjust wait times if simulating things
    speed_modifier = 1
    if dry_run:
        speed_modifier = 0.1

    ## Get context specific variable values
    nersc_start = nersc_start_time(night=true_night)
    nersc_end = nersc_end_time(night=true_night)
    colnames, coltypes, coldefaults = get_exposure_table_column_defs(
        return_default_values=True)

    ## Define where to find the data
    path_to_data = verify_variable_with_environment(
        var=path_to_data,
        var_name='path_to_data',
        env_name='DESI_SPECTRO_DATA')
    specprod = verify_variable_with_environment(var=specprod,
                                                var_name='specprod',
                                                env_name='SPECPROD')

    ## Define the files to look for: one checksum file per exposure directory of the night
    file_glob = os.path.join(path_to_data, str(night), '*', 'checksum-*')

    ## Determine where the exposure table will be written
    if exp_table_path is None:
        exp_table_path = get_exposure_table_path(night=night, usespecprod=True)
    os.makedirs(exp_table_path, exist_ok=True)
    name = get_exposure_table_name(night=night, extension=tab_filetype)
    exp_table_pathname = pathjoin(exp_table_path, name)

    ## Determine where the processing table will be written
    if proc_table_path is None:
        proc_table_path = get_processing_table_path()
    os.makedirs(proc_table_path, exist_ok=True)
    name = get_processing_table_name(prodmod=night, extension=tab_filetype)
    proc_table_pathname = pathjoin(proc_table_path, name)

    ## Determine where the unprocessed data table will be written
    unproc_table_pathname = pathjoin(proc_table_path,
                                     name.replace('processing', 'unprocessed'))

    ## Combine the table names and types for easier passing to io functions
    table_pathnames = [
        exp_table_pathname, proc_table_pathname, unproc_table_pathname
    ]
    table_types = ['exptable', 'proctable', 'unproctable']

    ## Load in the files defined above
    etable, ptable, unproc_table = load_tables(tablenames=table_pathnames,
                                               tabletypes=table_types)

    ## Get relevant data from the tables
    all_exps = set(etable['EXPID'])
    arcs, flats, sciences, arcjob, flatjob, \
    curtype, lasttype, curtile, lasttile, internal_id = parse_previous_tables(etable, ptable, night)

    ## While running on the proper night and during night hours,
    ## or doing a dry_run or override_night, keep looping
    while ((night == what_night_is_it())
           and during_operating_hours(dry_run=dry_run)) or (override_night
                                                            is not None):
        ## Get a list of new exposures that have been found.
        ## The exposure id is the name of the directory holding each checksum file.
        print(f"\n\n\nPreviously known exposures: {all_exps}")
        located_exps = {
            int(os.path.basename(os.path.dirname(fil)))
            for fil in glob.glob(file_glob)
        }
        new_exps = located_exps.difference(all_exps)
        all_exps = located_exps  # i.e. new_exps.union(all_exps)
        print(f"\nNew exposures: {new_exps}\n\n")

        ## If there aren't any new exps and there won't be more because we're running on an old night or simulating things, exit
        if (not continue_looping_debug) and (override_night is not None) and (len(new_exps) == 0):
            print("Terminating the search for new exposures because no new exposures are present and you have" + \
                  " override_night set without continue_looping_debug")
            break

        ## Loop over new exposures and process them as relevant to that type
        for exp in sorted(new_exps):
            if verbose:
                print(get_printable_banner(str(exp)))
            else:
                print(
                    f'\n\n##################### {exp} #########################'
                )

            ## Open relevant raw data files to understand what we're dealing with
            erow = summarize_exposure(path_to_data,
                                      night,
                                      exp,
                                      expobstypes,
                                      colnames,
                                      coldefaults,
                                      verbosely=False)

            ## If there was an issue, continue. If it's a string summarizing the end of some sequence, use that info.
            ## If the exposure is associated with data, process that data.
            if erow is None:
                continue
            elif isinstance(erow, str):
                if exp in exps_to_ignore:
                    print(
                        f"Located {erow} in exposure {exp}, but the exposure was listed in the expids to ignore. Ignoring this."
                    )
                    continue
                elif erow == 'endofarcs' and arcjob is None and 'arc' in procobstypes:
                    print(
                        "\nLocated end of arc calibration sequence flag. Processing psfnight.\n"
                    )
                    ptable, arcjob, internal_id = arc_joint_fit(
                        ptable,
                        arcs,
                        internal_id,
                        dry_run=dry_run,
                        queue=queue)
                elif erow == 'endofflats' and flatjob is None and 'flat' in procobstypes:
                    print(
                        "\nLocated end of long flat calibration sequence flag. Processing nightlyflat.\n"
                    )
                    ptable, flatjob, internal_id = flat_joint_fit(
                        ptable,
                        flats,
                        internal_id,
                        dry_run=dry_run,
                        queue=queue)
                elif 'short' in erow and flatjob is None:
                    print(
                        "\nLocated end of short flat calibration flag. Removing flats from list for nightlyflat processing.\n"
                    )
                    flats = []
                else:
                    continue
            else:
                erow['BADCAMWORD'] = badcamword
                erow['BADAMPS'] = badamps
                unproc = False
                if exp in exps_to_ignore:
                    print(
                        "\n{} given as exposure id to ignore. Not processing.".
                        format(exp))
                    erow['LASTSTEP'] = 'ignore'
                    # erow['EXPFLAG'] = np.append(erow['EXPFLAG'], )
                    unproc = True
                elif erow['LASTSTEP'] == 'ignore':
                    print(
                        "\n{} identified by the pipeline as something to ignore. Not processing."
                        .format(exp))
                    unproc = True
                elif erow['OBSTYPE'] not in procobstypes:
                    print(
                        "\n{} not in obstypes to process: {}. Not processing.".
                        format(erow['OBSTYPE'], procobstypes))
                    unproc = True
                elif str(erow['OBSTYPE']).lower() == 'arc' and float(
                        erow['EXPTIME']) > 8.0:
                    print(
                        "\nArc exposure with EXPTIME greater than 8s. Not processing."
                    )
                    unproc = True

                print(f"\nFound: {erow}")
                etable.add_row(erow)
                if unproc:
                    unproc_table.add_row(erow)
                    ## Checkpoint before skipping ahead: the write at the bottom of this loop is
                    ## bypassed by 'continue', and later code only rewrites the processing table,
                    ## so without this the new rows could be lost if no processed exposure follows.
                    write_tables([etable, ptable, unproc_table],
                                 tablenames=[
                                     exp_table_pathname, proc_table_pathname,
                                     unproc_table_pathname
                                 ])
                    continue

                curtype, curtile = get_type_and_tile(erow)

                ## A change of exposure type or tile ends the previous sequence,
                ## so check whether a joint job for it should be submitted
                if lasttype is not None and ((curtype != lasttype) or
                                             (curtile != lasttile)):
                    ptable, arcjob, flatjob, \
                    sciences, internal_id = checkfor_and_submit_joint_job(ptable, arcs, flats, sciences, arcjob,
                                                                          flatjob,
                                                                          lasttype, internal_id, dry_run=dry_run,
                                                                          queue=queue, strictly_successful=False,
                                                                          check_for_outputs=check_for_outputs,
                                                                          resubmit_partial_complete=resubmit_partial_complete)

                prow = erow_to_prow(erow)
                prow['INTID'] = internal_id
                internal_id += 1
                prow['JOBDESC'] = prow['OBSTYPE']
                prow = define_and_assign_dependency(prow, arcjob, flatjob)
                print(f"\nProcessing: {prow}\n")
                prow = create_and_submit(
                    prow,
                    dry_run=dry_run,
                    queue=queue,
                    strictly_successful=False,
                    check_for_outputs=check_for_outputs,
                    resubmit_partial_complete=resubmit_partial_complete)
                ptable.add_row(prow)

                ## Note: Assumption here on number of flats
                if curtype == 'flat' and flatjob is None and int(
                        erow['SEQTOT']) < 5:
                    flats.append(prow)
                elif curtype == 'arc' and arcjob is None:
                    arcs.append(prow)
                elif curtype == 'science' and prow['LASTSTEP'] != 'skysub':
                    sciences.append(prow)

                lasttile = curtile
                lasttype = curtype

                ## Flush the outputs
                sys.stdout.flush()
                sys.stderr.flush()

            ## Pause, then checkpoint all three tables after each handled exposure
            time.sleep(10 * speed_modifier)
            write_tables([etable, ptable, unproc_table],
                         tablenames=[
                             exp_table_pathname, proc_table_pathname,
                             unproc_table_pathname
                         ])

        print("\nReached the end of curent iteration of new exposures.")
        print("Waiting {}s before looking for more new data".format(
            data_cadence_time * speed_modifier))
        time.sleep(data_cadence_time * speed_modifier)

        if len(ptable) > 0:
            ptable = update_from_queue(ptable,
                                       start_time=nersc_start,
                                       end_time=nersc_end,
                                       dry_run=dry_run)
            # ptable, nsubmits = update_and_recurvsively_submit(ptable,start_time=nersc_start,end_time=nersc_end,
            #                                                   ptab_name=proc_table_pathname, dry_run=dry_run)

            ## Exposure table doesn't change in the interim, so no need to re-write it to disk
            write_table(ptable, tablename=proc_table_pathname)
            time.sleep(30 * speed_modifier)

    ## Flush the outputs
    sys.stdout.flush()
    sys.stderr.flush()
    ## No more data coming in, so do bottleneck steps if any apply
    ptable, arcjob, flatjob, \
    sciences, internal_id = checkfor_and_submit_joint_job(ptable, arcs, flats, sciences, arcjob,
                                                          flatjob,
                                                          lasttype, internal_id, dry_run=dry_run,
                                                          queue=queue, strictly_successful=False,
                                                          check_for_outputs=check_for_outputs,
                                                          resubmit_partial_complete=resubmit_partial_complete)

    ## All jobs now submitted, update information from job queue and save
    ptable = update_from_queue(ptable,
                               start_time=nersc_start,
                               end_time=nersc_end,
                               dry_run=dry_run)
    write_table(ptable, tablename=proc_table_pathname)

    print(f"Completed submission of exposures for night {night}.")

    # #######################################
    # ########## Queue Cleanup ##############
    # #######################################
    # print("Now resolving job failures.")
    #
    # ## Flush the outputs
    # sys.stdout.flush()
    # sys.stderr.flush()
    # ## Now we resubmit failed jobs and their dependencies until all jobs have un-submittable end state
    # ## e.g. they either succeeded or failed with a code-related issue
    # ii,nsubmits = 0, 0
    # while ii < 4 and any_jobs_not_complete(ptable['STATUS']):
    #     print(f"Starting iteration {ii} of queue updating and resubmissions of failures.")
    #     ptable, nsubmits = update_and_recurvsively_submit(ptable, submits=nsubmits, start_time=nersc_start,end_time=nersc_end,
    #                                                       ptab_name=proc_table_pathname, dry_run=dry_run)
    #     write_table(ptable, tablename=proc_table_pathname)
    #     if any_jobs_not_complete(ptable['STATUS']):
    #         time.sleep(queue_cadence_time*speed_modifier)
    #
    #     ptable = update_from_queue(ptable,start_time=nersc_start,end_time=nersc_end)
    #     write_table(ptable, tablename=proc_table_pathname)
    #     ## Flush the outputs
    #     sys.stdout.flush()
    #     sys.stderr.flush()
    #     ii += 1
    #
    # print("No job failures left.")
    print("Exiting")
    ## Flush the outputs
    sys.stdout.flush()
    sys.stderr.flush()

Exemple #7

0

Afficher le fichier

def summarize_exposure(raw_data_dir, night, exp, obstypes=None, colnames=None, coldefaults=None, verbosely=False):
    """
    Summarize a single raw exposure as a candidate row for the exposure table.

    Given a raw data directory and exposure information, this searches for the raw DESI data files for that
    exposure and loads in relevant information for that flavor+obstype. It returns a dictionary if the obstype
    is one of interest for the exposure table, a string if the exposure signifies the end of a calibration sequence,
    and None if the exposure is skipped.

    Args:
        raw_data_dir, str. The path to where the raw data is stored. It should be the upper level directory where the
                           nightly subdirectories reside.
        night, str or int. Used to know what nightly subdirectory to look for the given exposure in.
        exp, str or int or float. The exposure number of interest.
        obstypes, list or np.array of str's. The list of 'OBSTYPE' keywords to match to. If a match is found, the
                                             information about that exposure is taken and returned for the exposure
                                             table. Otherwise None is returned (or str if it is an end-of-cal manifest).
                                             If None, the default list in default_exptypes_for_exptable() is used.
        colnames, list or np.array. List of column names for an exposure table. If None, the defaults are taken from
                                    get_exposure_table_column_defs().
        coldefaults, list or np.array. List of default values for the corresponding colnames. If None, the defaults
                                       are taken from get_exposure_table_column_defs().
        verbosely, bool. Whether to print more detailed output (True) or more succinct output (False).

    Returns:
        outdict, dict. Dictionary with keys corresponding to the column names of an exposure table. Values are
                       taken from the data when found, otherwise the values are the corresponding default given in
                       coldefaults.
        OR
        str. If the exposure is a manifest that signifies the end of a calibration sequence, the name of the
             sequence that ended: one of 'endofarcs', 'endofflats', or 'endofshortflats'.
        OR
        NoneType. If the request file or the raw data file is missing, the request has no FLAVOR or OBSTYPE,
                  the flavor is not relevant, or the exposure obstype was not in the requested types (obstypes).
    """
    log = get_logger()

    ## Make sure the inputs are in the right format
    ## Anything that is not exactly a str is cast to int and zero-padded to 8 digits
    if type(exp) is not str:
        exp = int(exp)
        exp = f'{exp:08d}'
    night = str(night)

    ## Use defaults if things aren't defined
    if obstypes is None:
        obstypes = default_exptypes_for_exptable()
    if colnames is None or coldefaults is None:
        cnames, cdtypes, cdflts = get_exposure_table_column_defs(return_default_values=True)
        if colnames is None:
            colnames = cnames
        ## User-supplied defaults are only usable if they line up one-to-one with the column names
        if coldefaults is None or len(coldefaults)!=len(colnames):
            coldefaults = cdflts
    colnames,coldefaults = np.asarray(colnames),np.asarray(coldefaults,dtype=object)

    ## Give a header for the exposure
    if verbosely:
        log.info(f'\n\n###### Summarizing exposure: {exp} ######\n')
    else:
        log.info(f'Summarizing exposure: {exp}')
    ## Request json file is first used to quickly identify science exposures
    ## If a request file doesn't exist for an exposure, it shouldn't be an exposure we care about
    reqpath = pathjoin(raw_data_dir, night, exp, f'request-{exp}.json')
    if not os.path.isfile(reqpath):
        if verbosely:
            log.info(f'{reqpath} did not exist!')
        else:
            log.info(f'{exp}: skipped  -- request not found')
        return None

    ## Load the json file in as a dictionary
    req_dict = get_json_dict(reqpath)

    ## Check to see if it is a manifest file for calibrations
    if "SEQUENCE" in req_dict and req_dict["SEQUENCE"].lower() == "manifest":
        ## standardize the naming of end of arc/flats as best we can
        if int(night) < 20200310:
            ## Nights before 20200310: nothing is inferred from the manifest
            pass
        elif int(night) < 20200801:
            ## Nights before 20200801: the end-of-sequence type is parsed out of the PROGRAM string
            if 'PROGRAM' in req_dict:
                prog = req_dict['PROGRAM'].lower()
                if 'calib' in prog and 'done' in prog:
                    if 'short' in prog:
                        return "endofshortflats"
                    elif 'long' in prog:
                        return 'endofflats'
                    elif 'arc' in prog:
                        return 'endofarcs'
        else:
            ## Later nights: the end-of-sequence type is read from the MANIFEST entry's name
            if 'MANIFEST' in req_dict:
                manifest = req_dict['MANIFEST']
                if 'name' in manifest:
                    name = manifest['name'].lower()
                    if name in ['endofarcs', 'endofflats', 'endofshortflats']:
                        return name

    ## If FLAVOR is wrong or no obstype is defined, skip it
    if 'FLAVOR' not in req_dict.keys():
        if verbosely:
            log.info(f'WARNING: {reqpath} -- flavor not given!')
        else:
            log.info(f'{exp}: skipped  -- flavor not given!')
        return None

    flavor = req_dict['FLAVOR'].lower()
    if flavor != 'science' and 'dark' not in obstypes and 'zero' not in obstypes:
        ## If FLAVOR is wrong
        if verbosely:
            log.info(f'ignoring: {reqpath} -- {flavor} not a flavor we care about')
        else:
            log.info(f'{exp}: skipped  -- {flavor} not a relevant flavor')
        return None

    if 'OBSTYPE' not in req_dict.keys():
        ## If no obstype is defined, skip it
        if verbosely:
            log.info(f'ignoring: {reqpath} -- {flavor} flavor but obstype not defined')
        else:
            log.info(f'{exp}: skipped  -- obstype not given')
        return None
    else:
        if verbosely:
            log.info(f'using: {reqpath}')

    ## If obstype isn't in our list of ones we care about, skip it
    obstype = req_dict['OBSTYPE'].lower()
    if obstype not in obstypes:
        ## If obstype is wrong
        if verbosely:
            log.info(f'ignoring: {reqpath} -- {obstype} not an obstype we care about')
        else:
            log.info(f'{exp}: skipped  -- {obstype} not relevant obstype')
        return None

    ## Look for the data. If it's not there, say so then move on
    datapath = pathjoin(raw_data_dir, night, exp, f'desi-{exp}.fits.fz')
    if not os.path.exists(datapath):
        if verbosely:
            log.info(f'could not find {datapath}! It had obstype={obstype}. Skipping')
        else:
            log.info(f'{exp}: skipped  -- data not found')
        return None
    else:
        if verbosely:
            log.info(f'using: {datapath}')

    ## Raw data: load the header and keep the file handle only as long as it is needed
    header,fx = load_raw_data_header(pathname=datapath, return_filehandle=True)

    ## Everything that happens while the file handle is open lives inside the try so that
    ## the handle is always closed, even if building the row raises an exception
    try:
        ## Define the column values for the current exposure in a dictionary
        outdict = {}
        ## Set HEADERERR and EXPFLAG before loop because they may be set if other columns have missing information
        outdict['HEADERERR'] = coldefaults[colnames == 'HEADERERR'][0]
        outdict['EXPFLAG'] = coldefaults[colnames == 'EXPFLAG'][0]
        ## Loop over columns and fill in the information. If unavailable report/flag if necessary and assign default
        for key,default in zip(colnames,coldefaults):
            ## These are dealt with separately
            if key in ['NIGHT','HEADERERR','EXPFLAG']:
                continue
            ## These just need defaults, as they are user defined (except FA_SURV which comes from the
            ## request.json file and CAMWORD which is derived from the raw data; both are overwritten below)
            elif key in ['CAMWORD', 'FA_SURV', 'BADCAMWORD', 'BADAMPS', 'LASTSTEP', 'COMMENTS']:
                outdict[key] = default
            ## Try to find the key in the raw data header
            elif key in header.keys():
                val = header[key]
                ## String values are stored lower-cased; everything else is stored as-is
                if type(val) is str:
                    outdict[key] = val.lower()
                else:
                    outdict[key] = val
            ## If key not in the header, identify that and place a default value
            ## If obstype isn't arc or flat, don't worry about seqnum or seqtot
            elif key in ['SEQNUM','SEQTOT'] and obstype not in ['arc','flat']:
                outdict[key] = default
            ## If tileid or TARGT and not science, just replace with default
            elif key in ['TILEID','TARGTRA','TARGTDEC'] and obstype not in ['science']:
                outdict[key] = default
            ## If trying to assign purpose and it's before that was defined, just give default
            elif key in ['PURPOSE'] and int(night) < 20201201:
                outdict[key] = default
            ## if something else, flag as missing metadata and replace with default
            else:
                if 'metadata_missing' not in outdict['EXPFLAG']:
                    outdict['EXPFLAG'] = np.append(outdict['EXPFLAG'], 'metadata_missing')
                outdict[key] = default
                ## Only scalar defaults are reported in HEADERERR
                if np.isscalar(default):
                    reporting = keyval_change_reporting(key, '', default)
                    outdict['HEADERERR'] = np.append(outdict['HEADERERR'], reporting)

        ## Make sure that the night is defined:
        try:
            outdict['NIGHT'] = int(header['NIGHT'])
        except (KeyError, ValueError, TypeError):
            ## Header NIGHT is missing or not an integer: flag it, derive the night from the
            ## header via header2night(), and record the substitution in HEADERERR
            if 'metadata_missing' not in outdict['EXPFLAG']:
                outdict['EXPFLAG'] = np.append(outdict['EXPFLAG'], 'metadata_missing')
            outdict['NIGHT'] = header2night(header)
            try:
                orig = str(header['NIGHT'])
            except (KeyError, ValueError, TypeError):
                orig = ''
            reporting = keyval_change_reporting('NIGHT',orig,outdict['NIGHT'])
            outdict['HEADERERR'] = np.append(outdict['HEADERERR'],reporting)

        ## Get the cameras available in the raw data and summarize with camword
        cams = cameras_from_raw_data(fx)
        camword = create_camword(cams)
        outdict['CAMWORD'] = camword
    finally:
        fx.close()

    ## Add the fiber assign survey, if it doesn't exist use the pre-defined one
    if "FA_SURV" in req_dict and "FA_SURV" in colnames:
        outdict['FA_SURV'] = req_dict['FA_SURV']

    ## Flag the exposure based on PROGRAM information
    ## Order matters: 'undither' must be tested before 'dither' since the latter is a substring of the former
    if 'system test' in outdict['PROGRAM'].lower():
        outdict['LASTSTEP'] = 'ignore'
        outdict['EXPFLAG'] = np.append(outdict['EXPFLAG'], 'test')
        log.info(f"Exposure {exp} identified as system test. Not processing.")
    elif obstype == 'science' and float(outdict['EXPTIME']) < 59.0:
        outdict['LASTSTEP'] = 'skysub'
        outdict['EXPFLAG'] = np.append(outdict['EXPFLAG'], 'short_exposure')
        log.info(f"Science exposure {exp} with EXPTIME less than 59s. Processing through sky subtraction.")
    elif obstype == 'science' and 'undither' in outdict['PROGRAM']:
        outdict['LASTSTEP'] = 'fluxcal'
        log.info(f"Science exposure {exp} identified as undithered. Processing through flux calibration.")
    elif obstype == 'science' and 'dither' in outdict['PROGRAM']:
        outdict['LASTSTEP'] = 'skysub'
        log.info(f"Science exposure {exp} identified as dither. Processing through sky subtraction.")

    ## For Things defined in both request and data, if they don't match, flag in the
    ##     output file for followup/clarity
    for check in ['OBSTYPE']:#, 'FLAVOR']:
        rval, hval = req_dict[check], header[check]
        if rval != hval:
            log.warning(f'In keyword {check}, request and data header disagree: req:{rval}\tdata:{hval}')
            if 'metadata_mismatch' not in outdict['EXPFLAG']:
                outdict['EXPFLAG'] = np.append(outdict['EXPFLAG'], 'metadata_mismatch')
            outdict['COMMENTS'] = np.append(outdict['COMMENTS'],f'For {check}: req={rval} but hdu={hval}')
        else:
            if verbosely:
                log.info(f'{check} checks out')

    ## Special logic for EXPTIME because of real-world variance on order 10's - 100's of ms
    ## A difference of more than 0.5 between requested and recorded EXPTIME is flagged as 'aborted'
    check = 'EXPTIME'
    rval, hval = req_dict[check], header[check]
    if np.abs(float(rval)-float(hval))>0.5:
        log.warning(f'In keyword {check}, request and data header disagree: req:{rval}\tdata:{hval}')
        if 'aborted' not in outdict['EXPFLAG']:
            outdict['EXPFLAG'] = np.append(outdict['EXPFLAG'], 'aborted')
        outdict['COMMENTS'] = np.append(outdict['COMMENTS'],f'For {check}: req={rval} but hdu={hval}')
    else:
        if verbosely:
            log.info(f'{check} checks out')

    log.info(f'Done summarizing exposure: {exp}')
    return outdict

Exemple #8

0

Afficher le fichier

def submit_night(night,
                 proc_obstypes=None,
                 dry_run=False,
                 queue='realtime',
                 reservation=None,
                 exp_table_path=None,
                 proc_table_path=None,
                 tab_filetype='csv',
                 error_if_not_available=True,
                 overwrite_existing_tables=False,
                 dont_check_job_outputs=False,
                 dont_resubmit_partial_jobs=False,
                 system_name=None):
    """
    Creates a processing table and an unprocessed table from a fully populated exposure table and submits those
    jobs for processing (unless dry_run is set).

    Args:
        night, int. The night of data to be processed. Exposure table must exist.
        proc_obstypes, list or np.array. Optional. A list of exposure OBSTYPE's that should be processed (and therefore
                                              added to the processing table). If None, the list from
                                              default_exptypes_for_proctable() is used.
        dry_run, bool. Default is False. Should the jobs written to the processing table actually be submitted
                                             for processing.
        queue: str. The name of the queue to submit the jobs to. Default is "realtime".
        reservation: str. The reservation to submit jobs to. If None, it is not submitted to a reservation.
        exp_table_path: str. Full path to where to exposure tables are stored, WITHOUT the monthly directory included.
        proc_table_path: str. Full path to where to processing tables to be written.
        tab_filetype: str. The file extension (without the '.') of the exposure and processing tables.
        error_if_not_available: bool. Default is True. Raise as error if the required exposure table doesn't exist,
                                      otherwise prints an error and returns.
        overwrite_existing_tables: bool. True if you want to submit jobs even if a processing table already exists.
                                         If False (the default) and the processing table file already exists,
                                         an error is printed and the function returns without submitting anything.
        dont_check_job_outputs, bool. Default is False. If False, the code checks for the existence of the expected final
                                 data products for the script being submitted. If all files exist and this is False,
                                 then the script will not be submitted. If some files exist and this is False, only the
                                 subset of the cameras without the final data products will be generated and submitted.
        dont_resubmit_partial_jobs, bool. Default is False. Must be used with dont_check_job_outputs=False. If this flag is
                                          False, jobs with some prior data are pruned using PROCCAMWORD to only process the
                                          remaining cameras not found to exist.
        system_name: batch system name, e.g. cori-haswell, cori-knl, perlmutter-gpu
    Returns:
        None.

    Raises:
        IOError: if the exposure table does not exist and error_if_not_available is True.
    """
    log = get_logger()

    ## Recast booleans from double negative
    check_for_outputs = (not dont_check_job_outputs)
    resubmit_partial_complete = (not dont_resubmit_partial_jobs)

    if proc_obstypes is None:
        proc_obstypes = default_exptypes_for_proctable()

    ## Determine where the exposure table will be written
    if exp_table_path is None:
        exp_table_path = get_exposure_table_path(night=night, usespecprod=True)
    name = get_exposure_table_name(night=night, extension=tab_filetype)
    exp_table_pathname = pathjoin(exp_table_path, name)
    if not os.path.exists(exp_table_pathname):
        if error_if_not_available:
            raise IOError(
                f"Exposure table: {exp_table_pathname} not found. Exiting this night."
            )
        else:
            print(
                f"ERROR: Exposure table: {exp_table_pathname} not found. Exiting this night."
            )
            return

    ## Determine where the processing table will be written
    if proc_table_path is None:
        proc_table_path = get_processing_table_path()
    os.makedirs(proc_table_path, exist_ok=True)
    name = get_processing_table_name(prodmod=night, extension=tab_filetype)
    proc_table_pathname = pathjoin(proc_table_path, name)

    ## Check if night has already been submitted and don't submit if it has, unless told to
    ## with overwrite_existing_tables
    if not overwrite_existing_tables and os.path.exists(proc_table_pathname):
        print(
            f"ERROR: Processing table: {proc_table_pathname} already exists and not "
            + "given flag overwrite_existing. Exiting this night.")
        return

    ## Determine where the unprocessed data table will be written
    unproc_table_pathname = pathjoin(proc_table_path,
                                     name.replace('processing', 'unprocessed'))

    ## Combine the table names and types for easier passing to io functions
    table_pathnames = [exp_table_pathname, proc_table_pathname]
    table_types = ['exptable', 'proctable']

    ## Load in the files defined above
    etable, ptable = load_tables(tablenames=table_pathnames,
                                 tabletypes=table_types)

    ## Get context specific variable values
    true_night = what_night_is_it()
    nersc_start = nersc_start_time(night=true_night)
    nersc_end = nersc_end_time(night=true_night)

    ## Build boolean masks selecting the exposures that should be processed
    good_exps = np.array(
        [col.lower() != 'ignore' for col in etable['LASTSTEP']]).astype(bool)
    good_types = np.array([val in proc_obstypes
                           for val in etable['OBSTYPE']]).astype(bool)
    ## Science exposures with EXPTIME below 60 and arcs with EXPTIME above 8 are not processed
    good_exptimes = []
    for erow in etable:
        if erow['OBSTYPE'] == 'science' and erow['EXPTIME'] < 60:
            good_exptimes.append(False)
        elif erow['OBSTYPE'] == 'arc' and erow['EXPTIME'] > 8.:
            good_exptimes.append(False)
        else:
            good_exptimes.append(True)
    good_exptimes = np.array(good_exptimes)
    good = (good_exps & good_types & good_exptimes)
    ## Everything rejected by any mask is recorded in the unprocessed table
    unproc_table = etable[~good]
    etable = etable[good]

    write_table(unproc_table, tablename=unproc_table_pathname)
    ## Get relevant data from the tables
    arcs, flats, sciences, arcjob, flatjob, \
    curtype, lasttype, curtile, lasttile, internal_id = parse_previous_tables(etable, ptable, night)

    ## Initialise the row count before the loop so that the post-loop check below is well defined
    ## even when no exposure survives the selection above (previously this raised UnboundLocalError)
    tableng = len(ptable)

    ## Loop over new exposures and process them as relevant to that type
    for ii, erow in enumerate(etable):
        erow = table_row_to_dict(erow)
        exp = int(erow['EXPID'])
        print(f'\n\n##################### {exp} #########################')

        print(f"\nFound: {erow}")

        curtype, curtile = get_type_and_tile(erow)

        ## A change of exposure type or tile closes out the previous group, so check whether
        ## a joint job for that group should be submitted before handling this exposure
        if lasttype is not None and ((curtype != lasttype) or
                                     (curtile != lasttile)):
            ptable, arcjob, flatjob, \
            sciences, internal_id    = checkfor_and_submit_joint_job(ptable, arcs, flats, sciences, arcjob, flatjob,
                                                                     lasttype, internal_id, dry_run=dry_run,
                                                                     queue=queue, reservation=reservation,
                                                                     strictly_successful=True,
                                                                     check_for_outputs=check_for_outputs,
                                                                     resubmit_partial_complete=resubmit_partial_complete,
                                                                     system_name=system_name)

        prow = erow_to_prow(erow)
        prow['INTID'] = internal_id
        internal_id += 1
        prow['JOBDESC'] = prow['OBSTYPE']
        prow = define_and_assign_dependency(prow, arcjob, flatjob)
        print(f"\nProcessing: {prow}\n")
        prow = create_and_submit(
            prow,
            dry_run=dry_run,
            queue=queue,
            reservation=reservation,
            strictly_successful=True,
            check_for_outputs=check_for_outputs,
            resubmit_partial_complete=resubmit_partial_complete,
            system_name=system_name)
        ptable.add_row(prow)

        ## Note: Assumption here on number of flats
        if curtype == 'flat' and flatjob is None and int(erow['SEQTOT']) < 5:
            flats.append(prow)
        elif curtype == 'arc' and arcjob is None:
            arcs.append(prow)
        elif curtype == 'science' and prow['LASTSTEP'] != 'skysub':
            sciences.append(prow)

        lasttile = curtile
        lasttype = curtype

        if not dry_run:
            time.sleep(1)

        ## Checkpoint the processing table to disk every 10th exposure
        tableng = len(ptable)
        if tableng > 0 and ii % 10 == 0:
            write_table(ptable, tablename=proc_table_pathname)
            if not dry_run:
                print("\n",
                      "Sleeping 2s to slow down the queue submission rate")
                time.sleep(2)

        ## Flush the outputs
        sys.stdout.flush()
        sys.stderr.flush()

    if tableng > 0:
        ## No more data coming in, so do bottleneck steps if any apply
        ptable, arcjob, flatjob, \
        sciences, internal_id = checkfor_and_submit_joint_job(ptable, arcs, flats, sciences, arcjob, flatjob,
                                                              lasttype, internal_id, dry_run=dry_run,
                                                              queue=queue, reservation=reservation,
                                                              strictly_successful=True,
                                                              check_for_outputs=check_for_outputs,
                                                              resubmit_partial_complete=resubmit_partial_complete,
                                                              system_name=system_name)
        ## All jobs now submitted, update information from job queue and save
        ptable = update_from_queue(ptable,
                                   start_time=nersc_start,
                                   end_time=nersc_end,
                                   dry_run=dry_run)
        write_table(ptable, tablename=proc_table_pathname)

    print(f"Completed submission of exposures for night {night}.", '\n\n\n')

Exemple #9

0

Afficher le fichier

def create_exposure_tables(nights=None, night_range=None, path_to_data=None, exp_table_path=None, obstypes=None,
                           exp_filetype='csv', cameras='', bad_cameras='', badamps='',
                           verbose=False, no_specprod=False, overwrite_files=False):
    """
    Generates exposure tables for the nights requested by summarizing the raw exposures found on disk.

    Args:
        nights: str, int, or comma separated list. The night(s) to generate exposure tables for. The string 'all'
                     selects every 20YYMMDD directory found under $DESI_SPECTRO_DATA.
        night_range: str, comma separated pair of nights in form YYYYMMDD,YYYYMMDD for first_night,last_night
                          specifying the beginning and end of a range of nights to be generated.
                          last_night should be inclusive.
        path_to_data: str. The path to the raw data and request*.json and manifest* files.
        exp_table_path: str. Full path to where to exposure tables should be saved, WITHOUT the monthly directory included.
        obstypes: str or comma separated list of strings. The exposure OBSTYPE's that you want to include in the exposure table.
        exp_filetype: str. The file extension (without the '.') of the exposure tables.
        cameras: str. Explicitly define the cameras for which you want to reduce the data. Should be a comma separated
                      list. Only numbers assumes you want to reduce r, b, and z for that camera. Otherwise specify
                      separately [brz][0-9].
        bad_cameras: str. Explicitly define the cameras that you don't want to reduce the data. Should be a comma
                          separated list. Only numbers assumes you want to reduce r, b, and z for that camera.
                          Otherwise specify separately [brz][0-9].
        badamps: str. Define amplifiers that you know to be bad and should not be processed. Should be a list separated
                      by comma or semicolon. Saved list will converted to semicolons. Each entry should be of the
                      form {camera}{spectrograph}{amp}, i.e. [brz][0-9][A-D].
        verbose: boolean. Whether to give verbose output information or not. True prints more information.
        no_specprod: boolean. Create exposure table in repository location rather than the SPECPROD location
        overwrite_files: boolean. Whether to overwrite exposure tables if they exist. True overwrites.
    Returns: Nothing

    Raises:
        ValueError: if neither or both of nights and night_range are given, or if night_range is not a
                    comma separated pair of numeric nights.
    """
    if nights is None and night_range is None:
        raise ValueError("Must specify either nights or night_range")
    elif nights is not None and night_range is not None:
        raise ValueError("Must only specify either nights or night_range, not both")

    if nights is None or nights=='all':
        nights = list()
        for n in listpath(os.getenv('DESI_SPECTRO_DATA')):
            #- nights are 20YYMMDD
            if re.match(r'^20\d{6}$', n):
                nights.append(n)
    else:
        ## str() lets a single night be passed as an int, as the docstring allows
        nights = [ int(val.strip()) for val in str(nights).split(",") ]

    nights = np.array(nights)

    if night_range is not None:
        if ',' not in night_range:
            raise ValueError("night_range must be a comma separated pair of nights in form YYYYMMDD,YYYYMMDD")
        nightpair = night_range.split(',')
        if len(nightpair) != 2 or not nightpair[0].isnumeric() or not nightpair[1].isnumeric():
            raise ValueError("night_range must be a comma separated pair of nights in form YYYYMMDD,YYYYMMDD")
        first_night, last_night = nightpair
        ## Keep only the nights within [first_night, last_night], inclusive on both ends
        nights = nights[np.where(int(first_night)<=nights.astype(int))[0]]
        nights = nights[np.where(int(last_night)>=nights.astype(int))[0]]

    if obstypes is not None:
        obstypes = [ val.strip('\t ') for val in obstypes.split(",") ]
    else:
        obstypes = default_exptypes_for_exptable()

    print("Nights: ", nights)
    print("Obs types: ", obstypes)

    ## Deal with cameras and amps, if given
    camword = cameras
    if camword != '':
        camword = parse_cameras(camword)
    badcamword = bad_cameras
    if badcamword != '':
        badcamword = parse_cameras(badcamword)

    ## Warn people if changing camword
    ## NOTE(review): the branches below test against None, but with the default '' arguments neither
    ## word is None, so the second branch runs with empty camwords -- confirm difference_camwords
    ## handles '' as intended.
    finalcamword = 'a0123456789'
    if camword is not None and badcamword is None:
        badcamword = difference_camwords(finalcamword,camword)
        finalcamword = camword
    elif camword is not None and badcamword is not None:
        finalcamword = difference_camwords(camword, badcamword)
        badcamword = difference_camwords('a0123456789', finalcamword)
    elif badcamword is not None:
        finalcamword = difference_camwords(finalcamword,badcamword)
    else:
        badcamword = ''

    if badcamword != '':
        ## Inform the user what will be done with it.
        print(f"Modifying camword of data to be processed with badcamword: {badcamword}. " + \
              f"Camword to be processed: {finalcamword}")

    ## Make sure badamps is formatted properly
    if badamps is None:
        badamps = ''
    else:
        badamps = validate_badamps(badamps)

    ## Define where to find the data
    if path_to_data is None:
        path_to_data = define_variable_from_environment(env_name='DESI_SPECTRO_DATA',
                                                        var_descr="The data path")

    ## Define where to save the data
    usespecprod = (not no_specprod)
    if exp_table_path is None:
        exp_table_path = get_exposure_table_path(night=None,usespecprod=usespecprod)

    ## Make sure the save directory exists
    os.makedirs(exp_table_path, exist_ok=True)

    ## Loop over nights
    colnames, coltypes, coldefaults = get_exposure_table_column_defs(return_default_values=True)
    for night in nights:
        if str(night) not in listpath(path_to_data):
            print(f'Night: {night} not in data directory {path_to_data}. Skipping')
            continue

        print(get_printable_banner(input_str=night))

        ## Create an astropy exposure table for the night
        nightly_tab = instantiate_exposure_table()

        ## Loop through all exposures on disk
        for exp in listpath(path_to_data,str(night)):
            rowdict = summarize_exposure(path_to_data, night=night, exp=exp, obstypes=obstypes, \
                                         colnames=colnames, coldefaults=coldefaults, verbosely=verbose)
            ## summarize_exposure returns None for skipped exposures and a str for end-of-calibration
            ## manifests; only dict results become table rows
            if rowdict is not None and type(rowdict) is not str:
                rowdict['BADCAMWORD'] = badcamword
                rowdict['BADAMPS'] = badamps
                ## Add the dictionary of column values as a new row
                nightly_tab.add_row(rowdict)
            if verbose:
                print("Rowdict:\n",rowdict,"\n\n")

        if len(nightly_tab) > 0:
            ## Tables are written under a per-month subdirectory of exp_table_path
            month = night_to_month(night)
            exptab_path = pathjoin(exp_table_path,month)
            os.makedirs(exptab_path,exist_ok=True)
            exptab_name = get_exposure_table_name(night, extension=exp_filetype)
            exptab_name = pathjoin(exptab_path, exptab_name)
            write_table(nightly_tab, exptab_name, overwrite=overwrite_files)
        else:
            print('No rows to write to a file.')

        print("Exposure table generations complete")
        ## Flush the outputs
        sys.stdout.flush()
        sys.stderr.flush()

Exemple #10

0

Afficher le fichier

Fichier : processingtable.py Projet : dmargala/desispec

def create_processing_tables(nights=None,
                             night_range=None,
                             exp_table_path=None,
                             proc_table_path=None,
                             obstypes=None,
                             overwrite_files=False,
                             verbose=False,
                             no_specprod_exptab=False,
                             exp_filetype='csv',
                             prod_filetype='csv',
                             joinsymb='|'):
    """
    Generates processing tables for the nights requested. Requires exposure tables to exist on disk.

    The exposure tables of all selected nights are loaded and stacked into one table, which is then
    split by exptable_to_proctable() into a processing table and an unprocessed table. Each of the two
    is written to proc_table_path only if it contains at least one row.

    Args:
        nights: str, int, or comma separated list. The night(s) to generate processing tables for. The string
                          'all' selects every 20YYMMDD entry found in the directory named by the
                          DESI_SPECTRO_DATA environment variable.
        night_range: str, comma separated pair of nights in form YYYYMMDD,YYYYMMDD for first_night,last_night
                          specifying the beginning and end of a range of nights to be generated.
                          last_night should be inclusive. The candidate nights are the 20YYMMDD entries
                          found in the DESI_SPECTRO_DATA directory.
        exp_table_path: str. Full path to where to exposure tables are stored, WITHOUT the monthly directory included.
        proc_table_path: str. Full path to where to processing tables to be written.
        obstypes: str or comma separated list of strings. The exposure OBSTYPE's that you want to include in the processing table.
        overwrite_files: boolean. Whether to overwrite processing tables if they exist. True overwrites.
        verbose: boolean. Whether to give verbose output information or not. True prints more information.
        no_specprod_exptab: boolean. Read exposure table in repository location rather than the SPECPROD location.
        exp_filetype: str. The file extension (without the '.') of the exposure tables.
        prod_filetype: str. The file extension (without the '.') of the processing tables.
        joinsymb: str. Symbol to use to indicate the separation of array values when converting to and from strings for
                       saving to csv. Default is highly advised and is '|'. Using a comma will break many things.
                       Currently accepted but not forwarded to any call in this function.

    Returns: Nothing

    Raises:
        ValueError: if neither or both of nights and night_range are given, or if night_range is not a
                    comma separated pair of numeric nights.

    Notes:
        Requires exposure tables to exist on disk. Either in the default location or at the location specified
        using the function arguments.
    """
    if nights is None and night_range is None:
        raise ValueError("Must specify either nights or night_range")
    elif nights is not None and night_range is not None:
        raise ValueError(
            "Must only specify either nights or night_range, not both")

    if nights is None or nights == 'all':
        ## Discover the nights from the raw data directory. Nights are 20YYMMDD.
        ## Raw string so that \d is a regex class and not an invalid string escape.
        night_pattern = re.compile(r'^20\d{6}$')
        nights = [n for n in listpath(os.getenv('DESI_SPECTRO_DATA'))
                  if night_pattern.match(n)]
    else:
        ## str() lets a single integer night be given, as the docstring promises
        nights = [int(val.strip()) for val in str(nights).split(",")]

    nights = np.array(nights)

    if night_range is not None:
        ## A missing comma yields a single element, so the length check covers that case too
        nightpair = night_range.split(',')
        if len(nightpair) != 2 or not all(val.isnumeric() for val in nightpair):
            raise ValueError(
                "night_range must be a comma separated pair of nights in form YYYYMMDD,YYYYMMDD"
            )
        first_night, last_night = (int(val) for val in nightpair)
        ## Keep only nights inside the inclusive range
        night_ints = nights.astype(int)
        nights = nights[(first_night <= night_ints) & (night_ints <= last_night)]

    if obstypes is not None:
        obstypes = [val.strip('\t ') for val in obstypes.split(",")]
    else:
        obstypes = default_exptypes_for_proctable()

    ## Define where to find the data
    if exp_table_path is None:
        usespecprod = (not no_specprod_exptab)
        exp_table_path = get_exposure_table_path(night=None,
                                                 usespecprod=usespecprod)

    ## Define where to save the data
    if proc_table_path is None:
        proc_table_path = get_processing_table_path()

    if verbose:
        print(f'Nights: {nights}')

    ## Make sure the exposure table directory exists
    os.makedirs(exp_table_path, exist_ok=True)

    ## Make the save directory if it doesn't exist
    if not os.path.isdir(proc_table_path):
        print(f'Creating directory: {proc_table_path}')
        os.makedirs(proc_table_path)

    ## Start from an empty table so that an empty list of nights still yields a valid input below
    combined_table = Table()

    ## Loop over nights
    for index, night in enumerate(nights):
        if verbose:
            print(get_printable_banner(input_str=night))
        else:
            print(f'Processing {night}')
        exptab_name = get_exposure_table_name(night=night,
                                              extension=exp_filetype)
        month = night_to_month(night)
        exptable = load_table(pathjoin(exp_table_path, month, exptab_name),
                              process_mixins=False)

        ## Decide by position, not by value: comparing against nights[0] would restart the
        ## stack (and drop rows) if the first night appears again later in the list.
        if index == 0:
            combined_table = exptable.copy()
        else:
            combined_table = vstack([combined_table, exptable])

    processing_table, unprocessed_table = exptable_to_proctable(
        combined_table, obstypes=obstypes)  #,joinsymb=joinsymb)

    ## Save the tables
    proc_name = get_processing_table_name(extension=prod_filetype)
    unproc_name = proc_name.replace('processing', 'unprocessed')
    for tab, name in zip([processing_table, unprocessed_table],
                         [proc_name, unproc_name]):
        if len(tab) > 0:
            pathname = pathjoin(proc_table_path, name)
            write_table(tab, pathname, overwrite=overwrite_files)
            print(f'Wrote file: {name}')

    print("Processing table generations complete")
    ## Flush the outputs
    sys.stdout.flush()
    sys.stderr.flush()

Exemple #11

0

Afficher le fichier

Fichier : editexptable.py Projet : dmargala/desispec

def edit_exposure_table(exp_str,
                        colname,
                        value,
                        night=None,
                        tablepath=None,
                        append_string=False,
                        overwrite_value=False,
                        use_spec_prod=True,
                        read_user_version=False,
                        write_user_version=False,
                        overwrite_file=True):  #, joinsymb='|'):
    """
    Edits the exposure table on disk to change the column named colname to value of value for rows of exposure table
    that correspond to the exposures defined in exp_str. The table on disk can be defined using night, or given
    directly with tablepath. If both are given, tablepath determines the file location.

    Note: This overwrites an exposure table file on disk by default.

    Args:
        exp_str, str. A string representing the exposure ID's for which you want to edit the column to a new value.
                      The string can be any combination of integer ranges, single integers, or 'all'. Each range or integer
                      is separated by a comma. 'all' implies all exposures. Ranges can be given using ':', '-', or '..'.
                      All ranges are assumed to be inclusive.
        colname, str. The column name in the exptable where you want to change values. It is upper-cased before use.
        value, any scalar type. The value you want to change the column value of each exp_str exposure row to.
        night, str or int. The night the exposures were acquired on. This uniquely defines the exposure table.
        tablepath, str. A relative or absolute path to the exposure table file, if named differently from the default
                        in desispec.workflow.exptable.
        append_string, bool. True if you want to append your input value to the end of an existing string. Used
                             for BADAMPS. Default is False.
        overwrite_value, bool. True if you want to overwrite a non-default value, if it exists. Default is False.
        use_spec_prod, bool. True if you want to read in the exposure table defined by night from the currently
                             defined SPECPROD as opposed to the exposure table repository location. Default is True.
        read_user_version, bool. True if you want to read in an exposure table saved including the current user's
                                 USER name. Meant for test editing of a file multiple times. If the file doesn't exist,
                                 the non-user value is loaded. Default is False.
        write_user_version, bool. True if you want to write in an exposure table saved including the current user's
                                 USER name. Meant for test editing of a file without overwriting the true exposure table.
                                 Default is False.
        overwrite_file, bool. True if you want to overwrite the file on disk. Default is True.

    Returns: Nothing. If the table cannot be loaded a message is printed and nothing is written.

    Raises:
        ValueError: if neither night nor tablepath is given, if colname is a column that must not be edited,
                    or if both append_string and overwrite_value are set.
        KeyError: if a user version is requested but the USER environment variable is not set.
    """
    ## Don't edit fixed columns
    colname = colname.upper()
    if tablepath is None and night is None:
        raise ValueError("Must specify night or the path to the table.")
    if colname in columns_not_to_edit():
        raise ValueError(f"Not allowed to edit colname={colname}.")
    if append_string and overwrite_value:
        raise ValueError("Cannot append_str and overwrite_value.")

    ## Get the file locations
    if tablepath is not None:
        path, name = os.path.split(tablepath)
    else:
        path = get_exposure_table_path(night=night, usespecprod=use_spec_prod)
        name = get_exposure_table_name(night=night)  #, extension='.csv')

    pathname = pathjoin(path, name)

    ## Only build the user-specific name when it is needed, so that a missing USER variable
    ## cannot break plain edits. The suffix goes in front of the real extension, which
    ## guarantees the user file differs from the true table whatever the extension is.
    user_pathname = None
    if read_user_version or write_user_version:
        root, ext = os.path.splitext(name)
        user_pathname = os.path.join(path,
                                     root + '_' + str(os.environ['USER']) + ext)

    ## Read in the table
    if read_user_version and os.path.isfile(user_pathname):
        exptable = load_table(tablename=user_pathname, tabletype='exptable')
    else:
        if read_user_version:
            print(
                "Couldn't locate a user version of the exposure table, loading the default version of the table."
            )
        exptable = load_table(tablename=pathname, tabletype='exptable')

    if exptable is None:
        print("There was a problem loading the exposure table... Exiting.")
        return

    ## Do the modification
    outtable = change_exposure_table_rows(exptable, exp_str, colname, value,
                                          append_string,
                                          overwrite_value)  #, joinsymb)

    ## Write out the table, to the user-specific file if requested
    out_pathname = user_pathname if write_user_version else pathname
    write_table(outtable,
                tablename=out_pathname,
                tabletype='exptable',
                overwrite=overwrite_file)
    print(f"Wrote edited table to: {out_pathname}")