Example #1
import logging
import os
from pprint import pformat

from esmvaltool.diag_scripts.shared import (group_metadata, select_metadata,
                                            sorted_metadata)

logger = logging.getLogger(os.path.basename(__file__))

# compute_diagnostic, get_provenance_record and plot_diagnostic are helper
# functions defined elsewhere in the same diagnostic script.


def main(cfg):
    """Compute the time average for each input dataset."""
    # Get a description of the preprocessed data that we will use as input.
    input_data = cfg['input_data'].values()

    # Demonstrate use of metadata access convenience functions.
    selection = select_metadata(input_data, short_name='pr', project='CMIP5')
    logger.info("Example of how to select only CMIP5 precipitation data:\n%s",
                pformat(selection))

    selection = sorted_metadata(selection, sort='dataset')
    logger.info("Example of how to sort this selection by dataset:\n%s",
                pformat(selection))

    grouped_input_data = group_metadata(input_data,
                                        'standard_name',
                                        sort='dataset')
    logger.info(
        "Example of how to group and sort input data by standard_name:"
        "\n%s", pformat(grouped_input_data))

    # Example of how to loop over variables/datasets in alphabetical order
    for standard_name in grouped_input_data:
        logger.info("Processing variable %s", standard_name)
        for attributes in grouped_input_data[standard_name]:
            logger.info("Processing dataset %s", attributes['dataset'])
            input_file = attributes['filename']
            cube = compute_diagnostic(input_file)

            output_basename = os.path.splitext(
                os.path.basename(input_file))[0] + '_mean'
            provenance_record = get_provenance_record(
                attributes, ancestor_files=[input_file])
            plot_diagnostic(cube, output_basename, provenance_record, cfg)
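For context, ESMValTool diagnostic scripts like this one are normally started through the `run_diagnostic` context manager from `esmvaltool.diag_scripts.shared`, which parses the settings file passed by the tool and yields the `cfg` dictionary consumed by `main`. A minimal entry point, following the pattern used by the bundled example diagnostics, looks like this:

from esmvaltool.diag_scripts.shared import run_diagnostic

if __name__ == '__main__':
    # run_diagnostic() reads the settings passed by ESMValTool and
    # provides the configuration dictionary handed to main().
    with run_diagnostic() as config:
        main(config)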
Example #2
from esmvaltool.diag_scripts import shared


def test_sorted_metadata():
    metadata = [
        {
            'short_name': 'ta',
            'dataset': 'dataset2',
        },
        {
            'short_name': 'pr',
            'dataset': 'dataset2',
            'random_attribute': 1,
        },
        {
            'short_name': 'ta',
            'dataset': 'dataset1',
        },
    ]

    result = shared.sorted_metadata(metadata, sort=['short_name', 'dataset'])

    assert result == [
        {
            'short_name': 'pr',
            'dataset': 'dataset2',
            'random_attribute': 1,
        },
        {
            'short_name': 'ta',
            'dataset': 'dataset1'
        },
        {
            'short_name': 'ta',
            'dataset': 'dataset2'
        },
    ]
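The test above pins down the contract of `sorted_metadata`: it accepts a single attribute name or a list of names and returns the metadata dictionaries ordered by those attributes in turn. A rough, illustrative re-implementation of those semantics (the real code lives in `esmvaltool.diag_scripts.shared`; treat this as a sketch, not the library's implementation):

def sorted_metadata_sketch(metadata, sort):
    """Sort a list of metadata dicts by one or more attributes."""
    if isinstance(sort, str):
        sort = [sort]
    # Missing attributes sort first via the '' fallback; that fallback is an
    # assumption of this sketch, not necessarily the library's behaviour.
    return sorted(metadata,
                  key=lambda item: [item.get(key, '') for key in sort])

Applied to the test input with sort=['short_name', 'dataset'], this yields the same ordering the assertion expects: pr/dataset2, ta/dataset1, ta/dataset2.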
Example #3
import logging
from pathlib import Path
from pprint import pformat

from esmvaltool.diag_scripts.shared import (group_metadata, select_metadata,
                                            sorted_metadata)

logger = logging.getLogger(Path(__file__).stem)


def main(cfg):
    """Compute the time average for each input dataset."""
    # Get a description of the preprocessed data that we will use as input.
    input_data = cfg['input_data'].values()

    # Demonstrate use of metadata access convenience functions.
    selection = select_metadata(input_data, short_name='tas', project='CMIP5')
    logger.info("Example of how to select only CMIP5 temperature data:\n%s",
                pformat(selection))

    selection = sorted_metadata(selection, sort='dataset')
    logger.info("Example of how to sort this selection by dataset:\n%s",
                pformat(selection))

    grouped_input_data = group_metadata(input_data,
                                        'variable_group',
                                        sort='dataset')
    logger.info(
        "Example of how to group and sort input data by variable groups from "
        "the recipe:\n%s", pformat(grouped_input_data))

    # Example of how to loop over variables/datasets in alphabetical order
    groups = group_metadata(input_data, 'variable_group', sort='dataset')
    for group_name in groups:
        logger.info("Processing variable %s", group_name)
        for attributes in groups[group_name]:
            logger.info("Processing dataset %s", attributes['dataset'])
            input_file = attributes['filename']
            cube = compute_diagnostic(input_file)

            output_basename = Path(input_file).stem
            if group_name != attributes['short_name']:
                output_basename = group_name + '_' + output_basename
            provenance_record = get_provenance_record(
                attributes, ancestor_files=[input_file])
            plot_diagnostic(cube, output_basename, provenance_record, cfg)
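`group_metadata` returns a mapping from each value of the grouping attribute (here the `variable_group` defined in the recipe) to the list of metadata dictionaries carrying that value, and the optional `sort` argument orders the entries within each group. A minimal sketch of those semantics, assuming a string-valued `sort` as used above (the real function may support other sort modes):

from collections import defaultdict

def group_metadata_sketch(metadata, attribute, sort=None):
    """Group metadata dicts by an attribute, optionally sorting each group."""
    groups = defaultdict(list)
    for item in metadata:
        groups[item.get(attribute)].append(item)
    if sort:
        # Sort the entries of each group by the given attribute.
        for items in groups.values():
            items.sort(key=lambda item: item.get(sort, ''))
    return dict(groups)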
Example #4
import logging
import os

import numpy as np

from esmvaltool.diag_scripts.shared import (ProvenanceLogger, group_metadata,
                                            sorted_metadata)

logger = logging.getLogger(os.path.basename(__file__))

# ens_anom, ens_eof_kmeans, ens_plots and get_provenance_record are helpers
# defined in (or imported by) the same diagnostic package.


def main(cfg):
    """Ensemble Clustering Diagnostics."""
    out_dir = cfg['work_dir']
    write_plots = cfg['write_plots']
    input_data = cfg['input_data'].values()
    input_data = sorted_metadata(input_data, sort='recipe_dataset_index')
    files_dict = group_metadata(input_data, 'filename', sort=False)
    numens = len(files_dict)
    logger.info('numens=%d', numens)

    # Build the name of the output files
    element = list(files_dict.values())[0][0]
    name_outputs = (element['short_name'] + '_' + str(numens) + 'ens_' +
                    cfg['season'] + '_' + cfg['area'] + '_' +
                    element['project'] + '_' + element['exp'])
    logger.info('The name of the output files will be <variable>_%s.txt',
                name_outputs)
    variable_name = element['short_name']
    max_plot_panels = cfg.get('max_plot_panels', 72)
    numpcs = cfg.get('numpcs', 0)
    perc = cfg.get('perc', 80)

    filenames_cat = []
    legend_cat = []
    for value in files_dict.values():
        logger.info("Processing file %s", value[0]['filename'])
        filenames_cat.append(value[0]['filename'])
        leg = (value[0]['project'] + " " + value[0]['dataset'] + " " +
               value[0]['exp'] + " " + value[0]['mip'] + " " +
               value[0]['short_name'] + " " + value[0]['ensemble'] + " " +
               str(value[0]['start_year']) + "-" + str(value[0]['end_year']))
        legend_cat.append(leg)
        logger.info('Processing: %s', leg)
    namef = os.path.join(out_dir, 'legend_{0}.txt'.format(name_outputs))
    np.savetxt(namef, legend_cat, fmt='%s')

    # ###################### PRECOMPUTATION #######################
    outfiles = ens_anom(filenames_cat, out_dir, name_outputs, variable_name,
                        numens, cfg['season'], cfg['area'], cfg['extreme'])

    # ###################### EOF AND K-MEANS ANALYSES #######################
    outfiles2 = ens_eof_kmeans(out_dir, name_outputs, numens, numpcs, perc,
                               cfg['numclus'])

    outfiles = outfiles + outfiles2
    provenance_record = get_provenance_record(cfg,
                                              list(files_dict.values())[0][0],
                                              ancestor_files=filenames_cat)

    # ###################### PLOT AND SAVE FIGURES ##########################
    if write_plots:
        plotfiles = ens_plots(out_dir, cfg['plot_dir'], name_outputs,
                              cfg['numclus'], 'anomalies',
                              cfg['output_file_type'], cfg['season'],
                              cfg['area'], cfg['extreme'], max_plot_panels)
    else:
        plotfiles = []

    for filename in outfiles + plotfiles:
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(filename, provenance_record)

    logger.info('\n>>>>>>>>>>>> ENDED SUCCESSFULLY!! <<<<<<<<<<<<\n')
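`get_provenance_record` is a helper defined elsewhere in this diagnostic; it builds the plain dictionary that `ProvenanceLogger.log` attaches to each output file. A hypothetical sketch of its shape, assuming the commonly used ESMValTool provenance keys (`caption`, `authors`, `ancestors`):

def get_provenance_record(cfg, attributes, ancestor_files):
    """Build an illustrative provenance record (keys are assumptions)."""
    # cfg could supply recipe-level options; it is unused in this sketch.
    caption = ("Ensemble clustering of {short_name} for the {exp} "
               "experiment".format(**attributes))
    return {
        'caption': caption,
        'authors': ['example_author'],  # hypothetical author entry
        'ancestors': ancestor_files,
    }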