def main(cfg):
    """Compute the time average for each input dataset."""
    # Metadata describing every preprocessed file handed to this diagnostic.
    input_data = cfg['input_data'].values()

    # Show off the metadata convenience helpers: select, then sort.
    selection = select_metadata(input_data, short_name='pr', project='CMIP5')
    logger.info("Example of how to select only CMIP5 precipitation data:\n%s",
                pformat(selection))

    selection = sorted_metadata(selection, sort='dataset')
    logger.info("Example of how to sort this selection by dataset:\n%s",
                pformat(selection))

    # Group everything by CF standard name, each group sorted by dataset.
    grouped_input_data = group_metadata(input_data, 'standard_name',
                                        sort='dataset')
    logger.info(
        "Example of how to group and sort input data by standard_name:"
        "\n%s", pformat(grouped_input_data))

    # Loop over variables/datasets in alphabetical order.
    for standard_name, attributes_list in grouped_input_data.items():
        logger.info("Processing variable %s", standard_name)
        for attributes in attributes_list:
            logger.info("Processing dataset %s", attributes['dataset'])
            input_file = attributes['filename']
            cube = compute_diagnostic(input_file)

            # Output name: input file name (no extension) plus '_mean'.
            stem = os.path.splitext(os.path.basename(input_file))[0]
            output_basename = stem + '_mean'
            provenance_record = get_provenance_record(
                attributes, ancestor_files=[input_file])
            plot_diagnostic(cube, output_basename, provenance_record, cfg)
def test_sorted_metadata():
    """sorted_metadata orders by short_name first, then by dataset."""
    unsorted = [
        {'short_name': 'ta', 'dataset': 'dataset2'},
        {'short_name': 'pr', 'dataset': 'dataset2', 'random_attribute': 1},
        {'short_name': 'ta', 'dataset': 'dataset1'},
    ]
    expected = [
        {'short_name': 'pr', 'dataset': 'dataset2', 'random_attribute': 1},
        {'short_name': 'ta', 'dataset': 'dataset1'},
        {'short_name': 'ta', 'dataset': 'dataset2'},
    ]

    result = shared.sorted_metadata(unsorted, sort=['short_name', 'dataset'])

    assert result == expected
def main(cfg):
    """Compute the time average for each input dataset.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; ``cfg['input_data']`` maps filenames to
        the metadata of each preprocessed input file.
    """
    # Get a description of the preprocessed data that we will use as input.
    input_data = cfg['input_data'].values()

    # Demonstrate use of metadata access convenience functions.
    selection = select_metadata(input_data, short_name='tas', project='CMIP5')
    logger.info("Example of how to select only CMIP5 temperature data:\n%s",
                pformat(selection))

    selection = sorted_metadata(selection, sort='dataset')
    logger.info("Example of how to sort this selection by dataset:\n%s",
                pformat(selection))

    grouped_input_data = group_metadata(input_data,
                                        'variable_group',
                                        sort='dataset')
    logger.info(
        "Example of how to group and sort input data by variable groups from "
        "the recipe:\n%s", pformat(grouped_input_data))

    # Example of how to loop over variables/datasets in alphabetical order.
    # Reuse the grouping computed above instead of calling group_metadata a
    # second time with identical arguments (was recomputed needlessly).
    for group_name in grouped_input_data:
        logger.info("Processing variable %s", group_name)
        for attributes in grouped_input_data[group_name]:
            logger.info("Processing dataset %s", attributes['dataset'])
            input_file = attributes['filename']
            cube = compute_diagnostic(input_file)

            output_basename = Path(input_file).stem
            # Prefix the variable-group name when it differs from the
            # short_name, so files from different groups don't collide.
            if group_name != attributes['short_name']:
                output_basename = group_name + '_' + output_basename
            provenance_record = get_provenance_record(
                attributes, ancestor_files=[input_file])
            plot_diagnostic(cube, output_basename, provenance_record, cfg)
def main(cfg):
    """Ensemble Clustering Diagnostics.

    Computes ensemble anomalies, runs an EOF + k-means analysis, and
    (optionally) plots the resulting clusters.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration (input data, work/plot directories,
        season/area/extreme settings, clustering parameters).
    """
    out_dir = cfg['work_dir']
    write_plots = cfg['write_plots']
    input_data = cfg['input_data'].values()
    input_data = sorted_metadata(input_data, sort='recipe_dataset_index')
    files_dict = group_metadata(input_data, 'filename', sort=False)
    numens = len(files_dict)
    logger.info('numens=%d', numens)

    # Building the name of output files
    element = list(files_dict.values())[0][0]
    name_outputs = (element['short_name'] + '_' + str(numens) + 'ens_' +
                    cfg['season'] + '_' + cfg['area'] + '_' +
                    element['project'] + '_' + element['exp'])
    logger.info('The name of the output files will be <variable>_%s.txt',
                name_outputs)
    variable_name = element['short_name']
    max_plot_panels = cfg.get('max_plot_panels', 72)
    numpcs = cfg.get('numpcs', 0)
    # BUG FIX: this previously read cfg.get('numpcs', 80) — a copy-paste of
    # the line above — so the 'perc' recipe setting (explained-variance
    # percentage threshold, default 80) was silently ignored.
    perc = cfg.get('perc', 80)

    filenames_cat = []
    legend_cat = []
    for value in files_dict.values():
        logger.info("Processing file %s", value[0]['filename'])
        filenames_cat.append(value[0]['filename'])
        leg = (value[0]['project'] + " " + value[0]['dataset'] + " " +
               value[0]['exp'] + " " + value[0]['mip'] + " " +
               value[0]['short_name'] + " " + value[0]['ensemble'] + " " +
               str(value[0]['start_year']) + "-" +
               str(value[0]['end_year']))
        legend_cat.append(leg)
        logger.info('Processing: %s', leg)
    # Persist the human-readable legend alongside the other outputs.
    namef = os.path.join(out_dir, 'legend_{0}.txt'.format(name_outputs))
    np.savetxt(namef, legend_cat, fmt='%s')

    # ###################### PRECOMPUTATION #######################
    outfiles = ens_anom(filenames_cat, out_dir, name_outputs, variable_name,
                        numens, cfg['season'], cfg['area'], cfg['extreme'])

    # ###################### EOF AND K-MEANS ANALYSES #######################
    outfiles2 = ens_eof_kmeans(out_dir, name_outputs, numens, numpcs, perc,
                               cfg['numclus'])

    outfiles = outfiles + outfiles2
    provenance_record = get_provenance_record(
        cfg, list(files_dict.values())[0][0], ancestor_files=filenames_cat)

    # ###################### PLOT AND SAVE FIGURES ##########################
    if write_plots:
        plotfiles = ens_plots(out_dir, cfg['plot_dir'], name_outputs,
                              cfg['numclus'], 'anomalies',
                              cfg['output_file_type'], cfg['season'],
                              cfg['area'], cfg['extreme'], max_plot_panels)
    else:
        plotfiles = []

    # Record provenance for every produced file (data and plots).
    for file in outfiles + plotfiles:
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(file, provenance_record)

    logger.info('\n>>>>>>>>>>>> ENDED SUCCESSFULLY!! <<<<<<<<<<<<\n')