def main(cfg):
    """Run the diagnostic."""
    sns.set(**cfg.get('seaborn_settings', {}))
    patterns = cfg.get('patterns')
    if patterns is None:
        input_files = io.get_all_ancestor_files(cfg)
    else:
        input_files = []
        for pattern in patterns:
            input_files.extend(io.get_all_ancestor_files(cfg, pattern=pattern))
    if not input_files:
        raise ValueError("No input files found")
    logger.info("Found input files:\n%s", pformat(input_files))

    # Iterate over all files and extract data
    (all_data, all_files, metadata) = get_all_data(cfg, input_files)

    # Create plot and netcdf file
    plot_path = plot_data(cfg, all_data, metadata)
    netcdf_path = write_data(cfg, all_data, metadata)

    # Provenance
    caption = f"{metadata['long_name']} for multiple datasets."
    provenance_record = get_provenance_record(caption, all_files)
    if plot_path is not None:
        provenance_record.update({
            'plot_file': plot_path,
            'plot_types': ['bar'],
        })
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
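These mains are ESMValTool diagnostic scripts; the excerpts omit the imports and entry-point boilerplate. A minimal sketch of the usual surrounding context, assuming the standard esmvaltool.diag_scripts.shared helpers (the exact import list varies per script):

import logging
from pprint import pformat

import seaborn as sns

from esmvaltool.diag_scripts.shared import ProvenanceLogger, io, run_diagnostic

logger = logging.getLogger(__name__)

# ... def main(cfg) as above ...

if __name__ == '__main__':
    # Standard ESMValTool entry point: read the recipe settings and run.
    with run_diagnostic() as config:
        main(config)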
Example #2
def get_input_files(cfg, patterns=None, ignore_patterns=None):
    """Get input files.

    Parameters
    ----------
    cfg : dict
        Recipe configuration.
    patterns : list of str, optional
        Use only files that match these patterns as input files.
    ignore_patterns : list of str, optional
        Ignore input files that match these patterns.

    Returns
    -------
    list of str
        Input files.

    """
    input_files = []

    # Include only files that match patterns
    if not patterns:
        patterns = ['*.nc']
    for pattern in patterns:
        logger.debug("Looking for files matching the pattern '%s'", pattern)
        input_files.extend(io.get_all_ancestor_files(cfg, pattern=pattern))

    # Ignore files
    if not ignore_patterns:
        return input_files
    ignore_files = set()
    for pattern in ignore_patterns:
        logger.debug("Ignoring files matching the pattern '%s'", pattern)
        ignore_files.update(io.get_all_ancestor_files(cfg, pattern=pattern))
    return [f for f in input_files if f not in ignore_files]
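A short usage sketch of get_input_files; the pattern strings below are illustrative, not taken from any original recipe:

# Take all tas_*.nc ancestor files, but skip multi-model means.
input_files = get_input_files(
    cfg,
    patterns=['tas_*.nc'],
    ignore_patterns=['*MultiModelMean*.nc'],
)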
Example #3
def main(cfg):
    """Run the diagnostic."""
    cfg.setdefault('exclude_datasets', ['MultiModelMean'])

    # Get input files
    patterns = cfg.get('patterns')
    if patterns is None:
        input_files = io.get_all_ancestor_files(cfg)
    else:
        input_files = []
        for pattern in patterns:
            input_files.extend(io.get_all_ancestor_files(cfg, pattern=pattern))
    if not input_files:
        raise ValueError("No input files found")
    logger.info("Found input files:\n%s", pformat(input_files))

    # Create data frame
    data_frame = create_data_frame(input_files, cfg['exclude_datasets'])

    # Calculate statistics
    data_frame = calculate_statistics(data_frame, cfg)

    # Round output if desired
    if 'round_output' in cfg:
        data_frame = data_frame.round(decimals=cfg['round_output'])

    # Save file
    basename = '-'.join(data_frame.index.levels[0]) + '_'
    basename += '-'.join(data_frame.columns)
    csv_path = get_diagnostic_filename(basename, cfg, extension='csv')
    data_frame.to_csv(csv_path)
    logger.info("Wrote %s", csv_path)
    with pd.option_context(*PANDAS_PRINT_OPTIONS):
        logger.info("Data:\n%s", data_frame)

    # Provenance
    write_provenance(cfg, csv_path, data_frame, input_files)
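For reference, the recipe options this main consumes, collected as a hedged sketch (the values are illustrative; the cfg dict also carries the usual ESMValTool bookkeeping keys such as the output directories):

cfg_example = {
    'exclude_datasets': ['MultiModelMean'],  # datasets dropped from the table
    'patterns': ['*historical*.nc'],         # optional input-file filters
    'round_output': 2,                       # optional: round values to 2 decimals
}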
Example #4
def test_get_all_ancestor_files(mock_walk, pattern, output):
    """Test retrieving of ancestor files."""
    input_dirs = [
        [
            (ROOT_DIR, ['dir', '__pycache__'], ['test.nc', 'egg.yml']),
            (os.path.join(ROOT_DIR, 'root2'), ['d'], ['x.nc', 'y.png']),
            (os.path.join(ROOT_DIR, 'root3'), [], ['egg.nc']),
        ],
        [
            (ROOT_DIR, ['dir', '__pycache__'], ['test_1.nc', 'test_2.yml']),
            (os.path.join(ROOT_DIR, 'root4'), ['d2'], ['egg.nc']),
        ],
    ]
    mock_walk.side_effect = input_dirs
    files = io.get_all_ancestor_files(CFG, pattern=pattern)
    assert files == output
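As excerpted, the test's fixtures are missing: in the original module, decorators such as pytest's parametrize and a patch of os.walk presumably supply mock_walk, pattern, and output, with ROOT_DIR and CFG defined as module-level constants. A minimal, self-contained sketch of the side_effect mechanism the test relies on (paths and listings are illustrative):

import os
from unittest import mock

# Each call to the patched os.walk returns the next canned listing.
with mock.patch('os.walk') as mock_walk:
    mock_walk.side_effect = [
        [('/root', ['dir'], ['test.nc', 'egg.yml'])],
        [('/root', ['dir'], ['test_1.nc', 'test_2.yml'])],
    ]
    first = list(os.walk('/any/path'))   # first canned listing
    second = list(os.walk('/any/path'))  # second canned listing
    assert first != second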
Example #5
def main(cfg):
    """Run the diagnostic."""
    sns.set(**cfg.get('seaborn_settings', {}))
    input_files = io.get_all_ancestor_files(cfg, pattern=cfg.get('pattern'))
    if len(input_files) != 1:
        raise ValueError(f"Expected exactly 1 file, got {len(input_files)}")
    input_file = input_files[0]
    logger.info("Found input file: %s", input_file)

    # Create plots
    cube = iris.load_cube(input_file)
    try:
        cube.coord('dataset')
    except iris.exceptions.CoordinateNotFoundError:
        logger.error(
            "File '%s' does not contain necessary coordinate 'dataset'",
            input_file)
        raise

    # Sort coordinate 'dataset'
    [cube] = iris_helpers.intersect_dataset_coordinates([cube])

    # Create plot and netcdf file
    plot_path = plot_data(cfg, cube)
    netcdf_path = write_data(cfg, cube)

    # Provenance
    project = cube.attributes.get('project')
    caption = "{}{} for multiple datasets.".format(
        cube.long_name, '' if project is None else f' for {project}')
    provenance_record = get_provenance_record(caption, [input_file])
    if plot_path is not None:
        provenance_record.update({
            'plot_file': plot_path,
            'plot_types': ['scatter'],
        })
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
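get_provenance_record, plot_data, and write_data are defined elsewhere in each script. A hedged sketch of the shape such a provenance record takes, inferred from the keys used above ('plot_file', 'plot_types') and the ProvenanceLogger convention; the exact keys vary per script:

def get_provenance_record(caption, ancestor_files):
    """Hypothetical sketch of the per-script provenance helper."""
    return {
        'caption': caption,
        'ancestors': ancestor_files,
        # Real scripts usually also set keys such as 'authors' and 'references'.
    }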