Esempio n. 1
0
    def h5(cls,
           h5_file,
           out_dir,
           dsets=None,
           group=None,
           process_size=None,
           max_workers=None,
           plot_type='plotly',
           cmap='viridis',
           **kwargs):
        """
        Run QA/QC by computing summary stats from dsets in h5_file and
        plotting scatter plots of compatible summary stats

        Parameters
        ----------
        h5_file : str
            Path to .h5 file to run QA/QC on
        out_dir : str
            Directory path to save summary tables and plots to
        dsets : str | list, optional
            Datasets to summarize, by default None
        group : str, optional
            Group within h5_file to summarize datasets for, by default None
        process_size : int, optional
            Number of sites to process at a time, by default None
        max_workers : int, optional
            Number of workers to use when summarizing 2D datasets,
            by default None
        plot_type : str, optional
            Type of plot to create, 'plot' or 'plotly', by default 'plotly'
        cmap : str, optional
            Colormap name, by default 'viridis'
        kwargs : dict
            Additional plotting kwargs, forwarded to create_scatter_plots

        Raises
        ------
        Exception
            Any exception raised while creating the QA/QC instance,
            summarizing h5_file, or plotting is logged and then re-raised
        """
        try:
            qa_qc = cls(out_dir)
            # Summary tables are written to out_dir before plotting
            SummarizeH5.run(h5_file,
                            out_dir,
                            group=group,
                            dsets=dsets,
                            process_size=process_size,
                            max_workers=max_workers)
            qa_qc.create_scatter_plots(plot_type=plot_type,
                                       cmap=cmap,
                                       **kwargs)
        except Exception as e:
            logger.exception(
                'QAQC failed on file: {}. Received exception:\n{}'.format(
                    os.path.basename(h5_file), e))
            # Bare raise re-raises the active exception as-is
            raise
        else:
            logger.info(
                'Finished QAQC on file: {} output directory: {}'.format(
                    os.path.basename(h5_file), out_dir))
Esempio n. 2
0
def h5(ctx, h5_file, dsets, group, process_size, max_workers):
    """
    Summarize datasets in .h5 file
    """
    # NOTE(review): the docstring above is kept verbatim; if this function is
    # registered as a CLI command (decorators are not visible here) it may be
    # shown as help text -- confirm before rewording it.

    # The output directory is read from the shared context object
    out_dir = ctx.obj['OUT_DIR']

    # Remaining arguments are forwarded unchanged as keywords
    summary_kwargs = {
        'group': group,
        'dsets': dsets,
        'process_size': process_size,
        'max_workers': max_workers,
    }
    SummarizeH5.run(h5_file, out_dir, **summary_kwargs)
Esempio n. 3
0
def test_summarize(dataset):
    """Run QA/QC Summarize and compare with baseline

    Parameters
    ----------
    dataset : str | None
        Which summary to check. None compares the output of
        summarize_means() with 'ri_wind_gen_profiles_2010_summary.csv';
        'cf_mean' or 'cf_profile' compares the output of summarize_dset()
        for that dataset with '<dataset>_summary.csv'.

    Raises
    ------
    ValueError
        If dataset is not None, 'cf_mean', or 'cf_profile'
    """
    summary = SummarizeH5(H5_FILE)

    if dataset is None:
        baseline = os.path.join(SUMMARY_DIR,
                                'ri_wind_gen_profiles_2010_summary.csv')
        baseline = pd.read_csv(baseline)
        test = summary.summarize_means()
    elif dataset in ('cf_mean', 'cf_profile'):
        # Both dataset summaries share the same baseline naming scheme and
        # are read with the first column as the index
        baseline = os.path.join(SUMMARY_DIR,
                                '{}_summary.csv'.format(dataset))
        baseline = pd.read_csv(baseline, index_col=0)
        test = summary.summarize_dset(dataset,
                                      process_size=None,
                                      max_workers=1)
    else:
        # Fail with a clear message instead of an UnboundLocalError at the
        # comparison below
        raise ValueError('No baseline available for dataset: {!r}'
                         .format(dataset))

    assert_frame_equal(test, baseline, check_dtype=False)