def h5(cls, h5_file, out_dir, dsets=None, group=None, process_size=None,
       max_workers=None, plot_type='plotly', cmap='viridis', **kwargs):
    """
    Run QA/QC by computing summary stats from dsets in h5_file and
    plotting scatter plots of compatible summary stats

    Parameters
    ----------
    h5_file : str
        Path to .h5 file to run QA/QC on
    out_dir : str
        Directory path to save summary tables and plots to
    dsets : str | list, optional
        Datasets to summarize, by default None
    group : str, optional
        Group within h5_file to summarize datasets for, by default None
    process_size : int, optional
        Number of sites to process at a time, by default None
    max_workers : int, optional
        Number of workers to use when summarizing 2D datasets,
        by default None
    plot_type : str, optional
        Type of plot to create, 'plot' or 'plotly', by default 'plotly'
    cmap : str, optional
        Colormap name, by default 'viridis'
    kwargs : dict
        Additional plotting kwargs

    Raises
    ------
    Exception
        Any exception raised while summarizing or plotting is logged and
        then re-raised unchanged.
    """
    try:
        qa_qc = cls(out_dir)
        SummarizeH5.run(h5_file, out_dir, group=group, dsets=dsets,
                        process_size=process_size, max_workers=max_workers)
        qa_qc.create_scatter_plots(plot_type=plot_type, cmap=cmap, **kwargs)
    except Exception as e:
        logger.exception(
            'QAQC failed on file: {}. Received exception:\n{}'.format(
                os.path.basename(h5_file), e))
        # Bare ``raise`` re-raises the active exception as-is, without
        # adding this line to its traceback a second time.
        raise
    else:
        logger.info(
            'Finished QAQC on file: {} output directory: {}'.format(
                os.path.basename(h5_file), out_dir))
def h5(ctx, h5_file, dsets, group, process_size, max_workers):
    """
    Summarize datasets in .h5 file
    """
    # Output directory is read from the context object's 'OUT_DIR' entry
    out_dir = ctx.obj['OUT_DIR']

    # Remaining options are forwarded to the summarizer by keyword
    run_options = {
        'group': group,
        'dsets': dsets,
        'process_size': process_size,
        'max_workers': max_workers,
    }
    SummarizeH5.run(h5_file, out_dir, **run_options)
def test_summarize(dataset):
    """
    Run QA/QC Summarize and compare with baseline

    Parameters
    ----------
    dataset : str | None
        Dataset to summarize and compare. If None, the output of
        ``summarize_means`` is compared against
        'ri_wind_gen_profiles_2010_summary.csv'. Otherwise the output of
        ``summarize_dset`` for that dataset is compared against
        '<dataset>_summary.csv'. Both baselines are read from SUMMARY_DIR.
    """
    summary = SummarizeH5(H5_FILE)

    if dataset is None:
        baseline_path = os.path.join(SUMMARY_DIR,
                                     'ri_wind_gen_profiles_2010_summary.csv')
        baseline = pd.read_csv(baseline_path)
        test = summary.summarize_means()
    else:
        # Per-dataset baselines share one naming scheme, so a single branch
        # covers every dataset; an unknown name fails on the missing
        # baseline file instead of on unbound local variables.
        baseline_path = os.path.join(SUMMARY_DIR,
                                     '{}_summary.csv'.format(dataset))
        baseline = pd.read_csv(baseline_path, index_col=0)
        test = summary.summarize_dset(dataset, process_size=None,
                                      max_workers=1)

    assert_frame_equal(test, baseline, check_dtype=False)