def test_integrated():
    """Multi-region rep profiles: serial vs. parallel vs. baseline.

    Runs RepProfiles over three regions with ``max_workers=1`` and with
    ``max_workers=None``, checks that both runs agree on the representative
    resource gids and on the first profile set, and then compares the serial
    meta against the hard-coded baseline gids.
    """
    n_sites = 100
    gids = np.arange(n_sites)
    unit_weights = np.ones((n_sites, ))
    res_class = np.zeros((n_sites, ))
    region_labels = ['r0'] * 7 + ['r1'] * 33 + ['r2'] * 60
    tz = np.random.choice([-4, -5, -6, -7], n_sites)

    rev_summary = pd.DataFrame({
        'gen_gids': gids,
        'res_gids': gids,
        'res_class': res_class,
        'weight': unit_weights,
        'region': region_labels,
        'timezone': tz,
    })

    serial_profiles, serial_meta, _ = RepProfiles.run(
        GEN_FPATH, rev_summary, 'region', max_workers=1, weight='weight')
    parallel_profiles, parallel_meta, _ = RepProfiles.run(
        GEN_FPATH, rev_summary, 'region', max_workers=None, weight='weight')

    # Both execution modes must pick the same representative sites.
    serial_gids = serial_meta['rep_res_gid'].values.astype(int)
    parallel_gids = parallel_meta['rep_res_gid'].values.astype(int)
    assert np.allclose(serial_gids, parallel_gids)
    assert np.allclose(serial_profiles[0], parallel_profiles[0])

    # Baseline representative resource gid for meta rows 0, 1 and 2.
    for row, baseline_gid in enumerate((4, 15, 60)):
        assert serial_meta.loc[row, 'rep_res_gid'] == baseline_gid
def direct(ctx, gen_fpath, rev_summary, reg_cols, cf_dset, rep_method,
           err_method, weight, n_profiles, out_dir, log_dir, max_workers,
           aggregate_profiles, verbose):
    """reV representative profiles CLI."""
    # NOTE(review): docstring left untouched on purpose - if this function is
    # registered as a CLI command the docstring is presumably its help text.
    name = ctx.obj['NAME']

    # Record every option on the shared context object, in a fixed order.
    context_entries = (
        ('GEN_FPATH', gen_fpath),
        ('REV_SUMMARY', rev_summary),
        ('REG_COLS', reg_cols),
        ('CF_DSET', cf_dset),
        ('REP_METHOD', rep_method),
        ('ERR_METHOD', err_method),
        ('WEIGHT', weight),
        ('N_PROFILES', n_profiles),
        ('OUT_DIR', out_dir),
        ('LOG_DIR', log_dir),
        ('MAX_WORKERS', max_workers),
        ('AGGREGATE_PROFILES', aggregate_profiles),
        ('VERBOSE', verbose),
    )
    for key, value in context_entries:
        ctx.obj[key] = value

    # Only run here when no subcommand was invoked.
    if ctx.invoked_subcommand is not None:
        return

    t0 = time.time()
    init_mult(name, log_dir, modules=['reV', 'rex'], verbose=verbose)

    # Output file is named after the job and placed in the output directory.
    fn_out = '{}.h5'.format(name)
    fout = os.path.join(out_dir, fn_out)

    if aggregate_profiles:
        AggregatedRepProfiles.run(gen_fpath, rev_summary,
                                  cf_dset=cf_dset,
                                  weight=weight,
                                  fout=fout,
                                  max_workers=max_workers)
    else:
        RepProfiles.run(gen_fpath, rev_summary, reg_cols,
                        cf_dset=cf_dset,
                        rep_method=rep_method,
                        err_method=err_method,
                        weight=weight,
                        fout=fout,
                        n_profiles=n_profiles,
                        max_workers=max_workers)

    # Elapsed wall time in minutes.
    runtime = (time.time() - t0) / 60
    message = ('reV representative profiles complete. '
               'Time elapsed: {:.2f} min. Target output dir: {}')
    logger.info(message.format(runtime, out_dir))

    # Write the job status file describing this completed run.
    status = {
        'dirout': out_dir,
        'fout': fn_out,
        'job_status': 'successful',
        'runtime': runtime,
        'finput': [gen_fpath, rev_summary],
    }
    Status.make_job_file(out_dir, 'rep-profiles', name, status)
def test_many_regions():
    """Rep profiles grouped on two overlapping region columns.

    The two label columns below combine into 6 distinct
    (region1, region2) pairs, so 6 profiles and 6 meta rows are expected,
    and every individual label must show up in the output meta.
    """
    n_sites = 100
    gids = np.arange(n_sites)
    res_class = np.zeros((n_sites, ))
    region1 = ['r0'] * 7 + ['r1'] * 33 + ['r2'] * 60
    region2 = ['a0'] * 20 + ['b1'] * 10 + ['c2'] * 20 + ['d3'] * 50
    tz = np.random.choice([-4, -5, -6, -7], n_sites)

    rev_summary = pd.DataFrame({
        'gen_gids': gids,
        'res_gids': gids,
        'res_class': res_class,
        'region1': region1,
        'region2': region2,
        'timezone': tz,
    })

    profiles, meta, _ = RepProfiles.run(
        GEN_FPATH, rev_summary, ['region1', 'region2'], weight=None)

    assert profiles[0].shape == (17520, 6)
    assert len(meta) == 6

    # Every input label of each region column must appear in the meta.
    for column, labels in (('region1', region1), ('region2', region2)):
        for label in set(labels):
            assert label in meta[column].values
def test_sc_points():
    """Rep profiles with one region per SC point.

    Each of the 10 sc_gids maps to a single gen gid, so the first rep
    profile set is compared against the first 10 'cf_profile' columns read
    directly from the generation file.
    """
    n_points = 10
    gids = np.arange(n_points)
    tz = np.random.choice([-4, -5, -6, -7], n_points)

    rev_summary = pd.DataFrame({
        'sc_gid': gids,
        'gen_gids': gids,
        'res_gids': gids,
        'timezone': tz,
    })

    outputs = RepProfiles.run(GEN_FPATH, rev_summary, 'sc_gid',
                              weight=None, max_workers=1)
    rep_profiles = outputs[0]

    with Resource(GEN_FPATH) as res:
        expected = res['cf_profile', :, 0:10]

    assert np.allclose(rep_profiles[0], expected)
def test_write_to_file():
    """Test rep profiles with file write.

    Runs RepProfiles with ``n_profiles=3`` and an output file, then checks
    that the file holds distinct profile datasets, that the first dataset
    matches the in-memory result, and that the 'rep_gen_gid' meta entries on
    disk match the returned meta.
    """
    sites = np.arange(100)
    zeros = np.zeros((100, ))
    regions = (['r0'] * 7) + (['r1'] * 33) + (['r2'] * 60)
    timezone = np.random.choice([-4, -5, -6, -7], 100)
    rev_summary = pd.DataFrame({
        'gen_gids': sites,
        'res_gids': sites,
        'res_class': zeros,
        'region': regions,
        'timezone': timezone,
    })

    fout = os.path.join(TESTDATADIR, 'sc_out/temp_rep_profiles.h5')
    try:
        p1, m1, _ = RepProfiles.run(GEN_FPATH, rev_summary, 'region',
                                    fout=fout, n_profiles=3, weight=None)

        with Resource(fout) as res:
            disk_profiles = res['rep_profiles_0']
            disk_meta = res.meta
            assert 'rep_profiles_2' in res.datasets
            assert not np.array_equal(res['rep_profiles_0'],
                                      res['rep_profiles_1'])

        assert np.allclose(p1[0], disk_profiles)
        assert len(disk_meta) == 3

        # 'rep_gen_gid' is stored as a JSON string; compare decoded values.
        for i in m1.index:
            v1 = json.loads(m1.loc[i, 'rep_gen_gid'])
            v2 = json.loads(disk_meta.loc[i, 'rep_gen_gid'])
            assert v1 == v2
    finally:
        # Clean up even when an assertion fails so a stale output file
        # cannot leak into other tests that write to the same path.
        if PURGE_OUT and os.path.exists(fout):
            os.remove(fout)
def test_file_options():
    """Test rep profiles file write options.

    Runs RepProfiles with ``save_rev_summary=False`` and
    ``scaled_precision=True`` and checks that the profiles on disk use an
    integer dtype with a 'scale_factor' attribute of 1000, still match the
    in-memory profiles when read back, and that no 'rev_summary' dataset
    was written.
    """
    sites = np.arange(100)
    zeros = np.zeros((100, ))
    regions = (['r0'] * 7) + (['r1'] * 33) + (['r2'] * 60)
    timezone = np.random.choice([-4, -5, -6, -7], 100)
    rev_summary = pd.DataFrame({
        'gen_gids': sites,
        'res_gids': sites,
        'res_class': zeros,
        'region': regions,
        'timezone': timezone,
    })

    fout = os.path.join(TESTDATADIR, 'sc_out/temp_rep_profiles.h5')
    try:
        p1, _, _ = RepProfiles.run(GEN_FPATH, rev_summary, 'region',
                                   fout=fout, n_profiles=3,
                                   save_rev_summary=False,
                                   scaled_precision=True, weight=None)

        with Resource(fout) as res:
            dtype = res.get_dset_properties('rep_profiles_0')[1]
            attrs = res.get_attrs('rep_profiles_0')
            disk_profiles = res['rep_profiles_0']
            disk_dsets = res.datasets

        assert np.issubdtype(dtype, np.integer)
        assert attrs['scale_factor'] == 1000
        assert np.allclose(p1[0], disk_profiles)
        assert 'rev_summary' not in disk_dsets
    finally:
        # Clean up even when an assertion fails so a stale output file
        # cannot leak into other tests that write to the same path.
        if PURGE_OUT and os.path.exists(fout):
            os.remove(fout)