Esempio n. 1
0
def test_integrated():
    """Run a three-region rep profile calc serially and in parallel.

    The same summary table is processed with ``max_workers=1`` and with
    ``max_workers=None``. Both runs must agree with each other, and the
    serial meta data must match the baseline representative resource gids.
    """
    n_sites = 100
    gids = np.arange(n_sites)
    # 7 + 33 + 60 sites, one contiguous run of sites per region label.
    region_labels = [label
                     for label, count in (('r0', 7), ('r1', 33), ('r2', 60))
                     for _ in range(count)]
    rev_summary = pd.DataFrame({
        'gen_gids': gids,
        'res_gids': gids,
        'res_class': np.zeros((n_sites, )),
        'weight': np.ones((n_sites, )),
        'region': region_labels,
        'timezone': np.random.choice([-4, -5, -6, -7], n_sites),
    })

    serial_profiles, serial_meta, _ = RepProfiles.run(
        GEN_FPATH, rev_summary, 'region', max_workers=1, weight='weight')
    parallel_profiles, parallel_meta, _ = RepProfiles.run(
        GEN_FPATH, rev_summary, 'region', max_workers=None, weight='weight')

    # Serial and parallel execution must select the same sites and profiles.
    serial_gids = serial_meta['rep_res_gid'].values.astype(int)
    parallel_gids = parallel_meta['rep_res_gid'].values.astype(int)
    assert np.allclose(serial_gids, parallel_gids)
    assert np.allclose(serial_profiles[0], parallel_profiles[0])

    # Baseline representative resource gid for each of the three regions.
    for row, expected_gid in enumerate((4, 15, 60)):
        assert serial_meta.loc[row, 'rep_res_gid'] == expected_gid
Esempio n. 2
0
def direct(ctx, gen_fpath, rev_summary, reg_cols, cf_dset, rep_method,
           err_method, weight, n_profiles, out_dir, log_dir, max_workers,
           aggregate_profiles, verbose):
    """reV representative profiles CLI.

    Every option is stored on ``ctx.obj`` under its upper-case key. If no
    subcommand was invoked, the job is run right here: logging is
    initialized, either ``AggregatedRepProfiles`` or ``RepProfiles`` is run
    with ``<out_dir>/<name>.h5`` as the output file, and a status dictionary
    is handed to ``Status.make_job_file``.
    """
    name = ctx.obj['NAME']

    cli_options = (
        ('GEN_FPATH', gen_fpath),
        ('REV_SUMMARY', rev_summary),
        ('REG_COLS', reg_cols),
        ('CF_DSET', cf_dset),
        ('REP_METHOD', rep_method),
        ('ERR_METHOD', err_method),
        ('WEIGHT', weight),
        ('N_PROFILES', n_profiles),
        ('OUT_DIR', out_dir),
        ('LOG_DIR', log_dir),
        ('MAX_WORKERS', max_workers),
        ('AGGREGATE_PROFILES', aggregate_profiles),
        ('VERBOSE', verbose),
    )
    for key, value in cli_options:
        ctx.obj[key] = value

    # Nothing more to do in this function when a subcommand was invoked.
    if ctx.invoked_subcommand is not None:
        return

    t0 = time.time()
    init_mult(name, log_dir, modules=['reV', 'rex'], verbose=verbose)

    fn_out = '{}.h5'.format(name)
    fout = os.path.join(out_dir, fn_out)

    # Keyword arguments common to both run modes.
    shared_kwargs = {
        'cf_dset': cf_dset,
        'weight': weight,
        'fout': fout,
        'max_workers': max_workers,
    }
    if aggregate_profiles:
        AggregatedRepProfiles.run(gen_fpath, rev_summary, **shared_kwargs)
    else:
        RepProfiles.run(gen_fpath,
                        rev_summary,
                        reg_cols,
                        rep_method=rep_method,
                        err_method=err_method,
                        n_profiles=n_profiles,
                        **shared_kwargs)

    # Elapsed wall-clock time converted from seconds to minutes.
    runtime = (time.time() - t0) / 60
    message = ('reV representative profiles complete. '
               'Time elapsed: {:.2f} min. Target output dir: {}')
    logger.info(message.format(runtime, out_dir))

    status = {
        'dirout': out_dir,
        'fout': fn_out,
        'job_status': 'successful',
        'runtime': runtime,
        'finput': [gen_fpath, rev_summary],
    }
    Status.make_job_file(out_dir, 'rep-profiles', name, status)
Esempio n. 3
0
def test_many_regions():
    """Test rep profiles when regions are defined by two columns.

    The ``region1`` and ``region2`` labels overlap in six distinct
    combinations, so six profiles and six meta rows are expected, and every
    individual label must appear in the output meta data.
    """
    n_sites = 100
    gids = np.arange(n_sites)
    region_labels = {
        'region1': ['r0'] * 7 + ['r1'] * 33 + ['r2'] * 60,
        'region2': ['a0'] * 20 + ['b1'] * 10 + ['c2'] * 20 + ['d3'] * 50,
    }
    rev_summary = pd.DataFrame({
        'gen_gids': gids,
        'res_gids': gids,
        'res_class': np.zeros((n_sites, )),
        'region1': region_labels['region1'],
        'region2': region_labels['region2'],
        'timezone': np.random.choice([-4, -5, -6, -7], n_sites),
    })

    profiles, meta, _ = RepProfiles.run(GEN_FPATH,
                                        rev_summary,
                                        list(region_labels),
                                        weight=None)

    n_combinations = 6
    assert profiles[0].shape == (17520, n_combinations)
    assert len(meta) == n_combinations

    # Every label of both region columns has to survive into the meta data.
    for column, labels in region_labels.items():
        found = meta[column].values
        for label in set(labels):
            assert label in found
Esempio n. 4
0
def test_sc_points():
    """Test rep profiles when every SC point is its own region.

    Each ``sc_gid`` holds a single site, so the returned profiles are
    compared against the first ten columns of the ``cf_profile`` dataset
    read directly from the generation file.
    """
    n_points = 10
    gids = np.arange(n_points)
    rev_summary = pd.DataFrame({
        'sc_gid': gids,
        'gen_gids': gids,
        'res_gids': gids,
        'timezone': np.random.choice([-4, -5, -6, -7], n_points),
    })

    output = RepProfiles.run(GEN_FPATH,
                             rev_summary,
                             'sc_gid',
                             weight=None,
                             max_workers=1)
    profiles = output[0]

    with Resource(GEN_FPATH) as res:
        expected = res['cf_profile', :, slice(0, n_points)]

    assert np.allclose(profiles[0], expected)
Esempio n. 5
0
def test_write_to_file():
    """Test rep profiles with file write.

    Three profiles per region are requested and written to a temporary h5
    file. The data read back from disk must match the in-memory results.

    The temporary file is removed in a ``finally`` clause (when
    ``PURGE_OUT`` is set), so a failing assertion no longer leaves a stale
    file behind for later tests that use the same output path.
    """

    sites = np.arange(100)
    zeros = np.zeros((100, ))
    regions = (['r0'] * 7) + (['r1'] * 33) + (['r2'] * 60)
    timezone = np.random.choice([-4, -5, -6, -7], 100)
    rev_summary = pd.DataFrame({
        'gen_gids': sites,
        'res_gids': sites,
        'res_class': zeros,
        'region': regions,
        'timezone': timezone
    })
    fout = os.path.join(TESTDATADIR, 'sc_out/temp_rep_profiles.h5')
    try:
        p1, m1, _ = RepProfiles.run(GEN_FPATH,
                                    rev_summary,
                                    'region',
                                    fout=fout,
                                    n_profiles=3,
                                    weight=None)
        with Resource(fout) as res:
            disk_profiles = res['rep_profiles_0']
            disk_meta = res.meta
            # n_profiles=3 should give datasets 0, 1 and 2, and the first
            # two must not be identical copies of each other.
            assert 'rep_profiles_2' in res.datasets
            assert not np.array_equal(res['rep_profiles_0'],
                                      res['rep_profiles_1'])

        assert np.allclose(p1[0], disk_profiles)
        assert len(disk_meta) == 3

        # rep_gen_gid is stored as a JSON string; compare the decoded values.
        for i in m1.index:
            v1 = json.loads(m1.loc[i, 'rep_gen_gid'])
            v2 = json.loads(disk_meta.loc[i, 'rep_gen_gid'])
            assert v1 == v2
    finally:
        # Guard on existence so a failure before the file was written is not
        # masked by a FileNotFoundError raised from the cleanup.
        if PURGE_OUT and os.path.exists(fout):
            os.remove(fout)
Esempio n. 6
0
def test_file_options():
    """Test the rep profiles file output options.

    Runs with ``save_rev_summary=False`` and ``scaled_precision=True`` and
    checks that the profiles on disk have an integer dtype with a
    ``scale_factor`` attribute of 1000, still match the in-memory profiles,
    and that no ``rev_summary`` dataset was written.

    The temporary file is removed in a ``finally`` clause (when
    ``PURGE_OUT`` is set), so a failing assertion no longer leaves a stale
    file behind for later tests that use the same output path.
    """

    sites = np.arange(100)
    zeros = np.zeros((100, ))
    regions = (['r0'] * 7) + (['r1'] * 33) + (['r2'] * 60)
    timezone = np.random.choice([-4, -5, -6, -7], 100)
    rev_summary = pd.DataFrame({
        'gen_gids': sites,
        'res_gids': sites,
        'res_class': zeros,
        'region': regions,
        'timezone': timezone
    })
    fout = os.path.join(TESTDATADIR, 'sc_out/temp_rep_profiles.h5')
    try:
        p1, _, _ = RepProfiles.run(GEN_FPATH,
                                   rev_summary,
                                   'region',
                                   fout=fout,
                                   n_profiles=3,
                                   save_rev_summary=False,
                                   scaled_precision=True,
                                   weight=None)
        with Resource(fout) as res:
            dtype = res.get_dset_properties('rep_profiles_0')[1]
            attrs = res.get_attrs('rep_profiles_0')
            disk_profiles = res['rep_profiles_0']
            disk_dsets = res.datasets

        assert np.issubdtype(dtype, np.integer)
        assert attrs['scale_factor'] == 1000
        assert np.allclose(p1[0], disk_profiles)
        assert 'rev_summary' not in disk_dsets
    finally:
        # Guard on existence so a failure before the file was written is not
        # masked by a FileNotFoundError raised from the cleanup.
        if PURGE_OUT and os.path.exists(fout):
            os.remove(fout)