Ejemplo n.º 1
0
def test_collect_missing_ens_cycle(
    file_glob,
    ans_file,
    n_cores
):
    """Collect a dataset with missing ensemble/cycle members vs a stored answer.

    Also verifies that a chunked collection (file_chunk_size=1) reproduces
    the unchunked result exactly.

    Args:
        file_glob: Glob pattern selecting the input files.
        ans_file: Name of the answer netCDF file under ``answer_dir``.
        n_cores: Number of cores passed through to ``open_whp_dataset``.
    """
    miss_ens_cycle_path = test_dir.joinpath(miss_ens_cycle_dir)
    files = sorted(miss_ens_cycle_path.glob(file_glob))
    ens_cycle_ds = open_whp_dataset(files, n_cores=n_cores)
    # There is a bit of tricky encoding to deal with NaN in strings in netcdf
    # and type conversions
    if 'crs' in ens_cycle_ds.variables:
        ens_cycle_ds['crs'] = ens_cycle_ds['crs'].astype('S8')
        ens_cycle_ds['crs'].encoding['_FillValue'] = 'nan'
    # This is mostly because int32 is changed to float64 bc of nans
    for vv in ens_cycle_ds.variables:
        if 'time' not in vv:
            ens_cycle_ds[vv].encoding['dtype'] = ens_cycle_ds[vv].dtype

    ans = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(ens_cycle_ds, ans)

    ens_cycle_ds_chunk = open_whp_dataset(files, n_cores=n_cores, file_chunk_size=1)
    # Fix: inspect the chunked dataset's own variables before casting it.
    # The original checked ens_cycle_ds, which should hold the same variables
    # (both come from the same file list) but was the wrong object to test.
    if 'crs' in ens_cycle_ds_chunk.variables:
        ens_cycle_ds_chunk['crs'] = ens_cycle_ds_chunk['crs'].astype('S8')
    xr.testing.assert_equal(ens_cycle_ds_chunk, ens_cycle_ds)
Ejemplo n.º 2
0
def test_collect_missing_ens_cycle(file_glob, expected, n_cores):
    """Check dataset metadata for a collection with missing ensemble cycles.

    Comparing ``repr`` covers all of the dataset's metadata at once; the
    chunked collection must then equal the unchunked one.
    """
    data_path = test_dir.joinpath(miss_ens_cycle_dir)
    matched_files = sorted(data_path.glob(file_glob))
    collected = open_whp_dataset(matched_files, n_cores=n_cores)
    # This checks everything about the metadata.
    assert repr(collected) == expected

    collected_chunked = open_whp_dataset(
        matched_files, n_cores=n_cores, file_chunk_size=1)
    assert collected_chunked.equals(collected)
Ejemplo n.º 3
0
def test_collect_ensemble_cycle_isel(file_glob, expected, n_cores, isel):
    """Check metadata of an ensemble-cycle collection with index selection."""
    data_path = test_dir.joinpath('data/collection_data/ens_ana')
    matched_files = sorted(data_path.glob(file_glob))
    selected = open_whp_dataset(matched_files, n_cores=n_cores, isel=isel)
    # This checks everything about the metadata.
    assert repr(selected) == expected

    # The chunked collection must match the unchunked one.
    selected_chunked = open_whp_dataset(
        matched_files, n_cores=n_cores, isel=isel, file_chunk_size=1)
    assert selected_chunked.equals(selected)
Ejemplo n.º 4
0
def test_collect_ensemble_cycle_isel(file_glob, ans_file, n_cores, isel):
    """Ensemble-cycle collection with index selection matches the answer file."""
    data_path = test_dir.joinpath('data/collection_data/ens_ana')
    matched_files = sorted(data_path.glob(file_glob))
    selected = open_whp_dataset(matched_files, n_cores=n_cores, isel=isel)
    answer = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(selected, answer)

    # Chunked collection (two files per chunk) must also match the answer.
    selected_chunked = open_whp_dataset(
        matched_files, n_cores=n_cores, isel=isel, file_chunk_size=2)
    xr.testing.assert_equal(selected_chunked, answer)
Ejemplo n.º 5
0
def test_collect_profile_chunking(file_glob, expected, n_cores):
    """Check metadata of a profiled, dask-chunked simulation collection."""
    matched_files = sorted(test_dir.joinpath(sim_dir).glob(file_glob))
    collected = open_whp_dataset(
        matched_files, n_cores=n_cores, profile=True, chunks=15)
    # This checks everything about the metadata.
    assert repr(collected) == expected

    # NOTE(review): the original comment suggested that combining
    # file_chunk_size with a non-None chunks argument could error —
    # this exercises that combination; confirm intent against open_whp_dataset.
    collected_chunked = open_whp_dataset(
        matched_files,
        n_cores=n_cores,
        profile=True,
        chunks=15,
        file_chunk_size=2)
    assert collected_chunked.equals(collected)
Ejemplo n.º 6
0
def test_collect_profile_chunking(file_glob, ans_file, n_cores):
    """Profiled, dask-chunked simulation collection matches the answer file."""
    matched_files = sorted(test_dir.joinpath(sim_dir).glob(file_glob))
    collected = open_whp_dataset(
        matched_files, n_cores=n_cores, profile=True, chunks=15)
    answer = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(collected, answer)

    # NOTE(review): the original comment suggested that combining
    # file_chunk_size with a non-None chunks argument could error —
    # this exercises that combination; confirm intent against open_whp_dataset.
    collected_chunked = open_whp_dataset(
        matched_files,
        n_cores=n_cores,
        profile=True,
        chunks=15,
        file_chunk_size=1)
    xr.testing.assert_equal(collected_chunked, answer)
Ejemplo n.º 7
0
def test_collect_ensemble_cycle(file_glob, expected, n_cores):
    """Check metadata of an ensemble-cycle collection; chunked must match."""
    data_path = test_dir.joinpath('data/collection_data/ens_ana')
    matched_files = sorted(data_path.glob(file_glob))
    collected = open_whp_dataset(matched_files, n_cores=n_cores)
    # This checks everything about the metadata.
    assert repr(collected) == expected

    # Test that hierarchical collects are identical.
    # The LDASOUT glob is very slow with chunk size 1, so use a larger chunk.
    chunk_size = 50 if file_glob == '*/*/*LDASOUT_DOMAIN1' else 1
    collected_chunked = open_whp_dataset(
        matched_files, n_cores=n_cores, file_chunk_size=chunk_size)
    assert collected_chunked.equals(collected)
Ejemplo n.º 8
0
def test_collect_cycle(file_glob, expected, n_cores):
    """Collect cycle output files and verify the dataset metadata repr.

    Args:
        file_glob: Glob pattern selecting the input files under ``cycle_dir``.
        expected: Expected ``repr`` of the collected dataset.
        n_cores: Number of cores passed through to ``open_whp_dataset``.
    """
    cycle_path = test_dir.joinpath(cycle_dir)
    files = sorted(cycle_path.glob(file_glob))
    cycle_ds = open_whp_dataset(files, n_cores=n_cores)
    # This checks everything about the metadata.
    # (Removed leftover debug import: `from pprint import pprint` was unused.)
    assert repr(cycle_ds) == expected
Ejemplo n.º 9
0
def test_collect_ensemble_cycle(file_glob, ans_file, n_cores):
    """Ensemble-cycle collection matches the stored answer file."""
    data_path = test_dir.joinpath('data/collection_data/ens_ana')
    matched_files = sorted(data_path.glob(file_glob))
    collected = open_whp_dataset(matched_files, n_cores=n_cores)
    answer = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(collected, answer)

    # Test that hierarchical collects are identical.
    # The LDASOUT glob is very slow with chunk size 1, so use a larger chunk.
    chunk_size = 50 if file_glob == '*/*/*LDASOUT_DOMAIN1' else 1
    collected_chunked = open_whp_dataset(
        matched_files, n_cores=n_cores, file_chunk_size=chunk_size)
    xr.testing.assert_equal(collected_chunked, collected)
Ejemplo n.º 10
0
def test_init(mod_dir, mod_glob):
    """Smoke test: an Evaluation can be built from streamflow dataframes.

    Args:
        mod_dir: Directory containing model output files.
        mod_glob: Glob pattern selecting the model output files.
    """
    files = sorted(mod_dir.glob(mod_glob))
    mod = open_whp_dataset(files)
    mod_df = mod.streamflow.to_dataframe()
    # Use the model data as its own "observations" for the construction test.
    obs_df = mod_df
    streamflow_eval = Evaluation(mod_df, obs_df)
    # isinstance is the idiomatic type check (`type(x) == T` rejects subclasses).
    assert isinstance(streamflow_eval, Evaluation)
Ejemplo n.º 11
0
def test_collect_simulation(
    file_glob,
    ans_file,
    n_cores
):
    """Simulation collection matches the stored answer file."""
    matched_files = sorted(test_dir.joinpath(sim_dir).glob(file_glob))
    collected = open_whp_dataset(matched_files, n_cores=n_cores)
    answer = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(collected, answer)
Ejemplo n.º 12
0
def test_collect_ensemble(
    file_glob,
    ans_file,
    n_cores
):
    """Ensemble collection matches the stored answer file."""
    matched_files = sorted(test_dir.joinpath(ens_dir).glob(file_glob))
    collected = open_whp_dataset(matched_files, n_cores=n_cores)
    answer = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(collected, answer)
Ejemplo n.º 13
0
def test_gof_perfect(engine, mod_dir, mod_glob, indices_dict, join_on,
                     variable, group_by_in, transform, transform_key,
                     expected_key):
    """Goodness-of-fit of a model against itself matches stored answers.

    The model data is used as both "modeled" and "observed" (with a transform
    applied to the modeled side), run through ``Evaluation.gof`` via either
    the pandas or xarray engine, and compared to a stored answer frame.

    Args:
        engine: 'pd' or 'xr' — which Evaluation input type to exercise.
        mod_dir: Directory containing model output files.
        mod_glob: Glob pattern selecting the model output files.
        indices_dict: Index selection applied to the collected dataset.
        join_on: Columns/dims to join modeled and observed data on.
        variable: Name of the variable to evaluate.
        group_by_in: None or 'space' — grouping mode for gof.
        transform: Callable applied to the modeled series.
        transform_key: Key fragment identifying the transform in answers.
        expected_key: Base key into ``gof_answer_reprs``.

    Raises:
        ValueError: If ``group_by_in`` is not None or 'space'.
    """
    # Keep this variable agnostic
    files = sorted(mod_dir.glob(mod_glob))
    mod = open_whp_dataset(files).isel(indices_dict)

    if group_by_in is None:
        group_by_key = ''
        group_by = None
    elif group_by_in == 'space':
        group_by_key = '-' + group_by_in
        group_by = copy.deepcopy(join_on)
        group_by.remove('time')
    else:
        # Fix: `group_by` is unbound on this branch (the original referenced it
        # and would have raised NameError instead of the intended ValueError);
        # report the offending input value instead.
        raise ValueError("not a valid grouping for this test: ", group_by_in)

    expected_answer_key = expected_key + group_by_key + '_' + transform_key
    # expected = gof_answer_reprs[expected_answer_key]
    expected = str_to_frame(gof_answer_reprs[expected_answer_key])

    if engine == 'pd':
        mod_df = mod[variable].to_dataframe().rename(
            columns={variable: 'modeled'})
        obs_df = mod[variable].to_dataframe().rename(
            columns={variable: 'observed'})
        mod_df.modeled = transform(mod_df.modeled)
        the_eval = Evaluation(mod_df, obs_df, join_on=join_on)
        gof = the_eval.gof(group_by=group_by)
        assert_frame_close(round_trip_df_serial(gof), expected)

    elif engine == 'xr':
        if group_by_in is not None:
            pytest.skip("Currently not grouping using xarray.")
        mod_ds = mod.rename({variable: 'modeled'})['modeled']
        obs_ds = mod.rename({variable: 'observed'})['observed']
        # Apply the transform through a dataframe round-trip so it matches the
        # pandas path, then reshape back onto the DataArray.
        new_data = np.array(transform(mod_ds.to_dataframe().modeled)).reshape(
            mod_ds.shape)
        mod_ds.values = new_data
        # mod_ds = xr.DataArray(new_data, dims=mod_ds.dims, coords=mod_ds.coords)
        the_eval = Evaluation(mod_ds, obs_ds, join_on=join_on)
        gof = the_eval.gof(group_by=group_by).to_dataframe()
        # assert repr(gof) == expected
        assert_frame_close(round_trip_df_serial(gof), expected)
Ejemplo n.º 14
0
def test_collect_simulation(file_glob, expected, n_cores):
    """Check the metadata repr of a collected simulation dataset."""
    matched_files = sorted(test_dir.joinpath(sim_dir).glob(file_glob))
    collected = open_whp_dataset(matched_files, n_cores=n_cores)
    # This checks everything about the metadata.
    assert repr(collected) == expected
Ejemplo n.º 15
0
def test_collect_ensemble(file_glob, expected, n_cores):
    """Check the metadata repr of a collected ensemble dataset."""
    matched_files = sorted(test_dir.joinpath(ens_dir).glob(file_glob))
    collected = open_whp_dataset(matched_files, n_cores=n_cores)
    # This checks everything about the metadata.
    assert repr(collected) == expected