Example #1
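These snippets are drawn from the fv3fit package and its test suite. They assume numpy as np, xarray as xr, and fv3fit are imported; names such as DenseModel, DenseHyperparameters, PackerConfig, and SliceConfig come from fv3fit (exact import paths depend on the installed version), and lowercase helpers are fixtures defined in the original test modules.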
def test_loaded_DenseModel_predicts_with_clipped_inputs(tmpdir):
    hyperparameters = DenseHyperparameters(
        ["a", "b"],
        ["c"],
        clip_config=PackerConfig({"a": {
            "z": SliceConfig(None, 3)
        }}),
    )
    model = DenseModel(["a", "b"], ["c"], hyperparameters)

    nz = 5
    dims = ["x", "y", "z"]
    shape = (2, 2, nz)
    arr = np.arange(np.prod(shape)).reshape(shape).astype(float)
    input_data = xr.Dataset({
        "a": (dims, arr),
        "b": (dims, arr),
        "c": (dims, arr + 1)
    })
    model.fit([input_data])
    prediction = model.predict(input_data)
    output_path = str(tmpdir.join("trained_model"))
    fv3fit.dump(model, output_path)
    model_loaded = fv3fit.load(output_path)
    loaded_prediction = model_loaded.predict(input_data)
    xr.testing.assert_allclose(prediction, loaded_prediction)
Example #2
def open_model(config: MachineLearningConfig) -> MultiModelAdapter:
    model_paths = config.model
    models = []
    for path in model_paths:
        model = fv3fit.load(path)
        rename_in = config.input_standard_names
        rename_out = config.output_standard_names
        models.append(RenamingAdapter(model, rename_in, rename_out))
    return MultiModelAdapter(models)
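A minimal usage sketch follows; the model path is hypothetical, and the MachineLearningConfig field names are taken from the attribute accesses in open_model above (its constructor signature is an assumption).

# Hypothetical values; field names follow the attribute accesses in open_model.
config = MachineLearningConfig(
    model=["gs://bucket/trained_model"],  # one fv3fit model path per entry
    input_standard_names={},              # renames applied to model inputs
    output_standard_names={},             # renames applied to model outputs
)
combined = open_model(config)  # MultiModelAdapter wrapping each loaded model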
Example #3
def test_cli(
    tmpdir,
    use_local_download_path: bool,
    use_validation_data: bool,
    model_type: str,
    hyperparameter_dict,
):
    """
    Test of fv3fit.train command-line interface.
    """
    config = get_config(
        tmpdir, [], model_type, hyperparameter_dict, use_validation_data,
    )
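    # `mock_load_batches` is a mock.patch object provided by the original test
    # module (its decorator is not shown in this snippet); it stands in for the
    # real batch-loading function so the CLI run needs no on-disk data.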
    mock_load_batches.return_value = [config.mock_dataset for _ in range(6)]
    if use_local_download_path:
        config.args.local_download_path = os.path.join(str(tmpdir), "local_download")
    cli_main(config.args)
    fv3fit.load(config.args.output_path)
    if use_local_download_path:
        assert len(os.listdir(config.args.local_download_path)) > 0
Example #4
def test_dump_and_load_default_maintains_prediction(model_type):
    n_sample, n_tile, nx, ny, n_feature = 1, 6, 12, 12, 2
    sample_func = get_uniform_sample_func(size=(n_sample, n_tile, nx, ny,
                                                n_feature))
    result = train_identity_model(model_type, sample_func=sample_func)

    original_result = result.model.predict(result.test_dataset)
    with tempfile.TemporaryDirectory() as tmpdir:
        fv3fit.dump(result.model, tmpdir)
        loaded_model = fv3fit.load(tmpdir)
    loaded_result = loaded_model.predict(result.test_dataset)
    xr.testing.assert_equal(loaded_result, original_result)
Example #5
def test_fit_bptt_command_line(regtest, sample_dim_name, dt):
    config_text = """
regularizer:
  name: l2
  kwargs:
    l: 0.0001
optimizer:
  name: Adam
  kwargs:
    learning_rate: 0.001
    amsgrad: True
    clipnorm: 1.0
hyperparameters:
  n_hidden_layers: 3
  n_units: 256
  train_batch_size: 48
  use_moisture_limiter: true
  state_noise: 0.25
input_variables:
  - a
  - b
decrease_learning_rate_epoch: 1
decreased_learning_rate: 0.0001
total_epochs: 2
random_seed: 0
"""
    np.random.seed(0)
    training_dataset = get_train_dataset(sample_dim_name, dt)
    with tempfile.TemporaryDirectory() as tmpdir:
        arrays_dir = os.path.join(tmpdir, "arrays")
        os.mkdir(arrays_dir)
        for i in range(5):
            filename = os.path.join(arrays_dir, f"arrays_{i}.nc")
            training_dataset.to_netcdf(filename)
        config_filename = os.path.join(tmpdir, "config.yaml")
        with open(config_filename, "w") as f:
            f.write(config_text)
        outdir = os.path.join(tmpdir, "output")
        subprocess.check_call(
            ["python", "-m", "fv3fit.train_bptt", arrays_dir, config_filename, outdir]
        )
        # as a minimum, test that output exists
        assert os.path.isdir(outdir)
        assert len(os.listdir(outdir)) > 0

        # one model per epoch is saved
        for sample_model_dir in sorted(os.listdir(outdir)):
            loaded = fv3fit.load(os.path.join(outdir, sample_model_dir))

            first_timestep = training_dataset.isel(time=0)
            loaded_output = loaded.predict(first_timestep)

            assert isinstance(loaded_output, xr.Dataset)
Example #6
def test_constant_model_predict_after_dump_and_load(input_variables,
                                                    output_variables, nz):
    gridded_dataset = get_gridded_dataset(nz)
    outputs = get_first_columns(gridded_dataset, output_variables)
    predictor = get_predictor(input_variables, output_variables, outputs)
    with tempfile.TemporaryDirectory() as tempdir:
        fv3fit.dump(predictor, tempdir)
        predictor = fv3fit.load(tempdir)

    ds_pred = predictor.predict(gridded_dataset)

    assert sorted(list(ds_pred.data_vars.keys())) == sorted(output_variables)
    for name in output_variables:
        assert np.all(
            stack_non_vertical(ds_pred[name]).values == outputs[name][None, :])
Example #7
def test_dump_and_load(tmpdir):
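    # `base_model` is a trained predictor defined elsewhere in the original
    # test module; DerivedModel wraps it and adds the derived output variable.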
    derived_model = DerivedModel(
        base_model, derived_output_variables=["net_shortwave_sfc_flux_derived"],
    )
    ds_in = xr.Dataset(
        data_vars={
            "input": xr.DataArray(np.zeros([3, 3, 5]), dims=["x", "y", "z"],),
            "surface_diffused_shortwave_albedo": xr.DataArray(
                np.zeros([3, 3]), dims=["x", "y"],
            ),
        }
    )
    prediction = derived_model.predict(ds_in)

    fv3fit.dump(derived_model, str(tmpdir))
    loaded_model = fv3fit.load(str(tmpdir))

    prediction_after_load = loaded_model.predict(ds_in)
    assert prediction_after_load.identical(prediction)
Example #8
def test_reloaded_model_gives_same_outputs(sample_dim_name, dt):
    train_dataset = get_train_dataset(sample_dim_name, dt)
    model = _BPTTTrainer(
        sample_dim_name,
        ["a", "b"],
        n_units=32,
        n_hidden_layers=4,
        kernel_regularizer=None,
        train_batch_size=48,
        optimizer="adam",
    )
    model.fit_statistics(train_dataset)
    model.fit(train_dataset)

    with tempfile.TemporaryDirectory() as tmpdir:
        fv3fit.dump(model.predictor_model, tmpdir)
        loaded = fv3fit.load(tmpdir)

    first_timestep = train_dataset.isel(time=0)
    reference_output = model.predictor_model.predict(first_timestep)
    # test that loaded model gives the same predictions
    loaded_output = loaded.predict(first_timestep)
    xr.testing.assert_equal(reference_output, loaded_output)
Example #9
def main(args):
    logger.info("Starting diagnostics routine.")

    with fsspec.open(args.data_yaml, "r") as f:
        as_dict = yaml.safe_load(f)
    config = loaders.BatchesLoader.from_dict(as_dict)

    logger.info("Reading grid...")
    if not args.grid:
        # By default, read the appropriate resolution grid from vcm.catalog
        grid = load_grid_info(args.grid_resolution)
    else:
        with fsspec.open(args.grid, "rb") as f:
            grid = xr.open_dataset(f, engine="h5netcdf").load()

    logger.info("Opening ML model")
    model = fv3fit.load(args.model_path)

    # add Q2 and total water path for PW-Q2 scatterplots and net precip domain averages
    if any(["Q2" in v for v in model.output_variables]):
        model = fv3fit.DerivedModel(model, derived_output_variables=["Q2"])
        model_variables = _variables_to_load(model) + ["water_vapor_path"]
    else:
        model_variables = _variables_to_load(model)

    output_data_yaml = os.path.join(args.output_path, "data_config.yaml")
    with fsspec.open(args.data_yaml,
                     "r") as f_in, fsspec.open(output_data_yaml, "w") as f_out:
        f_out.write(f_in.read())
    batches = config.load_batches(model_variables)
    predict_function = _get_predict_function(model, model_variables, grid)
    batches = loaders.Map(predict_function, batches)

    # compute diags
    ds_diagnostics, ds_scalar_metrics = _compute_diagnostics(
        batches,
        grid,
        predicted_vars=model.output_variables,
        n_jobs=args.n_jobs)
    ds_diagnostics = ds_diagnostics.update(grid)

    # save model sensitivity figures; these exclude derived variables
    base_model = (
        model.base_model if isinstance(model, fv3fit.DerivedModel) else model
    )
    try:
        plot_jacobian(
            base_model,
            os.path.join(args.output_path,
                         "model_sensitivity_figures"),  # type: ignore
        )
    except AttributeError:
        try:
            input_feature_indices = get_variable_indices(
                data=batches[0], variables=base_model.input_variables)
            plot_rf_feature_importance(
                input_feature_indices,
                base_model,
                os.path.join(args.output_path, "model_sensitivity_figures"),
            )
        except AttributeError:
            logger.info(
                f"Base model is {type(base_model).__name__}, "
                "which currently has no feature importance or Jacobian "
                "calculation implemented.")

    mapper = _get_data_mapper_if_exists(config)
    if mapper is not None:
        snapshot_time = (args.snapshot_time or sorted(
            config.kwargs.get("timesteps", list(mapper.keys())))[0])
        snapshot_key = nearest_time(snapshot_time, list(mapper.keys()))
        ds_snapshot = predict_function(mapper[snapshot_key])

        vertical_vars = [
            var for var in model.output_variables if is_3d(ds_snapshot[var])
        ]
        ds_snapshot = insert_column_integrated_vars(ds_snapshot, vertical_vars)
        predicted_vars = [
            var for var in ds_snapshot if "derivation" in ds_snapshot[var].dims
        ]

        # add snapshotted prediction to saved diags.nc
        ds_diagnostics = ds_diagnostics.merge(
            safe.get_variables(ds_snapshot, predicted_vars).rename(
                {v: f"{v}_snapshot"
                 for v in predicted_vars}))

        ds_transect = _get_transect(ds_snapshot, grid, vertical_vars)
        _write_nc(ds_transect, args.output_path, TRANSECT_NC_NAME)

    ds_diagnostics = _add_derived_diagnostics(ds_diagnostics)

    _write_nc(
        ds_diagnostics,
        args.output_path,
        DIAGS_NC_NAME,
    )

    # convert and output metrics json
    metrics = _average_metrics_dict(ds_scalar_metrics)
    with fsspec.open(os.path.join(args.output_path, METRICS_JSON_NAME),
                     "w") as f:
        json.dump(metrics, f, indent=4)

    metadata = {}
    metadata["model_path"] = args.model_path
    metadata["data_config"] = dataclasses.asdict(config)
    with fsspec.open(os.path.join(args.output_path, METADATA_JSON_NAME),
                     "w") as f:
        json.dump(metadata, f, indent=4)

    logger.info(f"Finished processing dataset diagnostics and metrics.")
Example #10
 def __post_init__(self: "Adapter"):
     self.model = fv3fit.load(self.config.url)
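For context, a minimal sketch of a dataclass this method could belong to; the AdapterConfig type and its url field are assumptions inferred from the method body.

import dataclasses
import fv3fit

@dataclasses.dataclass
class AdapterConfig:
    url: str  # hypothetical field: location of the serialized fv3fit model

@dataclasses.dataclass
class Adapter:
    config: AdapterConfig

    def __post_init__(self: "Adapter"):
        # load the predictor eagerly when the adapter is constructed
        self.model = fv3fit.load(self.config.url)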