Example #1
def preprocess(monthly=True):
    regrid = get_data_path() / "interim/VCI_preprocessed/data_kenya.nc"
    preprocessor = BokuNDVIPreprocessor(get_data_path(), resolution="1000")

    if monthly:
        # resample the regridded NDVI to monthly values
        preprocessor.preprocess(subset_str="kenya", regrid=regrid, resample_time="M")
    else:
        # "W-MON" resamples to weekly values anchored on Mondays
        preprocessor.preprocess(
            subset_str="kenya", regrid=regrid, resample_time="W-MON"
        )
Example #2
def process_vci(subset_str: str = "kenya"):
    data_path = get_data_path()
    processor = VHIPreprocessor(get_data_path(), "VCI")
    regrid_path = (
        data_path /
        f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc")
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor.preprocess(subset_str=subset_str,
                         resample_time="M",
                         upsampling=False,
                         regrid=regrid_path)
Example #3
def process_vci_2018():

    processor = VHIPreprocessor(get_data_path(), "VCI")

    processor.preprocess(subset_str="kenya",
                         resample_time="M",
                         upsampling=False)
Example #4
def main(target_var, all_vars):
    # RUN engineer
    engineer(target_var=target_var)

    autoregressive = [target_var]  # 'VCI3M'
    dynamic = ["precip", "t2m", "pet", "E", "SMroot", "SMsurf"]
    static_list = [False, False, True]

    for vars_to_include, static_bool in zip(
        [autoregressive, autoregressive + dynamic, autoregressive + dynamic],
            static_list,
    ):
        print(
            f'\n{"-" * 10}\nRunning experiment with: {vars_to_include} with static: {static_bool} for {target_var}\n{"-" * 10}'
        )

        # FIT models
        vars_to_exclude = [v for v in all_vars if v not in vars_to_include]

        parsimonious()
        if static_bool:
            lstm(vars_to_exclude, static="features")
            ealstm(vars_to_exclude, static="features")
        else:
            lstm(vars_to_exclude, static=None)

        # RENAME model directories
        data_dir = get_data_path()
        rename_model_experiment_file(data_dir,
                                     vars_to_include,
                                     static=static_bool,
                                     target_var=target_var)
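A minimal driver for this experiment loop might look like the sketch below; the call is an assumption for illustration, with the candidate variable list taken from the autoregressive and dynamic lists defined inside main and "VCI3M" taken from the comment on the autoregressive list.

if __name__ == "__main__":
    # hypothetical invocation, for illustration only
    target = "VCI3M"
    candidate_vars = [target, "precip", "t2m", "pet", "E", "SMroot", "SMsurf"]
    main(target_var=target, all_vars=candidate_vars)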
Example #5
def linear_nn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    explain=False,
    static="features",
    ignore_vars=None,
    num_epochs=50,
    early_stopping=5,
    layer_sizes=[100],
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
):
    predictor = LinearNetwork(
        layer_sizes=layer_sizes,
        data_folder=get_data_path(),
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        static=static,
        ignore_vars=ignore_vars,
        predict_delta=predict_delta,
        spatial_mask=spatial_mask,
        include_latlons=include_latlons,
    )
    predictor.train(num_epochs=num_epochs, early_stopping=early_stopping)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    if explain:
        _ = predictor.explain(save_shap_values=True)
Example #6
def earnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=True,
    ignore_vars=None,
):
    data_path = get_data_path()

    if not pretrained:
        predictor = EARecurrentNetwork(
            hidden_size=128,
            data_folder=data_path,
            experiment=experiment,
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
            ignore_vars=ignore_vars,
        )
        predictor.train(num_epochs=50, early_stopping=5)
        predictor.evaluate(save_preds=True)
        predictor.save_model()
    else:
        predictor = load_model(data_path /
                               f"models/{experiment}/ealstm/model.pt")

    test_file = data_path / f"features/{experiment}/test/2018_3"
    assert test_file.exists()
    all_explanations_for_file(test_file, predictor, batch_size=100)
Example #7
def regression(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    explain=False,
    static="features",
    ignore_vars=None,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
):
    predictor = LinearRegression(
        get_data_path(),
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        static=static,
        ignore_vars=ignore_vars,
        predict_delta=predict_delta,
        spatial_mask=spatial_mask,
        include_latlons=include_latlons,
    )
    predictor.train()
    predictor.evaluate(save_preds=True)

    # mostly to test it works
    if explain:
        predictor.explain(save_shap_values=True)
Example #8
def export_s5():

    granularity = "hourly"
    pressure_level = False

    exporter = S5Exporter(
        data_folder=get_data_path(),
        granularity=granularity,
        pressure_level=pressure_level,
    )
    variable = "total_precipitation"
    min_year = 1993
    max_year = 2014
    min_month = 1
    max_month = 12
    max_leadtime = None
    pressure_levels = [200, 500, 925]
    n_parallel_requests = 20

    exporter.export(
        variable=variable,
        min_year=min_year,
        max_year=max_year,
        min_month=min_month,
        max_month=max_month,
        max_leadtime=max_leadtime,
        pressure_levels=pressure_levels,
        n_parallel_requests=n_parallel_requests,
    )
Example #9
def gbdt(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=True,
    explain=False,
    static="features",
    ignore_vars=None,
    # predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
):
    data_path = get_data_path()

    # initialise, train and save GBDT model
    predictor = GBDT(
        data_folder=data_path,
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        static=static,
        ignore_vars=ignore_vars,
        spatial_mask=spatial_mask,
        include_latlons=include_latlons,
    )
    predictor.train(early_stopping=5)
    predictor.evaluate(save_preds=True)
    predictor.save_model()
Example #10
def process_gleam(subset_str: str = "kenya"):
    data_path = get_data_path()

    regrid_path = (
        data_path /
        f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc")
    assert regrid_path.exists(), f"{regrid_path} not available"

    # assumed completion: the snippet stops after the assert; the call below mirrors
    # the other GLEAM example further down (monthly resampling, no upsampling)
    processor = GLEAMPreprocessor(data_path)
    processor.preprocess(
        subset_str=subset_str, regrid=regrid_path, resample_time="M", upsampling=False
    )
Example #11
def run_models(target_var: str):
    parsimonious()
    # -------
    # LSTM
    # -------
    rnn(  # earnn(
        experiment="one_month_forecast",
        include_pred_month=True,
        surrounding_pixels=None,
        explain=False,
        static=None,  # "features",
        ignore_vars=None,
        num_epochs=50,  # 50
        early_stopping=5,  # 5
        hidden_size=256,
        predict_delta=False,
        normalize_y=True,
        include_prev_y=False,
        include_latlons=False,
    )

    # -------
    # EALSTM
    # -------

    # rename the output file
    data_path = get_data_path()

    _rename_directory(
        from_path=data_path / "models" / "one_month_forecast",
        to_path=data_path / "models" /
        f"one_month_forecast_adede_only_target_{target_var}",
        with_datetime=False,
    )
Example #12
def export_era5(variables):
    exporter = ERA5Exporter(get_data_path())

    # The ERA5 exporter downloads the data with weird names.
    # A mapping from the actual variables to the downloaded variable
    # names is recorded here
    name2var = {
        "precip": "precip",
        "total_precipitation": "total_precipitation",
        "evaporation": "e",
        "mean_eastward_turbulent_surface_stress": "metss",
        "mean_northward_turbulent_surface_stress": "mntss",
        "potential_evaporation": "pev",
        "slhf": "surface_latent_heat_flux",
        "sp": "surface_pressure",
        "sshf": "surface_sensible_heat_flux",
        "ssrc": "surface_net_solar_radiation_clear_sky",
        "stl1": "soil_temperature_level_1",
        "strc": "surface_net_thermal_radiation_clear_sky",
        "swvl1": "volumetric_soil_water_layer_1",
        "swvl2": "volumetric_soil_water_layer_2",
        "swvl3": "volumetric_soil_water_layer_3",
        "swvl4": "volumetric_soil_water_layer_4",
        "t2m": "2m_temperature",
        "u10": "10m_u_component_of_wind",
        "v10": "10m_v_component_of_wind",
        "p84.162": "vertical_integral_of_divergence_of_moisture_flux",
        "VCI": "VCI",
    }

    for variable in variables:
        exporter.export(variable=variable, granularity="hourly", break_up=True)
Example #13
def export_s5(region_str="kenya"):

    granularity = "monthly"
    pressure_level = False

    exporter = S5Exporter(
        data_folder=get_data_path(),
        granularity=granularity,
        pressure_level=pressure_level,
    )
    # NOTE: `variables`, `max_year`, `min_month`, and `max_month` are undefined in this
    # snippet; the values below are assumptions that mirror the hourly S5 export above
    variables = ["total_precipitation"]
    min_year = 1993
    max_year = 2014
    min_month = 1
    max_month = 12
    max_leadtime = None
    pressure_levels = None  # [200, 500, 925]
    n_parallel_requests = 1
    for variable in variables:
        print(f"\n\nWORKING ON: {variable}\n\n")
        exporter.export(
            variable=variable,
            min_year=min_year,
            max_year=max_year,
            min_month=min_month,
            max_month=max_month,
            max_leadtime=max_leadtime,
            pressure_levels=pressure_levels,
            n_parallel_requests=n_parallel_requests,
            region_str=region_str,
            break_up=False,
        )
Example #14
def process_esa_cci_landcover():
    data_path = get_data_path()

    regrid_path = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = ESACCIPreprocessor(data_path)
    processor.preprocess(subset_str="kenya", regrid=regrid_path)
Example #15
def preprocess_era5():
    data_path = get_data_path()

    regrid_path = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = ERA5MonthlyMeanPreprocessor(data_path)
    processor.preprocess(subset_str="kenya", regrid=regrid_path)
Example #16
def persistence(experiment="one_month_forecast"):
    data_path = get_data_path()
    # the ASAL mask path is built but immediately overridden, so the model runs unmasked
    spatial_mask = data_path / "interim/boundaries_preprocessed/kenya_asal_mask.nc"
    spatial_mask = None
    predictor = Persistence(data_path,
                            experiment=experiment,
                            spatial_mask=spatial_mask)
    predictor.evaluate(save_preds=True)
Example #17
def preprocess_asal_mask():
    data_path = get_data_path()

    regrid_path = data_path / "interim/chirps_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = KenyaASALMask(data_path)
    processor.preprocess(reference_nc_filepath=regrid_path)
Example #18
def models(target_var: str = "VCI1M"):
    # NO IGNORE VARS
    ignore_vars = None
    # drop the target variable from ignore_vars
    # ignore_vars = [v for v in ignore_vars if v != target_var]
    # assert target_var not in ignore_vars

    # -------------
    # persistence
    # -------------
    parsimonious()

    # regression(ignore_vars=ignore_vars)
    # gbdt(ignore_vars=ignore_vars)
    # linear_nn(ignore_vars=ignore_vars)

    # -------------
    # LSTM
    # -------------
    rnn(
        experiment="one_month_forecast",
        include_pred_month=True,
        surrounding_pixels=None,
        explain=False,
        static="features",
        ignore_vars=ignore_vars,
        num_epochs=50,
        early_stopping=5,
        hidden_size=256,
        include_latlons=True,
    )

    # -------------
    # EALSTM
    # -------------
    earnn(
        experiment="one_month_forecast",
        include_pred_month=True,
        surrounding_pixels=None,
        pretrained=False,
        explain=False,
        static="features",
        ignore_vars=ignore_vars,
        num_epochs=50,
        early_stopping=5,
        hidden_size=256,
        static_embedding_size=64,
        include_latlons=True,
    )

    # rename the output file
    data_path = get_data_path()

    _rename_directory(
        from_path=data_path / "models" / "one_month_forecast",
        to_path=data_path / "models" /
        f"one_month_forecast_BOKU_{target_var}_adede_only_vars",
    )
Example #19
def preprocess_era5(subset_str: str = "kenya"):
    data_path = get_data_path()

    # regrid_path = data_path / f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc"
    # assert regrid_path.exists(), f"{regrid_path} not available"
    regrid_path = None

    processor = ERA5MonthlyMeanPreprocessor(data_path)
    processor.preprocess(subset_str=subset_str, regrid=regrid_path)
Example #20
def process_precip_2018():
    data_path = get_data_path()

    regrid_path = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = CHIRPSPreprocessor(data_path)

    processor.preprocess(subset_str="kenya", regrid=regrid_path, parallel=False)
Example #21
def move_features_dir(target_var):
    # rename the features dir
    data_path = get_data_path()

    _rename_directory(
        from_path=data_path / "features" / "one_month_forecast",
        to_path=data_path / "features" /
        f"one_month_forecast_BOKU_{target_var}_adede_only_vars",
    )
Example #22
def preprocess_boku_ndvi():
    data_path = get_data_path()
    processor = BokuNDVIPreprocessor(data_path)

    regrid_path = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor.preprocess(subset_str="kenya",
                         resample_time="W-MON",
                         regrid=regrid_path)
Example #23
def engineer(pred_months=3, target_var="VCI1M"):
    engineer = Engineer(get_data_path(),
                        experiment="one_month_forecast",
                        process_static=False)
    engineer.engineer(
        test_year=[y for y in range(2016, 2019)],
        target_variable=target_var,
        pred_months=pred_months,
        expected_length=pred_months,
    )
Example #24
def process_seas5():
    data_path = get_data_path()

    regrid_path = data_path / "interim/chirps_preprocessed/chirps_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = S5Preprocessor(data_path)
    processor.preprocess(
        subset_str="kenya", regrid=regrid_path, resample_time="M", upsampling=False
    )
Example #25
def move_features_dir(target_var, adede_only=False, experiment_name=None):
    # rename the features dir
    data_path = get_data_path()
    if experiment_name is None:
        experiment_name = f"one_month_forecast_BOKU_{target_var}_our_vars_{'only_P_VCI' if adede_only else 'ALL'}"

    _rename_directory(
        from_path=data_path / "features" / "one_month_forecast",
        to_path=data_path / "features" / experiment_name,
    )
Example #26
def export_era5POS():
    exporter = ERA5ExporterPOS(get_data_path())

    variables = [
        "air_temperature_at_2_metres",
        "precipitation_amount_1hour_Accumulation",
    ]

    for variable in variables:
        exporter.export(variable=variable)
Example #27
def preprocess_era5_hourly():
    data_path = get_data_path()

    regrid_path = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = ERA5HourlyPreprocessor(data_path)

    # W-MON is weekly, each Monday (the same as the NDVI data from Atzberger)
    processor.preprocess(subset_str="kenya", resample_time="W-MON")
Example #28
def earnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=False,
    explain=False,
    static="features",
    ignore_vars=None,
    num_epochs=50,
    early_stopping=5,
    static_embedding_size=10,
    hidden_size=128,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
    normalize_y=True,
    include_prev_y=True,
    include_yearly_aggs=True,  # new
    clear_nans=True,
    weight_observations=False,
    pred_month_static=False,
):
    data_path = get_data_path()

    if not pretrained:
        predictor = EARecurrentNetwork(
            hidden_size=hidden_size,
            data_folder=data_path,
            experiment=experiment,
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
            static=static,
            static_embedding_size=static_embedding_size,
            ignore_vars=ignore_vars,
            predict_delta=predict_delta,
            spatial_mask=spatial_mask,
            include_latlons=include_latlons,
            normalize_y=normalize_y,
            include_prev_y=include_prev_y,
            include_yearly_aggs=include_yearly_aggs,
            clear_nans=clear_nans,
            weight_observations=weight_observations,
            pred_month_static=pred_month_static,
        )
        predictor.train(num_epochs=num_epochs, early_stopping=early_stopping)
        predictor.evaluate(save_preds=True)
        predictor.save_model()
    else:
        predictor = load_model(data_path /
                               f"models/{experiment}/ealstm/model.pt")

    if explain:
        test_file = data_path / f"features/{experiment}/test/2018_3"
        assert test_file.exists()
        all_explanations_for_file(test_file, predictor, batch_size=100)
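As a usage illustration (assumed, not part of the source), the two code paths in this function can be exercised by training once and then reloading the saved EALSTM for SHAP explanations.

# hypothetical calls, for illustration only
earnn(pretrained=False, num_epochs=50, early_stopping=5)  # train, evaluate and save
earnn(pretrained=True, explain=True)  # reload models/<experiment>/ealstm/model.pt and explain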
Example #29
def process_gleam():
    data_path = get_data_path()

    regrid_path = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = GLEAMPreprocessor(data_path)

    processor.preprocess(
        subset_str="kenya", regrid=regrid_path, resample_time="M", upsampling=False
    )
Example #30
def preprocess_era5_hourly(subset_str: str = "kenya"):
    data_path = get_data_path()

    regrid_path = (
        data_path /
        f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc")
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = ERA5HourlyPreprocessor(data_path)

    # W-MON is weekly, each Monday (the same as the NDVI data from Atzberger)
    processor.preprocess(subset_str=subset_str, resample_time="W-MON")