# Example 1
 def __init__(self,
              hours_to_forecast: int,
              num_prediction_samples: int,
              model_params,
              csv_path: str,
              weight_path,
              wandb_proj: str = None):
     """Handle inference for trained models.

     Loads the model from the given weights, prepares the inference
     parameters (scaling, forecast horizon, number of prediction
     samples) and optionally starts a Weights & Biases run.
     """
     self.hours_to_forecast = hours_to_forecast
     self.csv_path = csv_path
     self.model = load_model(model_params.copy(), csv_path, weight_path)
     self.inference_params = model_params["inference_params"]
     dataset_params = self.inference_params["dataset_params"]
     if "scaling" in dataset_params:
         # Resolve the configured scaler name into an actual scaler object.
         dataset_params["scaling"] = scaling_function({}, dataset_params)["scaling"]
     self.inference_params["hours_to_forecast"] = hours_to_forecast
     self.inference_params["num_prediction_samples"] = num_prediction_samples
     if wandb_proj:
         run_name = datetime.now().strftime("%H-%M-%D-%Y") + "_prod"
         wandb.init(name=run_name, project=wandb_proj)
         wandb.config.update(model_params)
# Example 2
    def __init__(self, forecast_steps: int, num_prediction_samples: int, model_params, csv_path: str, weight_path,
                 wandb_proj: str = None, torch_script=False):
        """Handle inference for trained models.

        :param forecast_steps: Number of time-steps to forecast (doesn't have to be hours)
        :type forecast_steps: int
        :param num_prediction_samples: Number of prediction samples to generate
        :type num_prediction_samples: int
        :param model_params: A dictionary of model parameters (ideally loaded from a saved JSON config file)
        :type model_params: Dict
        :param csv_path: Path to the CSV test file to use for inference
        :type csv_path: str
        :param weight_path: Path to the model weights
        :type weight_path: str
        :param wandb_proj: Name of the Weights & Biases project; leave as None to skip logging, defaults to None
        :type wandb_proj: str, optional
        :param torch_script: Flag accepted for interface compatibility — not read here, defaults to False
        """
        self.hours_to_forecast = forecast_steps
        self.csv_path = csv_path
        self.n_targets = model_params.get("n_targets")
        self.targ_cols = model_params["dataset_params"]["target_col"]
        self.model = load_model(model_params.copy(), csv_path, weight_path)
        self.inference_params = model_params["inference_params"]
        dataset_params = self.inference_params["dataset_params"]
        if "scaling" in dataset_params:
            # Resolve the configured scaler name into an actual scaler object.
            dataset_params["scaling"] = scaling_function({}, dataset_params)["scaling"]
        self.inference_params["hours_to_forecast"] = forecast_steps
        self.inference_params["num_prediction_samples"] = num_prediction_samples
        if wandb_proj:
            run_name = datetime.now().strftime("%H-%M-%D-%Y") + "_prod"
            wandb.init(name=run_name, project=wandb_proj)
            wandb.config.update(model_params, allow_val_change=True)
# Example 3
    def __init__(self,
                 hours_to_forecast: int,
                 num_prediction_samples: int,
                 model_params,
                 csv_path: str,
                 weight_path,
                 wandb_proj: str = None,
                 torch_script=False):
        """Handle inference for trained models.

        :param hours_to_forecast: Number of hours ahead to forecast
        :type hours_to_forecast: int
        :param num_prediction_samples: Number of prediction samples to generate
        :type num_prediction_samples: int
        :param model_params: A dictionary of model parameters
        :type model_params: Dict
        :param csv_path: Path to the CSV file to use for inference
        :type csv_path: str
        :param weight_path: Path to the model weights
        :type weight_path: str
        :param wandb_proj: Name of the Weights & Biases project; None skips logging, defaults to None
        :type wandb_proj: str, optional
        :param torch_script: Flag accepted for interface compatibility — not read here, defaults to False
        """
        self.hours_to_forecast = hours_to_forecast
        self.csv_path = csv_path
        self.model = load_model(model_params.copy(), csv_path, weight_path)
        self.inference_params = model_params["inference_params"]
        dataset_params = self.inference_params["dataset_params"]
        if "scaling" in dataset_params:
            # Resolve the configured scaler name into an actual scaler object.
            dataset_params["scaling"] = scaling_function({}, dataset_params)["scaling"]
        self.inference_params["hours_to_forecast"] = hours_to_forecast
        self.inference_params["num_prediction_samples"] = num_prediction_samples
        if wandb_proj:
            run_name = datetime.now().strftime("%H-%M-%D-%Y") + "_prod"
            wandb.init(name=run_name, project=wandb_proj)
            wandb.config.update(model_params)
# Example 4
def train_function(model_type: str, params: Dict):
    """Train a Model (TimeSeriesModel) or da_rnn and return the trained model.

    :param model_type: Type of the model; must be "da_rnn" or "PyTorch"
    :type model_type: str
    :param params: Dictionary containing all the parameters needed to run the model
    :type params: Dict
    :return: The trained model
    :raises Exception: If ``model_type`` is not a supported model type
    """
    dataset_params = params["dataset_params"]
    if model_type == "da_rnn":
        # Imported lazily so da_rnn dependencies are only required when used.
        from flood_forecast.da_rnn.train_da import da_rnn, train
        from flood_forecast.preprocessing.preprocess_da_rnn import make_data
        preprocessed_data = make_data(
            dataset_params["training_path"],
            dataset_params["target_col"],
            dataset_params["forecast_length"])
        config, model = da_rnn(preprocessed_data,
                               len(dataset_params["target_col"]))
        # All train functions return trained_model
        trained_model = train(model, preprocessed_data, config)
    elif model_type == "PyTorch":
        trained_model = PyTorchForecast(params["model_name"],
                                        dataset_params["training_path"],
                                        dataset_params["validation_path"],
                                        dataset_params["test_path"], params)
        train_transformer_style(trained_model, params["training_params"],
                                params["forward_params"])
        # To do delete
        if "scaler" in dataset_params:
            # BUG FIX: the original if/else on "scaler_params" executed
            # byte-identical code in both branches; collapsed into one
            # assignment.
            params["inference_params"]["dataset_params"][
                "scaling"] = scaling_function({}, dataset_params)["scaling"]
        test_acc = evaluate_model(trained_model, model_type,
                                  dataset_params["target_col"],
                                  params["metrics"],
                                  params["inference_params"], {})
        # evaluate_model returns (accuracy, combined df, forecast start
        # index, prediction-sample df) — unpack positionally as before.
        wandb.run.summary["test_accuracy"] = test_acc[0]
        df_train_and_test = test_acc[1]
        forecast_start_idx = test_acc[2]
        df_prediction_samples = test_acc[3]
        # Sharpe-like score: mean of inverse absolute error divided by the
        # spread of the prediction samples.
        mae = (df_train_and_test.loc[forecast_start_idx:, "preds"] -
               df_train_and_test.loc[forecast_start_idx:,
                                     dataset_params["target_col"][0]]).abs()
        inverse_mae = 1 / mae
        pred_std = df_prediction_samples.std(axis=1)
        average_prediction_sharpe = (inverse_mae / pred_std).mean()
        wandb.log({'average_prediction_sharpe': average_prediction_sharpe})

        # Log plots
        if "probabilistic" in params["inference_params"]:
            test_plot = plot_df_test_with_probabilistic_confidence_interval(
                df_train_and_test,
                forecast_start_idx,
                params,
            )
        else:
            test_plot = plot_df_test_with_confidence_interval(
                df_train_and_test,
                df_prediction_samples,
                forecast_start_idx,
                params,
                ci=95,
                alpha=0.25)
        wandb.log({"test_plot": test_plot})

        test_plot_all = go.Figure()
        for relevant_col in dataset_params["relevant_cols"]:
            test_plot_all.add_trace(
                go.Scatter(x=df_train_and_test.index,
                           y=df_train_and_test[relevant_col],
                           name=relevant_col))
        wandb.log({"test_plot_all": test_plot_all})
    else:
        raise Exception("Please supply valid model type for forecasting")
    return trained_model
# Example 5
def train_function(model_type: str, params: Dict):
    """Train a Model (TimeSeriesModel) or da_rnn and return the trained model.

    :param model_type: Type of the model. In almost all cases this will be 'PyTorch'
    :type model_type: str
    :param params: Dictionary containing all the parameters needed to run the model
    :type params: Dict
    :return: The trained model
    :raises Exception: If ``model_type`` is not a supported model type
    """
    dataset_params = params["dataset_params"]
    if model_type == "da_rnn":
        # Imported lazily so da_rnn dependencies are only required when used.
        from flood_forecast.da_rnn.train_da import da_rnn, train
        from flood_forecast.preprocessing.preprocess_da_rnn import make_data
        preprocessed_data = make_data(
            dataset_params["training_path"],
            dataset_params["target_col"],
            dataset_params["forecast_length"])
        config, model = da_rnn(preprocessed_data,
                               len(dataset_params["target_col"]))
        # All train functions return trained_model
        trained_model = train(model, preprocessed_data, config)
    elif model_type == "PyTorch":
        trained_model = PyTorchForecast(params["model_name"],
                                        dataset_params["training_path"],
                                        dataset_params["validation_path"],
                                        dataset_params["test_path"], params)
        takes_target = trained_model.params.get("takes_target", False)
        train_transformer_style(model=trained_model,
                                training_params=params["training_params"],
                                takes_target=takes_target,
                                forward_params=params["forward_params"])
        # To do delete
        if "scaler" in dataset_params:
            # BUG FIX: the original if/else on "scaler_params" executed
            # byte-identical code in both branches; collapsed into one
            # assignment.
            params["inference_params"]["dataset_params"][
                "scaling"] = scaling_function({}, dataset_params)["scaling"]
            params["inference_params"]["dataset_params"].pop(
                'scaler_params', None)
        test_acc = evaluate_model(trained_model, model_type,
                                  dataset_params["target_col"],
                                  params["metrics"],
                                  params["inference_params"], {})
        # evaluate_model returns (accuracy, combined df, forecast start
        # index, list of prediction-sample dfs) — unpack positionally.
        wandb.run.summary["test_accuracy"] = test_acc[0]
        df_train_and_test = test_acc[1]
        forecast_start_idx = test_acc[2]
        df_prediction_samples = test_acc[3]
        # Sharpe-like score per target: mean of inverse absolute error
        # divided by the spread of the prediction samples.
        mae = (df_train_and_test.loc[forecast_start_idx:, "preds"] -
               df_train_and_test.loc[forecast_start_idx:,
                                     dataset_params["target_col"][0]]).abs()
        inverse_mae = 1 / mae
        for i, df in enumerate(df_prediction_samples):
            pred_std = df.std(axis=1)
            average_prediction_sharpe = (inverse_mae / pred_std).mean()
            wandb.log({
                'average_prediction_sharpe' + str(i):
                average_prediction_sharpe
            })
        df_train_and_test.to_csv("temp_preds.csv")
        # Log plots now
        if "probabilistic" in params["inference_params"]:
            test_plot = plot_df_test_with_probabilistic_confidence_interval(
                df_train_and_test,
                forecast_start_idx,
                params,
            )
        elif len(df_prediction_samples) > 0:
            # One confidence-interval plot (and CSV dump) per target column.
            for df_samples, targ_col in zip(df_prediction_samples,
                                            dataset_params["target_col"]):
                df_samples.to_csv(targ_col + ".csv")
                test_plot = plot_df_test_with_confidence_interval(
                    df_train_and_test,
                    df_samples,
                    forecast_start_idx,
                    params,
                    targ_col=targ_col,
                    ci=95,
                    alpha=0.25)
                wandb.log({"test_plot_" + targ_col: test_plot})
        else:
            # No prediction samples: plot predictions vs. the first target.
            pd.options.plotting.backend = "plotly"
            t = dataset_params["target_col"][0]
            test_plot = df_train_and_test[[t, "preds"]].plot()
            wandb.log({"test_plot_" + t: test_plot})
        print("Now plotting final plots")
        test_plot_all = go.Figure()
        for relevant_col in dataset_params["relevant_cols"]:
            test_plot_all.add_trace(
                go.Scatter(x=df_train_and_test.index,
                           y=df_train_and_test[relevant_col],
                           name=relevant_col))
        wandb.log({"test_plot_all": test_plot_all})
    else:
        raise Exception("Please supply valid model type for forecasting")
    return trained_model
# Example 6
def train_function(model_type: str, params: Dict) -> PyTorchForecast:
    """Train a Model (TimeSeriesModel) or da_rnn and return the trained model.

    :param model_type: Type of the model. In almost all cases this will be 'PyTorch'
    :type model_type: str
    :param params: Dictionary containing all the parameters needed to run the model
    :type params: Dict
    :return: The trained model
    :rtype: PyTorchForecast
    :raises Exception: If ``model_type`` is not a supported model type

    .. code-block:: python

        with open("model_config.json") as f:
            params_dict = json.load(f)
        train_function("PyTorch", params_dict)

    For information on what this params_dict should include see
    `Confluence pages <https://flow-forecast.atlassian.net/wiki/spaces/FF/pages/92864513/Getting+Started>`_
    on training models.
    """
    dataset_params = params["dataset_params"]
    if model_type == "da_rnn":
        # Imported lazily so da_rnn dependencies are only required when used.
        from flood_forecast.da_rnn.train_da import da_rnn, train
        from flood_forecast.preprocessing.preprocess_da_rnn import make_data
        preprocessed_data = make_data(
            dataset_params["training_path"],
            dataset_params["target_col"],
            dataset_params["forecast_length"])
        config, model = da_rnn(preprocessed_data,
                               len(dataset_params["target_col"]))
        # All train functions return trained_model
        trained_model = train(model, preprocessed_data, config)
    elif model_type == "PyTorch":
        dataset_params["batch_size"] = params["training_params"]["batch_size"]
        trained_model = PyTorchForecast(params["model_name"],
                                        dataset_params["training_path"],
                                        dataset_params["validation_path"],
                                        dataset_params["test_path"], params)
        # Simplified from the original `False if x != y else True`
        # double-negative conditional expression.
        class2 = trained_model.params["dataset_params"][
            "class"] == "GeneralClassificationLoader"
        takes_target = trained_model.params.get("takes_target", False)
        if "inference_params" in trained_model.params:
            inference_params = trained_model.params["inference_params"]
            if "dataset_params" not in inference_params:
                print("Using generic dataset params")
                # Derive the inference dataset params from the training ones,
                # renaming/removing keys that only apply during training.
                infer_ds = trained_model.params["dataset_params"].copy()
                inference_params["dataset_params"] = infer_ds
                del infer_ds["class"]
                infer_ds["interpolate_param"] = infer_ds.pop("interpolate")
                infer_ds["scaling"] = infer_ds.pop("scaler")
                if "feature_param" in trained_model.params["dataset_params"]:
                    infer_ds["feature_params"] = infer_ds.pop("feature_param")
                # Training-only keys that must not leak into inference.
                delete_params = [
                    "num_workers", "pin_memory", "train_start", "train_end",
                    "valid_start", "valid_end", "test_start", "test_end",
                    "training_path", "validation_path", "test_path",
                    "batch_size"
                ]
                for param in delete_params:
                    infer_ds.pop(param, None)
        train_transformer_style(model=trained_model,
                                training_params=params["training_params"],
                                takes_target=takes_target,
                                forward_params={},
                                class2=class2)
        if "scaler" in dataset_params and "inference_params" in params:
            # BUG FIX: the original if/else on "scaler_params" executed
            # byte-identical code in both branches; collapsed into one
            # assignment.
            params["inference_params"]["dataset_params"][
                "scaling"] = scaling_function({}, dataset_params)["scaling"]
            params["inference_params"]["dataset_params"].pop(
                'scaler_params', None)
        # TODO Move to other func
        if params["dataset_params"]["class"] != "GeneralClassificationLoader":
            handle_model_evaluation1(trained_model, params, model_type)

    else:
        raise Exception(
            "Please supply valid model type for forecasting or classification")
    return trained_model