Example #1
    def make_plots(self,
                   date: datetime,
                   csv_path: str,
                   csv_bucket: str = None,
                   save_name=None):
        df, tensor, history, forecast_start, test, samples = self.infer_now(
            date, csv_path, csv_bucket, save_name)
        plot_df_test_with_confidence_interval(df, samples, forecast_start,
                                              self.model.params)
        return tensor, history, test
Example #2
    def make_plots(self,
                   date: datetime,
                   csv_path: str = None,
                   csv_bucket: str = None,
                   save_name=None,
                   wandb_plot_id=None):
        """

        :param date: [description]
        :type date: datetime
        :param csv_path: [description], defaults to None
        :type csv_path: str, optional
        :param csv_bucket: [description], defaults to None
        :type csv_bucket: str, optional
        :param save_name: [description], defaults to None
        :type save_name: [type], optional
        :param wandb_plot_id: [description], defaults to None
        :type wandb_plot_id: [type], optional
        :return: [description]
        :rtype: [type]
        """
        if csv_path is None:
            csv_path = self.csv_path
        df, tensor, history, forecast_start, test, samples = self.infer_now(
            date, csv_path, csv_bucket, save_name)
        plt = {}
        for sample in samples:
            plt = plot_df_test_with_confidence_interval(
                df, sample, forecast_start, self.model.params)
            if wandb_plot_id:
                wandb.log({wandb_plot_id: plt})
                deep_explain_model_summary_plot(self.model, test, date)
                deep_explain_model_heatmap(self.model, test, date)
        return tensor, history, test, plt
Example #3
    def make_plots(self, date: datetime, csv_path: str = None, csv_bucket: str = None,
                   save_name=None, wandb_plot_id=None):
        """Function to create plots in inference mode.

        :param date: The datetime to start inference
        :type date: datetime
        :param csv_path: The path to the CSV file you want to use for inference, defaults to None
        :type csv_path: str, optional
        :param csv_bucket: [description], defaults to None
        :type csv_bucket: str, optional
        :param save_name: [description], defaults to None
        :type save_name: [type], optional
        :param wandb_plot_id: [description], defaults to None
        :type wandb_plot_id: [type], optional
        :return: [description]
        :rtype: tuple(torch.Tensor, torch.Tensor, CSVTestLoader, matplotlib.pyplot.plot)
        """
        if csv_path is None:
            csv_path = self.csv_path
        df, tensor, history, forecast_start, test, samples = self.infer_now(date, csv_path, csv_bucket, save_name)
        plt = {}
        for sample, targ in zip(samples, self.model.params["dataset_params"]["target_col"]):
            plt = plot_df_test_with_confidence_interval(df, sample, forecast_start, self.model.params, targ)
            if wandb_plot_id:
                wandb.log({wandb_plot_id + targ: plt})
                if not self.n_targets:
                    deep_explain_model_summary_plot(self.model, test, date)
                    deep_explain_model_heatmap(self.model, test, date)
        return tensor, history, test, plt
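The per-target plotting loop in Example #3 can be reproduced in isolation. The sketch below uses toy data and plain plotly instead of plot_df_test_with_confidence_interval, only to show how one figure per target column ends up logged under wandb_plot_id + targ; the project name, column names, and sample values are all made up.

import pandas as pd
import plotly.graph_objects as go
import wandb

wandb.init(mode="offline", project="flood-forecast-demo")  # hypothetical project; offline avoids needing an account

wandb_plot_id = "test_plot_"
target_cols = ["cfs", "precip"]  # stand-in for params["dataset_params"]["target_col"]
samples = [pd.DataFrame({"pred": range(10)}) for _ in target_cols]  # toy prediction samples

for sample, targ in zip(samples, target_cols):
    # plot_df_test_with_confidence_interval returns a plotly Figure; a bare Scatter stands in here
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=sample["pred"], name=targ))
    wandb.log({wandb_plot_id + targ: fig})  # same key convention as Example #3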
Example #4
def train_function(model_type: str, params: Dict):
    """Function to train a Model (TimeSeriesModel) or da_rnn. Will return the trained model.

    :param model_type: Type of the model; (for now) must be da_rnn or PyTorch
    :type model_type: str
    :param params: Dictionary containing all the parameters needed to run the model
    :type params: Dict
    """
    dataset_params = params["dataset_params"]
    if model_type == "da_rnn":
        from flood_forecast.da_rnn.train_da import da_rnn, train
        from flood_forecast.preprocessing.preprocess_da_rnn import make_data
        preprocessed_data = make_data(
            params["dataset_params"]["training_path"],
            params["dataset_params"]["target_col"],
            params["dataset_params"]["forecast_length"])
        config, model = da_rnn(preprocessed_data, len(dataset_params["target_col"]))
        # All train functions return trained_model
        trained_model = train(model, preprocessed_data, config)
    elif model_type == "PyTorch":
        trained_model = PyTorchForecast(
            params["model_name"],
            dataset_params["training_path"],
            dataset_params["validation_path"],
            dataset_params["test_path"],
            params)
        train_transformer_style(trained_model, params["training_params"], params["forward_params"])
        params["inference_params"]["dataset_params"]["scaling"] = scaler_dict[dataset_params["scaler"]]
        test_acc = evaluate_model(
            trained_model,
            model_type,
            params["dataset_params"]["target_col"],
            params["metrics"],
            params["inference_params"],
            {})
        wandb.run.summary["test_accuracy"] = test_acc[0]
        df_train_and_test = test_acc[1]
        forecast_start_idx = test_acc[2]
        df_prediction_samples = test_acc[3]
        inverse_mae = 1 / (
                df_train_and_test.loc[forecast_start_idx:, "preds"] -
                df_train_and_test.loc[forecast_start_idx:, params["dataset_params"]["target_col"][0]]).abs()
        pred_std = df_prediction_samples.std(axis=1)
        average_prediction_sharpe = (inverse_mae / pred_std).mean()
        wandb.log({'average_prediction_sharpe': average_prediction_sharpe})

        # Log plots
        test_plot = plot_df_test_with_confidence_interval(
            df_train_and_test,
            df_prediction_samples,
            forecast_start_idx,
            params,
            ci=95,
            alpha=0.25)
        wandb.log({"test_plot": test_plot})

        test_plot_all = go.Figure()
        for relevant_col in params["dataset_params"]["relevant_cols"]:
            test_plot_all.add_trace(
                go.Scatter(x=df_train_and_test.index,
                           y=df_train_and_test[relevant_col],
                           name=relevant_col))
        wandb.log({"test_plot_all": test_plot_all})
    else:
        raise Exception("Please supply valid model type for forecasting")
    return trained_model
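To make the shape of params in Example #4 concrete, here is a minimal sketch of the dictionary this version reads. Only the keys train_function actually touches are shown; every path, column name, and value is a made-up placeholder, and real configuration files in the repository carry additional fields.

params = {
    "model_name": "MultiAttnHeadSimple",        # placeholder model name
    "dataset_params": {
        "training_path": "data/train.csv",      # hypothetical CSV paths
        "validation_path": "data/valid.csv",
        "test_path": "data/test.csv",
        "target_col": ["cfs"],
        "relevant_cols": ["cfs", "precip", "temp"],
        "forecast_length": 10,
        "scaler": "StandardScaler",             # key looked up in scaler_dict
    },
    "training_params": {"epochs": 2, "batch_size": 10},  # passed straight to train_transformer_style
    "forward_params": {},
    "inference_params": {"dataset_params": {}},  # "scaling" is injected above before evaluate_model
    "metrics": ["MSE"],
}
# trained_model = train_function("PyTorch", params)  # needs the flood_forecast imports and real CSVs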
Example #5
    def make_plots(self, date: datetime, csv_path: str = None, csv_bucket: str = None,
                   save_name=None, wandb_plot_id=None):
        if csv_path is None:
            csv_path = self.csv_path
        df, tensor, history, forecast_start, test, samples = self.infer_now(date, csv_path, csv_bucket, save_name)
        plt = plot_df_test_with_confidence_interval(df, samples, forecast_start, self.model.params)
        if wandb_plot_id:
            wandb.log({wandb_plot_id: plt})
        return tensor, history, test, plt
Example #6
def train_function(model_type: str, params: Dict):
    """Function to train a Model(TimeSeriesModel) or da_rnn. Will return the trained model
    :param model_type: Type of the model. In almost all cases this will be 'PyTorch'
    :type model_type: str
    :param params: Dictionary containing all the parameters needed to run the model
    :type Dict:
    """
    dataset_params = params["dataset_params"]
    if model_type == "da_rnn":
        from flood_forecast.da_rnn.train_da import da_rnn, train
        from flood_forecast.preprocessing.preprocess_da_rnn import make_data
        preprocessed_data = make_data(
            params["dataset_params"]["training_path"],
            params["dataset_params"]["target_col"],
            params["dataset_params"]["forecast_length"])
        config, model = da_rnn(preprocessed_data,
                               len(dataset_params["target_col"]))
        # All train functions return trained_model
        trained_model = train(model, preprocessed_data, config)
    elif model_type == "PyTorch":
        trained_model = PyTorchForecast(params["model_name"],
                                        dataset_params["training_path"],
                                        dataset_params["validation_path"],
                                        dataset_params["test_path"], params)
        takes_target = False
        if "takes_target" in trained_model.params:
            takes_target = trained_model.params["takes_target"]
        train_transformer_style(model=trained_model,
                                training_params=params["training_params"],
                                takes_target=takes_target,
                                forward_params=params["forward_params"])
        # To do delete
        if "scaler" in dataset_params:
            if "scaler_params" in dataset_params:
                params["inference_params"]["dataset_params"][
                    "scaling"] = scaling_function({},
                                                  dataset_params)["scaling"]
            else:
                params["inference_params"]["dataset_params"][
                    "scaling"] = scaling_function({},
                                                  dataset_params)["scaling"]
            params["inference_params"]["dataset_params"].pop(
                'scaler_params', None)
        test_acc = evaluate_model(trained_model, model_type,
                                  params["dataset_params"]["target_col"],
                                  params["metrics"],
                                  params["inference_params"], {})
        wandb.run.summary["test_accuracy"] = test_acc[0]
        df_train_and_test = test_acc[1]
        forecast_start_idx = test_acc[2]
        df_prediction_samples = test_acc[3]
        mae = (df_train_and_test.loc[forecast_start_idx:, "preds"] -
               df_train_and_test.loc[forecast_start_idx:,
                                     params["dataset_params"]["target_col"][0]]
               ).abs()
        inverse_mae = 1 / mae
        i = 0
        for df in df_prediction_samples:
            pred_std = df.std(axis=1)
            average_prediction_sharpe = (inverse_mae / pred_std).mean()
            wandb.log({
                'average_prediction_sharpe' + str(i):
                average_prediction_sharpe
            })
            i += 1
        df_train_and_test.to_csv("temp_preds.csv")
        # Log plots now
        if "probabilistic" in params["inference_params"]:
            test_plot = plot_df_test_with_probabilistic_confidence_interval(
                df_train_and_test,
                forecast_start_idx,
                params,
            )
        elif len(df_prediction_samples) > 0:
            for thing in zip(df_prediction_samples,
                             params["dataset_params"]["target_col"]):
                thing[0].to_csv(thing[1] + ".csv")
                test_plot = plot_df_test_with_confidence_interval(
                    df_train_and_test,
                    thing[0],
                    forecast_start_idx,
                    params,
                    targ_col=thing[1],
                    ci=95,
                    alpha=0.25)
                wandb.log({"test_plot_" + thing[1]: test_plot})
        else:
            pd.options.plotting.backend = "plotly"
            t = params["dataset_params"]["target_col"][0]
            test_plot = df_train_and_test[[t, "preds"]].plot()
            wandb.log({"test_plot_" + t: test_plot})
        print("Now plotting final plots")
        test_plot_all = go.Figure()
        for relevant_col in params["dataset_params"]["relevant_cols"]:
            test_plot_all.add_trace(
                go.Scatter(x=df_train_and_test.index,
                           y=df_train_and_test[relevant_col],
                           name=relevant_col))
        wandb.log({"test_plot_all": test_plot_all})
    else:
        raise Exception("Please supply valid model type for forecasting")
    return trained_model
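The average_prediction_sharpe metric logged in Examples #4, #6, and #7 is simply the inverse absolute error divided by the spread of the prediction samples, averaged over the forecast window. A self-contained recomputation on toy numbers (all values made up) looks like this:

import numpy as np
import pandas as pd

# Toy stand-ins for df_train_and_test and one entry of df_prediction_samples.
df_train_and_test = pd.DataFrame({
    "cfs":   [1.0, 1.2, 1.1, 0.9],   # observed target column
    "preds": [1.1, 1.0, 1.2, 1.0],   # model predictions
})
df_prediction_samples = pd.DataFrame(
    np.random.default_rng(0).normal(1.0, 0.1, size=(4, 30)))  # 30 sampled forecasts per step
forecast_start_idx = 0

mae = (df_train_and_test.loc[forecast_start_idx:, "preds"] -
       df_train_and_test.loc[forecast_start_idx:, "cfs"]).abs()
inverse_mae = 1 / mae                        # rewards small errors (blows up if error is ~0)
pred_std = df_prediction_samples.std(axis=1)
average_prediction_sharpe = (inverse_mae / pred_std).mean()
print(average_prediction_sharpe)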
Example #7
    def test_plot_df_test_with_confidence_interval_df_preds_empty(self):
        params = {'dataset_params': {'target_col': ['target_col']}}
        fig = plot_df_test_with_confidence_interval(
            self.df_test, self.df_preds_empty, 0, params, "target_col", 95)
        self.assertIsInstance(fig, go.Figure)

def handle_model_evaluation1(trained_model, params: Dict,
                             model_type: str) -> None:
    """Utility function to help handle model evaluation. Primarily used at the moment for forcast

    :param trained_model: A PyTorchForecast model that has already been trained. 
    :type trained_model: PyTorchForecast
    :param params: A dictionary of the trained model parameters.
    :type params: Dict
    :param model_type: The type of model. Almost always PyTorch in practice.
    :type model_type: str
    """
    test_acc = evaluate_model(trained_model, model_type,
                              params["dataset_params"]["target_col"],
                              params["metrics"], params["inference_params"],
                              {})
    wandb.run.summary["test_accuracy"] = test_acc[0]
    df_train_and_test = test_acc[1]
    forecast_start_idx = test_acc[2]
    df_prediction_samples = test_acc[3]
    mae = (df_train_and_test.loc[forecast_start_idx:, "preds"] -
           df_train_and_test.loc[forecast_start_idx:,
                                 params["dataset_params"]["target_col"][0]]
           ).abs()
    inverse_mae = 1 / mae
    i = 0
    for df in df_prediction_samples:
        pred_std = df.std(axis=1)
        average_prediction_sharpe = (inverse_mae / pred_std).mean()
        wandb.log(
            {'average_prediction_sharpe' + str(i): average_prediction_sharpe})
        i += 1
    df_train_and_test.to_csv("temp_preds.csv")
    # Log plots now
    if "probabilistic" in params["inference_params"]:
        test_plot = plot_df_test_with_probabilistic_confidence_interval(
            df_train_and_test,
            forecast_start_idx,
            params,
        )
    elif len(df_prediction_samples) > 0:
        for thing in zip(df_prediction_samples,
                         params["dataset_params"]["target_col"]):
            thing[0].to_csv(thing[1] + ".csv")
            test_plot = plot_df_test_with_confidence_interval(
                df_train_and_test,
                thing[0],
                forecast_start_idx,
                params,
                targ_col=thing[1],
                ci=95,
                alpha=0.25)
            wandb.log({"test_plot_" + thing[1]: test_plot})
    else:
        pd.options.plotting.backend = "plotly"
        t = params["dataset_params"]["target_col"][0]
        test_plot = df_train_and_test[[t, "preds"]].plot()
        wandb.log({"test_plot_" + t: test_plot})
    print("Now plotting final plots")
    test_plot_all = go.Figure()
    for relevant_col in params["dataset_params"]["relevant_cols"]:
        test_plot_all.add_trace(
            go.Scatter(x=df_train_and_test.index,
                       y=df_train_and_test[relevant_col],
                       name=relevant_col))
    wandb.log({"test_plot_all": test_plot_all})