Example #1
    def evaluate(self, x, target, metrics=['mse']):
        """
        Evaluate on the prediction results and target. We predict horizon time-points ahead of the
        training data in fit_eval before evaluation, where the horizon length equals the length of
        target.
        :param x: We don't support input x currently.
        :param target: target for evaluation.
        :param metrics: a list of metrics in string format
        :return: a list of metric evaluation results
        """
        if x is not None:
            raise ValueError("We don't support input x currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling evaluate"
            )

        horizon = len(target)
        target = target[['y']]
        future = self.model.make_future_dataframe(periods=horizon)
        target_pred = self.predict(horizon=horizon)[['yhat']]
        return [
            Evaluator.evaluate(m, target.values, target_pred.values)
            for m in metrics
        ]
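
A minimal usage sketch for the method above, assuming a fitted Prophet-based wrapper named `model` (a hypothetical name) and a hold-out DataFrame with at least a `y` column:

    import pandas as pd

    # Hypothetical hold-out data; evaluate() only uses the 'y' column and infers
    # the horizon from the number of rows.
    target_df = pd.DataFrame({
        "ds": pd.date_range("2021-01-01", periods=7, freq="D"),
        "y": [112.0, 118.0, 132.0, 129.0, 121.0, 135.0, 148.0],
    })

    # x must be None for this implementation.
    mse, smape = model.evaluate(x=None, target=target_df, metrics=["mse", "smape"])
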
Example #2
 def evaluate(self, x, y, metric=['mse']):
     """
     Evaluate on x, y
     :param x: input
     :param y: target
     :param metric: a list of metrics in string format
     :return: a list of metric evaluation results
     """
     y_pred = self.predict(x)
     # y = np.squeeze(y, axis=2)
     if self.target_col_num == 1:
         return [Evaluator.evaluate(m, y, y_pred) for m in metric]
     else:
         return [np.array([Evaluator.evaluate(m, y[:, i, :], y_pred[:, i, :])
                           for i in range(self.future_seq_len)])
                 for m in metric]
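
The multi-output branch above returns one array of scores per metric, with one entry per forecast step. A standalone sketch of the same per-step computation, using a plain NumPy MSE as a stand-in for `Evaluator.evaluate("mse", ...)`:

    import numpy as np

    # Shapes assumed by the branch above: (samples, future_seq_len, target_col_num)
    future_seq_len, target_col_num = 3, 2
    y = np.random.rand(100, future_seq_len, target_col_num)
    y_pred = np.random.rand(100, future_seq_len, target_col_num)

    # One MSE per forecast step, analogous to the list comprehension above.
    per_step_mse = np.array([
        np.mean((y[:, i, :] - y_pred[:, i, :]) ** 2)
        for i in range(future_seq_len)
    ])
    print(per_step_mse.shape)  # (future_seq_len,) == (3,)
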
Example #3
    def evaluate(self, input_df, metrics=["mse"], multioutput='raw_values'):
        """
        Evaluate the pipeline.
        :param input_df: input dataframe for evaluation.
        :param metrics: subset of ['mean_squared_error', 'r_square', 'sMAPE']
        :param multioutput: string in ['raw_values', 'uniform_average']
                'raw_values' :
                    Returns a full set of errors in case of multioutput input.
                'uniform_average' :
                    Errors of all outputs are averaged with uniform weight.
        :return: a list of metric evaluation results
        """
        if isinstance(metrics, str):
            metrics = [metrics]
        # if not isinstance(metrics, list):
        #    raise ValueError("Expected metrics to be a list!")

        x, y = self.feature_transformers.transform(input_df, is_train=True)
        y_pred = self.model.predict(x)
        if y_pred.shape[1] == 1:
            multioutput = 'uniform_average'
        y_unscale, y_pred_unscale = self.feature_transformers.post_processing(
            input_df, y_pred, is_train=True)

        return [
            Evaluator.evaluate(m,
                               y_unscale,
                               y_pred_unscale,
                               multioutput=multioutput) for m in metrics
        ]
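
The `multioutput` argument follows the scikit-learn convention. A quick standalone illustration with `sklearn.metrics.mean_squared_error`, used here only as an analogue for `Evaluator.evaluate`:

    import numpy as np
    from sklearn.metrics import mean_squared_error

    y_true = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
    y_pred = np.array([[1.1, 11.0], [1.9, 19.0], [3.2, 31.0]])

    # 'raw_values' keeps one error per output column ...
    print(mean_squared_error(y_true, y_pred, multioutput="raw_values"))
    # ... while 'uniform_average' averages them into a single scalar.
    print(mean_squared_error(y_true, y_pred, multioutput="uniform_average"))
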
Example #4
    def test_evaluate_predict_future_more_1(self):
        target_col = "values"
        metrics = ["mse", "r2"]
        future_seq_len = np.random.randint(2, 6)
        train_df, test_df, tsp, test_sample_num = self.get_input_tsp(
            future_seq_len, target_col)
        pipeline = tsp.fit(train_df, test_df)
        mse, rs = pipeline.evaluate(test_df, metrics=metrics)
        assert len(mse) == future_seq_len
        assert len(rs) == future_seq_len
        y_pred = pipeline.predict(test_df)
        assert y_pred.shape == (test_sample_num - default_past_seq_len + 1,
                                future_seq_len + 1)

        y_pred_df = pipeline.predict(test_df[:-future_seq_len])
        columns = [
            "{}_{}".format(target_col, i) for i in range(future_seq_len)
        ]
        y_pred_value = y_pred_df[columns].values

        y_df = test_df[default_past_seq_len:]
        y_value = TimeSequenceFeatureTransformer()._roll_test(
            y_df[target_col], future_seq_len)

        mse_pred_eval, rs_pred_eval = [
            Evaluator.evaluate(m, y_value, y_pred_value) for m in metrics
        ]
        mse_eval, rs_eval = pipeline.evaluate(test_df, metrics)
        assert_array_almost_equal(mse_pred_eval, mse_eval, decimal=2)
        assert_array_almost_equal(rs_pred_eval, rs_eval, decimal=2)
Example #5
    def evaluate(self,
                 data,
                 metrics=['mse'],
                 multioutput="uniform_average",
                 batch_size=32):
        '''
        Evaluate the time series pipeline.

        :param data: data can be a TSDataset or a data creator (to be supported).
               The TSDataset should follow the same operations as the training
               TSDataset used in AutoTSEstimator.fit.
        :param metrics: list. The evaluation metric names, e.g. ["mse"].
        :param multioutput: Defines how to aggregate multiple output values.
               String in ['raw_values', 'uniform_average']. The value defaults to
               'uniform_average'.
        :param batch_size: prediction batch size. A smaller batch_size costs more
               time but less memory. The param is only effective when data is a
               TSDataset. The value defaults to 32.
        '''
        _, y = self._tsdataset_to_numpy(data, is_predict=False)
        yhat = self.predict(data, batch_size=batch_size)
        if self._scaler:
            from zoo.chronos.data.utils.scale import unscale_timeseries_numpy
            y = unscale_timeseries_numpy(y, self._scaler, self._scaler_index)
        eval_result = [
            Evaluator.evaluate(m,
                               y_true=y,
                               y_pred=yhat[:y.shape[0]],
                               multioutput=multioutput) for m in metrics
        ]
        return eval_result
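
A hedged usage sketch for the pipeline-level evaluate above; `ts_pipeline` and `tsdata_test` are illustrative names for an AutoTSEstimator result and a test TSDataset that went through the same preprocessing as the training TSDataset:

    # Hypothetical call; returns one result per entry in `metrics`.
    mse, smape = ts_pipeline.evaluate(tsdata_test,
                                      metrics=["mse", "smape"],
                                      multioutput="uniform_average",
                                      batch_size=32)
    print(f"mse: {mse}, smape: {smape}")
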
Example #6
    def evaluate_with_onnx(self,
                           data,
                           metrics=['mse'],
                           multioutput="uniform_average",
                           batch_size=32):
        '''
        Evaluate the time series pipeline with onnx.

        :param data: data can be a TSDataset or a data creator (to be supported).
               The TSDataset should follow the same operations as the training
               TSDataset used in AutoTSEstimator.fit.
        :param metrics: list. The evaluation metric names, e.g. ["mse"].
        :param multioutput: Defines how to aggregate multiple output values.
               String in ['raw_values', 'uniform_average']. The value defaults to
               'uniform_average'.
        :param batch_size: prediction batch size. A smaller batch_size costs more
               time but less memory. The param is only effective when data is a
               TSDataset. The value defaults to 32.
        '''
        # predict with onnx
        x, y = self._tsdataset_to_numpy(data, is_predict=False)
        yhat = self._best_model.predict_with_onnx(x, batch_size=batch_size)
        yhat = self._tsdataset_unscale(yhat)
        # unscale
        y = self._tsdataset_unscale(y)
        # evaluate
        eval_result = [
            Evaluator.evaluate(m,
                               y_true=y,
                               y_pred=yhat,
                               multioutput=multioutput) for m in metrics
        ]
        return eval_result
Example #7
    def evaluate(self, x, y, metrics=['mse']):
        """
        Evaluate on the prediction results and y. We predict on the input x before evaluation,
        where the horizon length equals the second dimension size of y.
        :param x: input data used for prediction.
        :param y: target. We interpret the second dimension of y as the horizon length for
            evaluation.
        :param metrics: a list of metrics in string format
        :return: a list of metric evaluation results
        """
        if x is None:
            raise ValueError("Input invalid x of None")
        if y is None:
            raise ValueError("Input invalid y of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )

        if isinstance(y, pd.DataFrame):
            y = y.values
        self.model.n_jobs = self.n_jobs
        y_pred = self.predict(x)

        result_list = []
        for metric in metrics:
            if callable(metric):
                result_list.append(metric(y, y_pred))
            else:
                result_list.append(Evaluator.evaluate(metric, y, y_pred))
        return result_list
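
Because of the `callable(metric)` branch above, a custom function can be mixed with named metrics. A small sketch, assuming a fitted wrapper `model` and test arrays `x_test`, `y_test` (all names illustrative):

    import numpy as np

    def mape(y_true, y_pred):
        # Mean absolute percentage error; y_true must not contain zeros.
        return float(np.mean(np.abs((y_true - y_pred) / y_true)) * 100)

    # Hypothetical call mixing a metric name with a callable.
    mse, mape_value = model.evaluate(x_test, y_test, metrics=["mse", mape])
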
Example #8
    def evaluate(self, x=None, y=None, metrics=None, num_workers=None):
        """
        Evaluate on the prediction results and y. We predict horizon time-points ahead of the input x
        in fit_eval before evaluation, where the horizon length equals the second dimension size of
        y.
        :param x: We don't support input x currently.
        :param y: target. We interpret the second dimension of y as the horizon length for
            evaluation.
        :param metrics: a list of metrics in string format
        :param num_workers: the number of workers to use in evaluate. It defaults to 1.
        :return: a list of metric evaluation results
        """
        if x is not None:
            raise ValueError("We don't support input x directly.")
        if y is None:
            raise ValueError("Input invalid y of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )
        if len(y.shape) == 1:
            y = np.expand_dims(y, axis=1)
            horizon = 1
        else:
            horizon = y.shape[1]
        result = self.predict(x=None, horizon=horizon, num_workers=num_workers)

        if y.shape[1] == 1:
            multioutput = 'uniform_average'
        else:
            multioutput = 'raw_values'
        return [
            Evaluator.evaluate(m, y, result, multioutput=multioutput)
            for m in metrics
        ]
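
The shape handling above means a 1-D target is treated as a single-step horizon. A standalone sketch of that branch:

    import numpy as np

    y = np.array([3.2, 3.4, 3.1])
    if len(y.shape) == 1:
        y = np.expand_dims(y, axis=1)
    print(y.shape)        # (3, 1)
    horizon = y.shape[1]  # 1, so multioutput falls back to 'uniform_average'
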
Example #9
    def evaluate_with_onnx(self, x, y, metrics=['mse'], dirname=None, multioutput="raw_values"):
        # reshape 1dim input
        x = self._reshape_input(x)
        y = self._reshape_input(y)

        yhat = self.predict_with_onnx(x, dirname=dirname)
        eval_result = [Evaluator.evaluate(m, y_true=y, y_pred=yhat, multioutput=multioutput)
                       for m in metrics]
        return eval_result
Example #10
 def evaluate(self, x, y, metric=['mse']):
     yhat = self.predict(x)
     eval_result = [
         Evaluator.evaluate(m,
                            y_true=y,
                            y_pred=yhat,
                            multioutput="raw_values") for m in metric
     ]
     return eval_result
Example #11
 def _validate(self, x, y, metric):
     self.model.eval()
     with torch.no_grad():
         yhat = self.model(x)
         val_loss = self.criterion(yhat, y)
         eval_result = Evaluator.evaluate(metric=metric,
                                          y_true=y.numpy(),
                                          y_pred=yhat.numpy(),
                                          multioutput='uniform_average')
     return {"val_loss": val_loss.item(), metric: eval_result}
Example #12
 def evaluate(self, x, y, metrics=['mse']):
     """
     Evaluate on x, y
     :param x: input
     :param y: target
     :param metrics: a list of metrics in string format
     :return: a list of metric evaluation results
     """
     y_pred = self.predict(x)
     return [Evaluator.evaluate(m, y, y_pred) for m in metrics]
Example #13
    def evaluate(self,
                 x=None,
                 y=None,
                 metrics=None,
                 target_covariates=None,
                 target_dti=None,
                 num_workers=None):
        """
        Evaluate on the prediction results and y. We predict horizon time-points ahead of the input x
        in fit_eval before evaluation, where the horizon length equals the second dimension size of
        y.
        :param x: We don't support input x currently.
        :param y: target. We interpret the second dimension of y as the horizon length for
            evaluation.
        :param metrics: a list of metrics in string format
        :param target_covariates: covariates corresponding to target_value.
            2-D ndarray or None.
            The shape of the ndarray should be (r, horizon), where r is the number of covariates.
            Global covariates for all time series. If None, only the default time covariates will
            be used when use_time is True. Otherwise, the time covariates used are the stack of
            the input covariates and the default time covariates.
        :param target_dti: dti corresponding to target_value.
            DatetimeIndex or None.
            If None, use default fixed frequency DatetimeIndex generated with the last date of x in
            fit and freq.
        :param num_workers: the number of workers to use in evaluate. It defaults to 1.
        :return: a list of metric evaluation results
        """
        if x is not None:
            raise ValueError("We don't support input x directly.")
        if y is None:
            raise ValueError("Input invalid y of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )
        if len(y.shape) == 1:
            y = np.expand_dims(y, axis=1)
            horizon = 1
        else:
            horizon = y.shape[1]
        result = self.predict(x=None,
                              horizon=horizon,
                              future_covariates=target_covariates,
                              future_dti=target_dti,
                              num_workers=num_workers)

        if y.shape[1] == 1:
            multioutput = 'uniform_average'
        else:
            multioutput = 'raw_values'
        return [
            Evaluator.evaluate(m, y, result, multioutput=multioutput)
            for m in metrics
        ]
Example #14
 def evaluate(self, x, y, metric=['mean_squared_error']):
     """
     Evaluate on x, y
     :param x: input
     :param y: target
     :param metric: a list of metrics in string format
     :return: a list of metric evaluation results
     """
     e = Evaluator()
     y_pred = self.predict(x)
     return [e.evaluate(m, y, y_pred) for m in metric]
Example #15
    def evaluate(self, x, y, metrics=['mse']):
        # reshape 1dim input
        x = self._reshape_input(x)
        y = self._reshape_input(y)

        yhat = self.predict(x)
        eval_result = [
            Evaluator.evaluate(m,
                               y_true=y,
                               y_pred=yhat,
                               multioutput="raw_values") for m in metrics
        ]
        return eval_result
Example #16
 def evaluate(self, x, y, metrics=['mse']):
     """
     Evaluate on x, y
     :param x: input
     :param y: target
     :param metrics: a list of metrics in string format
     :return: a list of metric evaluation results
     """
     y_pred = self.predict(x)
     if y_pred.shape[1] == 1:
         multioutput = 'uniform_average'
     else:
         multioutput = 'raw_values'
     # y = np.squeeze(y, axis=2)
     return [Evaluator.evaluate(m, y, y_pred, multioutput=multioutput) for m in metrics]
Example #17
 def _validate(self, validation_loader, metric):
     self.model.eval()
     with torch.no_grad():
         yhat_list = []
         y_list = []
         for x_valid_batch, y_valid_batch in validation_loader:
             yhat_list.append(self.model(x_valid_batch).numpy())
             y_list.append(y_valid_batch.numpy())
         yhat = np.concatenate(yhat_list, axis=0)
         y = np.concatenate(y_list, axis=0)
     # val_loss = self.criterion(yhat, y)
     eval_result = Evaluator.evaluate(metric=metric,
                                      y_true=y,
                                      y_pred=yhat,
                                      multioutput='uniform_average')
     return {metric: eval_result}
Example #18
 def evaluate(self, df, metric=['mse']):
     """
     Evaluate on df.
     :param df: input dataframe for evaluation.
     :param metric: a list of metrics in string format
     :return: a list of metric evaluation results
     """
     if isinstance(metric, str):
         metric = [metric]
     x, y = self._process_data(df, mode="val")
     y_pred = self.model.predict(x)
     y_unscale, y_pred_unscale = self.ft.post_processing(df, y_pred, is_train=True)
     if len(y_pred.shape) > 1 and y_pred.shape[1] == 1:
         multioutput = 'uniform_average'
     else:
         multioutput = 'raw_values'
     return [Evaluator.evaluate(m, y_unscale, y_pred_unscale, multioutput=multioutput)
             for m in metric]
Example #19
 def _validate(self, validation_loader, metric_name, metric_func=None):
     if not metric_name:
         assert metric_func, "You must input valid metric_func or metric_name"
         metric_name = metric_func.__name__
     self.model.eval()
     with torch.no_grad():
         yhat_list = []
         y_list = []
         for x_valid_batch, y_valid_batch in validation_loader:
             yhat_list.append(self.model(x_valid_batch).numpy())
             y_list.append(y_valid_batch.numpy())
         yhat = np.concatenate(yhat_list, axis=0)
         y = np.concatenate(y_list, axis=0)
     # val_loss = self.criterion(yhat, y)
     if metric_func:
         eval_result = metric_func(y, yhat)
     else:
         eval_result = Evaluator.evaluate(metric=metric_name,
                                          y_true=y, y_pred=yhat,
                                          multioutput='uniform_average')
     return {metric_name: eval_result}
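
A standalone sketch of the batch-collection pattern in `_validate` above, using an arbitrary `torch.nn.Linear` model and an in-memory list of batches (both purely illustrative):

    import numpy as np
    import torch

    model = torch.nn.Linear(4, 1)
    loader = [(torch.randn(8, 4), torch.randn(8, 1)) for _ in range(3)]

    model.eval()
    with torch.no_grad():
        yhat = np.concatenate([model(xb).numpy() for xb, _ in loader], axis=0)
        y = np.concatenate([yb.numpy() for _, yb in loader], axis=0)
    print(yhat.shape, y.shape)  # (24, 1) (24, 1)
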
Example #20
    def evaluate(self, x, y, metrics=['mse']):
        """
        Evaluate on the prediction results of x and y.
        :param x: input data used for prediction.
        :param y: target. We interpret the second dimension of y as the horizon length for
            evaluation.
        :param metrics: a list of metrics in string format
        :return: a list of metric evaluation results
        """
        if x is None:
            raise ValueError("Input invalid x of None")
        if y is None:
            raise ValueError("Input invalid y of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )

        y_pred = self.predict(x)
        return [Evaluator.evaluate(m, y, y_pred) for m in metrics]
Example #21
    def evaluate(self, target, data=None, metrics=['mse']):
        """
        Evaluate on the prediction results. We predict horizon time-points ahead of the training
        data before evaluation, where the horizon length equals the length of target.
        :param target: target for evaluation.
        :param data: Prophet predicts the horizon steps forward from the training data,
            so data should be None as it is not used.
        :param metrics: a list of metrics in string format
        :return: a list of metric evaluation results
        """
        if data is not None:
            raise ValueError("We don't support input data currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception("Needs to call fit_eval or restore first before calling evaluate")

        horizon = len(target)
        future = self.model.make_future_dataframe(periods=horizon)
        target_pred = self.predict(horizon=horizon)[['yhat']]
        return [Evaluator.evaluate(m, target[['y']].values, target_pred.values) for m in metrics]
Example #22
    def evaluate(self, target, x=None, metrics=['mse'], rolling=False):
        """
        Evaluate on the prediction results and target. We predict horizon time-points ahead of the
        training data before evaluation, where the horizon length equals the length of target.
        :param target: target for evaluation.
        :param x: ARIMA predicts the horizon steps forward from the training data,
            so x should be None as it is not used.
        :param metrics: a list of metrics in string format
        :param rolling: whether to use rolling prediction
        :return: a list of metric evaluation results
        """
        if x is not None:
            raise ValueError("We don't support input x currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling evaluate"
            )

        forecasts = self.predict(horizon=len(target), rolling=rolling)

        return [Evaluator.evaluate(m, target, forecasts) for m in metrics]
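
A hedged usage sketch for the ARIMA-style evaluate above; `arima_model` is an illustrative name for a fitted wrapper and `target` is the hold-out series that immediately follows the training period:

    # Hypothetical call; rolling=True enables rolling prediction (see the docstring above).
    mse, smape = arima_model.evaluate(target,
                                      x=None,
                                      metrics=["mse", "smape"],
                                      rolling=True)
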
Example #23
    if args.predict_local:
        logger.info(
            'Stopping context for yarn cluster and init context on local.')
        stop_orca_context()
        import ray
        ray.init(num_cpus=args.num_predict_cores)

    logger.info('Start prediction.')
    yhat = model.predict(x=None,
                         horizon=24,
                         num_workers=args.num_predict_workers
                         if args.predict_local else args.num_workers)
    logger.info("Prediction ends")
    yhat = yhat["prediction"]
    target_value = dict({"y": target_data})

    # evaluate with prediction results
    from zoo.automl.common.metrics import Evaluator
    evaluate_mse = Evaluator.evaluate("mse", target_data, yhat)

    # You can also evaluate directly without prediction results.
    mse, smape = model.evaluate(x=None,
                                target_value=target_value,
                                metric=['mse', 'smape'])
    print(f"Evaluation results: mse: {mse}, smape: {smape}")

    logger.info("Evaluation ends")

    stop_orca_context()
Example #24
            lr=lr,
            min_child_weight=min_child_weight)
        search_alg = None
        search_alg_params = None
        scheduler = None
        scheduler_params = None

    auto_xgb_reg = AutoXGBRegressor(cpus_per_trial=2,
                                    name="auto_xgb_regressor",
                                    **config)
    auto_xgb_reg.fit(data=(X_train, y_train),
                     validation_data=(X_val, y_val),
                     metric="rmse",
                     n_sampling=recipe.num_samples,
                     search_space=recipe.search_space(),
                     search_alg=search_alg,
                     search_alg_params=None,
                     scheduler=scheduler,
                     scheduler_params=scheduler_params)

    print("Training completed.")
    best_model = auto_xgb_reg.get_best_model()
    y_hat = best_model.predict(X_val)

    from zoo.automl.common.metrics import Evaluator
    rmse = Evaluator.evaluate(metric="rmse", y_true=y_val, y_pred=y_hat)
    print(f"Evaluate: the square root of mean square error is {rmse:.2f}")

    ray_ctx.stop()
    sc.stop()
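
For reference, the RMSE printed above can be cross-checked with plain NumPy (toy values, purely illustrative):

    import numpy as np

    y_val = np.array([3.0, 5.0, 2.5, 7.0])
    y_hat = np.array([2.5, 5.0, 4.0, 8.0])
    rmse = np.sqrt(np.mean((y_val - y_hat) ** 2))
    print(f"Evaluate: the square root of mean square error is {rmse:.2f}")
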
                         name="auto_lstm")
    auto_lstm.fit(
        data=get_data_creator(tsdata_train),
        epochs=args.epoch,
        batch_size=hp.choice([32, 64]),
        validation_data=get_data_creator(tsdata_valid),
        n_sampling=args.n_sampling,
    )
    best_model = auto_lstm.get_best_model()
    best_config = auto_lstm.get_best_config()

    x, y = tsdata_test\
        .roll(lookback=best_config["past_seq_len"],
              horizon=best_config["future_seq_len"])\
        .to_numpy()
    yhat = best_model.predict(x)

    y_unscale = tsdata_test.unscale_numpy(y)
    yhat_unscale = tsdata_test.unscale_numpy(np.expand_dims(yhat, axis=1))

    result = [
        Evaluator.evaluate(m,
                           y_true=y_unscale,
                           y_pred=yhat_unscale,
                           multioutput="uniform_average")
        for m in ['rmse', 'smape']
    ]
    print(f'rmse is {result[0]}, smape is {result[1]}')
    print(f'The hyperparameters of the model are {best_config}')
    stop_orca_context()