Python to_pandas Exemples, gluonts.dataset.util.to_pandas Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : startup_gluonts.py Projet : selvaHome/gluonts

def build_ff_model():
    """Train a simple feed-forward forecaster on the AMZN tweet-volume series.

    Downloads the CSV, wraps the values up to "2015-04-05 00:00:00" in a
    GluonTS ``ListDataset``, trains a ``SimpleFeedForwardEstimator`` and plots
    the tail of the training series together with the forecast, saving the
    figure to ``ff-model.png``.
    """
    # The CSV is fetched over the network on every call.
    source_url = (
        "https://raw.githubusercontent.com/numenta/NAB/master/data/"
        "realTweets/Twitter_volume_AMZN.csv"
    )
    frame = pd.read_csv(source_url, header=0, index_col=0)

    # A GluonTS entry needs the first index value ("start") and the observed
    # values ("target"); the dataset itself carries the sampling frequency.
    series_entry = {
        "start": frame.index[0],
        "target": frame.value[:"2015-04-05 00:00:00"],
    }
    train_data = common.ListDataset([series_entry], freq="5min")

    # The Trainer object customises how the estimator is fitted.
    trainer = Trainer(ctx="cpu", epochs=100, learning_rate=1e-3)
    estimator = simple_feedforward.SimpleFeedForwardEstimator(
        freq="5min",
        prediction_length=100,
        trainer=trainer,
    )

    # Training the estimator yields the predictor used for forecasting.
    predictor = estimator.train(training_data=train_data)

    forecasts = predictor.predict(train_data)
    for history, forecast in zip(train_data, forecasts):
        # Show only the last 100 observations of the training series ...
        to_pandas(history)[-100:].plot()
        # ... followed by the model forecast, drawn in red.
        forecast.plot(output_file='ff-model.png', color='r')

Exemple #2

0

Afficher le fichier

    def prepare_output(self, forecasts, horizon):
        """Build the date, target and prediction lists for the given horizon.

        Only the last ``horizon`` items of ``forecasts`` are used. The
        prediction list is padded with NaN for every training timestamp so
        that the forecasts line up after the training history.

        For ``self.type == 'deepar'`` the dates and targets come from the
        first test entry and the padding length from the first train entry.
        For every other type they come from ``self.y_train`` followed by
        ``self.y_test``; for ``'prophet'`` any non-Series input is first
        converted (and stored back on ``self``) by indexing on the ``'ds'``
        column and keeping the first remaining column.

        Returns:
            A ``(date, target, prediction)`` tuple of lists.
        """
        tail = forecasts[-horizon:]

        if self.type == 'deepar':
            # The datasets are iterables of entries; only the first is used.
            first_train = to_pandas(next(iter(self.y_train)))
            first_test = to_pandas(next(iter(self.y_test)))

            date = first_test.index.astype(str).tolist()
            target = first_test.tolist()
            history_length = len(first_train.index)
        else:
            if self.type == 'prophet':
                for attr in ('y_train', 'y_test'):
                    current = getattr(self, attr)
                    if not isinstance(current, pd.Series):
                        setattr(self, attr,
                                current.set_index('ds').iloc[:, 0])

            date = (self.y_train.index.astype(str).tolist()
                    + self.y_test.index.astype(str).tolist())
            target = self.y_train.tolist() + self.y_test.tolist()
            history_length = len(self.y_train.index)

        # NaN placeholders cover the training period, then the forecasts.
        prediction = np.full(history_length, np.nan).tolist()
        prediction.extend(tail)

        return date, target, prediction

Exemple #3

0

Afficher le fichier

Fichier : startup_gluonts.py Projet : selvaHome/gluonts

def build_deepar_model():
    """Train DeepAR on the "exchange_rate" dataset and plot its forecast.

    Saves a plot of the first training series to ``dataset.png`` and a plot of
    the first test series with its forecast to ``deepar-model.png``.
    """
    # Financial data set "exchange_rate", regenerated on every call.
    bundle = get_dataset("exchange_rate", regenerate=True)
    first_train = next(iter(bundle.train))
    first_test = next(iter(bundle.test))
    meta = bundle.metadata

    # Visualise the first training series.
    _, overview_ax = plt.subplots(1, 1, figsize=(11, 8))
    to_pandas(first_train).plot(ax=overview_ax)
    overview_ax.grid(which="both")
    overview_ax.legend(["train data"], loc="upper left")
    plt.savefig("dataset.png")

    # Show what the entries and the metadata contain.
    print(first_train.keys())
    print(first_test.keys())
    print(meta)

    # Wrap both splits in the dataset type GluonTS works with.
    training_data = common.ListDataset(bundle.train, freq=meta.freq)
    testing_data = common.ListDataset(bundle.test, freq=meta.freq)

    # The Trainer object customises how the DeepAR estimator is fitted.
    trainer = Trainer(ctx="cpu", epochs=100, learning_rate=1e-4)
    estimator = deepar.DeepAREstimator(
        freq=meta.freq,
        prediction_length=meta.prediction_length,
        trainer=trainer,
    )

    # Training the estimator yields the predictor.
    predictor = estimator.train(training_data=training_data)

    forecasts, test_series = make_evaluation_predictions(
        dataset=testing_data,
        predictor=predictor,
        num_samples=10,
    )

    # Legend lists the intervals widest first, hence the reversal.
    prediction_intervals = (50.0, 90.0)
    interval_labels = [
        f"{k}% forecast interval" for k in reversed(prediction_intervals)
    ]
    legend = ["actual data", "median forecast", *interval_labels]

    _, forecast_ax = plt.subplots(1, 1, figsize=(11, 8))
    # Last 150 points of the first actual series.
    first_actual = list(test_series)[0]
    first_actual[-150:].plot(ax=forecast_ax)
    # First forecast, drawn in red with the chosen intervals.
    first_forecast = list(forecasts)[0]
    first_forecast.plot(prediction_intervals=prediction_intervals, color='r')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
    plt.savefig("deepar-model.png")

Exemple #4

0

Afficher le fichier

 def predict(self, dataset: Iterable[Dict]) -> Iterator[SampleForecast]:
     """Yield one forecast per entry of ``dataset``.

     For each entry a placeholder series of ``self.prediction_length``
     ``None`` values is created, starting one ``self.freq`` step after the
     entry's last timestamp; it is turned into a prediction dataframe and
     passed to ``self.ag_model.predict``.
     """
     for item in dataset:
         history = to_pandas(item)
         last_stamp = history.index[-1]
         step = pd.tseries.frequencies.to_offset(self.freq)
         forecast_start = pd.Timestamp(last_stamp + step, freq=self.freq)

         # The target is unknown at prediction time, hence all ``None``.
         placeholder = {
             "start": forecast_start,
             "target": np.array([None] * self.prediction_length),
         }
         features = get_prediction_dataframe(to_pandas(placeholder))
         raw_prediction = self.ag_model.predict(features)

         yield self.to_forecast(
             raw_prediction,
             forecast_start,
             item.get(FieldName.ITEM_ID, None),
         )

Exemple #5

0

Afficher le fichier

def get_dataset(**kw):
    """Load a table from disk and wrap its rows in a GluonTS ``ListDataset``.

    Keyword arguments read by this function:
        train: truthy selects ``train_data_path``, otherwise ``test_data_path``.
        train_data_path / test_data_path: location of the data file.
        uri_type: ``"pickle"`` reads with ``pd.read_pickle``; anything else
            (or absent) reads with ``pd.read_csv``.
        num_series: number of leading rows, each used as one series.
        start: start value shared by every series.
        freq: frequency passed to ``ListDataset``.
        save_fig: figure path, only read when ``VERBOSE`` is truthy.

    Returns:
        The constructed ``ListDataset``.
    """
    # Pick the file belonging to the requested split.
    if kw['train']:
        data_path = kw['train_data_path']
    else:
        data_path = kw['test_data_path']

    # NOTE(review): read_pickle executes arbitrary code from the file; make
    # sure pickle inputs come from a trusted source.
    reader = pd.read_pickle if kw.get("uri_type") == "pickle" else pd.read_csv
    table = reader(data_path)

    # One GluonTS entry per row of the table.
    entries = []
    for row in range(kw['num_series']):
        entries.append({
            FieldName.TARGET: table.iloc[row].values,
            FieldName.START: kw['start'],
        })
    gluonts_ds = ListDataset(entries, freq=kw['freq'])

    if VERBOSE:
        # Plot the first series and save the figure for inspection.
        to_pandas(next(iter(gluonts_ds))).plot()
        plt.savefig(kw['save_fig'])

    return gluonts_ds

Exemple #6

0

Afficher le fichier

Fichier : example.py Projet : yifeim/gluon-ts

def run_example():
    """Fit a TabularEstimator on five electricity series and plot forecasts.

    One figure is shown per training series, with the last seven prediction
    lengths of the target next to the forecast.
    """
    dataset = get_dataset("electricity")

    estimator_options = dict(
        freq="H",
        prediction_length=24,
        # two minutes for training
        time_limits=2 * 60,
        # makes prediction faster, but potentially less accurate
        disable_auto_regression=True,
    )
    estimator = TabularEstimator(**estimator_options)

    # Only the first few series are used.
    n_train = 5
    training_data = list(islice(dataset.train, n_train))

    predictor = estimator.train(training_data=training_data)

    # All forecasts are materialised before any plotting starts.
    forecasts = list(predictor.predict(training_data))

    for entry, forecast in zip(training_data, forecasts):
        history = to_pandas(entry)
        window = 7 * predictor.prediction_length
        plt.figure()
        plt.plot(history[-window:], label="target")
        forecast.plot()
        plt.show()

Exemple #7

0

Afficher le fichier

    def _predict_batch_autoreg(self, dataset: Iterable[Dict],
                               **kwargs) -> Iterator[SampleForecast]:
        """Forecast all entries step by step, predicting one step per batch.

        Every entry is scaled with ``self.scaling`` and extended by
        ``self.prediction_length`` empty slots. For each forecast step the
        features of that step are built for every series, predicted in a
        single ``self.ag_model.predict`` call, and written back into the
        extended series so they are part of the past data of later steps.

        Yields:
            One forecast per entry (via ``self._to_forecast``), with the
            predictions multiplied by the scale returned from
            ``self.scaling``.
        """
        # TODO clean up
        # TODO optimize
        batch_ids = []
        batch_scales = []
        batch_series = []

        for entry in dataset:
            batch_ids.append(entry.get(FieldName.ITEM_ID, None))
            series, scale = self.scaling(to_pandas(entry))
            batch_scales.append(scale)
            batch_series.append(series)

        # Timestamps to forecast, starting one step after each series ends.
        batch_forecast_indices = [
            pd.date_range(
                series.index[-1] + series.index.freq,
                freq=series.index.freq,
                periods=self.prediction_length,
            ) for series in batch_series
        ]

        # History followed by empty slots that get filled step by step.
        # ``Series.append`` was removed in pandas 2.0; ``pd.concat`` is the
        # equivalent that works across pandas versions.
        batch_full_series = [
            pd.concat([
                series,
                pd.Series(
                    [None] * self.prediction_length,
                    index=forecast_index,
                ),
            ]) for series, forecast_index in zip(batch_series,
                                                 batch_forecast_indices)
        ]

        output = np.zeros((len(batch_series), self.prediction_length),
                          dtype=self.dtype)

        for k in range(self.prediction_length):
            dfs = []
            for fs, idx in zip(batch_full_series, batch_forecast_indices):
                idx_k = idx[k]
                dfs.append(
                    get_features_dataframe(
                        # single-row slice for the step being predicted
                        fs[idx_k:idx_k],
                        time_features=self.time_features,
                        lag_indices=self.lag_indices,
                        # everything strictly before the current step
                        past_data=fs[:idx_k][:-1],
                    ))
            df = pd.concat(dfs)
            out_k = self.ag_model.predict(df)
            output[:, k] = out_k
            # Feed the predictions back so later steps can use them.
            for fs, idx, v in zip(batch_full_series, batch_forecast_indices,
                                  out_k):
                fs.at[idx[k]] = v

        for arr, scale, forecast_index, item_id in zip(output, batch_scales,
                                                       batch_forecast_indices,
                                                       batch_ids):
            yield self._to_forecast(
                scale * arr,
                forecast_index[0],
                item_id=item_id,
            )

Exemple #8

0

Afficher le fichier

Fichier : example.py Projet : vishalbelsare/gluon-ts

def run_example():
    """Train, serialize, reload and use a TabularEstimator predictor.

    Fits on five electricity series, writes the predictor to the
    ``GluonTSTabularPredictor`` directory, restores it from there and shows
    one figure per series with the target tail and the forecast.
    """
    dataset = get_dataset("electricity")
    serialize_path = Path("GluonTSTabularPredictor")

    estimator_options = dict(
        freq="H",
        prediction_length=24,
        # Training time budget. NOTE(review): the previous comment said
        # "two minutes" but the value is 10 -- presumably seconds; confirm.
        time_limit=10,
        # makes prediction faster, but potentially less accurate
        disable_auto_regression=True,
        # split the last 24 targets from each time series to be the
        # validation data
        last_k_for_val=24,
        quantiles_to_predict=None,
    )
    estimator = TabularEstimator(**estimator_options)

    n_train = 5
    training_data = list(islice(dataset.train, n_train))

    trained = estimator.train(training_data=training_data)

    # Round-trip through disk: the forecasts below come from the reloaded
    # predictor, not from the in-memory one.
    os.makedirs(serialize_path, exist_ok=True)
    trained.serialize(serialize_path)
    del trained
    predictor = Predictor.deserialize(serialize_path)

    forecasts = list(predictor.predict(training_data))

    for entry, forecast in zip(training_data, forecasts):
        history = to_pandas(entry)
        window = 7 * predictor.prediction_length
        plt.figure()
        plt.plot(history[-window:], label="target")
        forecast.plot()
        plt.show()

Exemple #9

0

Afficher le fichier

 def check_consistency(entry, f1, f2):
     """Assert that two forecasts for ``entry`` agree with each other.

     Both must hold samples of shape ``(1, prediction_length)``, start one
     step after the last timestamp of the entry and have numerically close
     samples. ``prediction_length`` comes from the enclosing scope.
     """
     expected_start = to_pandas(entry).index[-1] + 1
     for forecast in (f1, f2):
         assert forecast.samples.shape == (1, prediction_length)
         assert forecast.start_date == expected_start
     assert np.allclose(f1.samples, f2.samples)

Exemple #10

0

Afficher le fichier

 def train(self, training_data: Dataset) -> TabularPredictor:
     """Fit ``self.task`` on the prediction dataframes of all entries.

     Every entry of ``training_data`` is converted with ``to_pandas`` and
     ``get_prediction_dataframe``; the frames are concatenated and fitted
     with ``"target"`` as the label column.

     Returns:
         A ``TabularPredictor`` wrapping the fitted model together with
         ``self.freq`` and ``self.prediction_length``.
     """
     frames = []
     for entry in training_data:
         frames.append(get_prediction_dataframe(to_pandas(entry)))
     combined = pd.concat(frames)
     fitted_model = self.task.fit(combined, label="target")
     return TabularPredictor(fitted_model, self.freq, self.prediction_length)

Exemple #11

0

Afficher le fichier

def multivar_df(ds):
    """Turn a multivariate entry into a wide dataframe, one column per series.

    ``ds["target"]`` is indexed along its first axis; each slice is combined
    with the remaining fields of ``ds``, converted with ``to_pandas`` and
    added as column ``ts_<i>``. The index is finally moved into a ``time``
    column.
    """
    wide = pd.DataFrame()
    series_count = ds["target"].shape[0]
    for row in range(series_count):
        # Same entry as ``ds`` but with a single target row.
        single = {
            key: (ds["target"][row] if key == "target" else ds[key])
            for key in ds
        }
        column = to_pandas(single).to_frame().rename(
            columns={0: f"ts_{row}"})
        wide = pd.concat([wide, column], axis=1, sort=True)

    return wide.reset_index().rename(columns={"index": "time"})

Exemple #12

0

Afficher le fichier

    def train(self, training_data: Dataset) -> TabularPredictor:
        """Fit ``self.task`` as a regression problem on all training entries.

        Each entry is converted with ``to_pandas`` and
        ``get_prediction_dataframe``; the concatenated frame is fitted with
        ``"target"`` as label. ``self.kwargs`` is forwarded to ``fit``.

        Returns:
            A ``TabularPredictor`` built from the fitted model,
            ``self.freq`` and ``self.prediction_length``.
        """
        frames = []
        for entry in training_data:
            frames.append(get_prediction_dataframe(to_pandas(entry)))
        combined = pd.concat(frames)

        fitted_model = self.task.fit(
            combined,
            label="target",
            problem_type="regression",
            **self.kwargs,
        )

        return TabularPredictor(fitted_model, self.freq,
                                self.prediction_length)

Exemple #13

0

Afficher le fichier

Fichier : plots.py Projet : Naaapp/MasterThesis

def plot_train_test_dataset_first(dataset):
    """Plot the first train and test series and print dataset properties.

    Two figures are shown: the first training series, then the first test
    series with a red vertical line at the last training timestamp. The
    length difference, learning length, prediction length and frequency of
    ``dataset`` are printed afterwards.
    """
    train_series = to_pandas(next(iter(dataset.train_ds)))
    train_series.plot()
    plt.grid(which="both")
    plt.legend(["train series"], loc="upper left")
    plt.show()

    test_series = to_pandas(next(iter(dataset.test_ds)))
    test_series.plot()
    # Red line marks where the training data ends.
    plt.axvline(train_series.index[-1], color='r')
    plt.grid(which="both")
    plt.legend(["test series", "end of train series"], loc="upper left")
    plt.show()

    window_length = len(test_series) - len(train_series)
    print(f"Length of forecasting window in test dataset: {window_length}")
    print(f"Learning length: {dataset.learning_length}")
    print(f"Recommended prediction horizon: {dataset.prediction_length}")
    print(f"Frequency of the time series: {dataset.freq}")

Exemple #14

0

Afficher le fichier

Fichier : evaluation.py Projet : jlgleason/hts-constrained-embeddings

def evaluate_optimal_rec(
    predictions: pd.DataFrame,
    test_data: ListDataset,
    hierarchy_dict: Dict[int, List[int]],
) -> Dict[str, Dict[str, float]]:
    """Aggregate error metrics for each level of the hierarchy.

    Metrics are calculated over a data frame of point estimates (for example,
    those returned after optimal reconciliation) instead of probabilistic
    forecast objects.

    Arguments:
        predictions {pd.DataFrame} -- data frame of point predictions; it is
            transposed before indexing, so each column is treated as one
            series
        test_data {ListDataset} -- test dataset
        hierarchy_dict {Dict[int, List[int]]} -- mapping from hierarchy level
            to the series prediction idxs included in that level

    Returns:
        Dict[str, Dict[str, float]] -- mapping of each key of
            ``hierarchy_dict`` to the evaluator result for that level, plus
            an ``'all'`` entry evaluated over every series
    """

    evaluator = PointEstimateEvaluator()

    # Both inputs are identical for every level, so materialise them once
    # instead of re-reading the dataset per hierarchy level.
    all_series = np.array(list(test_data))
    point_forecasts = predictions.values.T

    evaluations = {}
    for level, idxs in hierarchy_dict.items():
        selection = np.array(idxs)
        evaluations[level] = evaluator(
            [to_pandas(series) for series in all_series[selection]],
            point_forecasts[selection],
        )

    evaluations['all'] = evaluator(
        [to_pandas(series) for series in all_series],
        point_forecasts,
    )
    return evaluations

Exemple #15

0

Afficher le fichier

Fichier : predictor.py Projet : yifeim/gluon-ts

    def _predict_batch_one_shot(
        self, dataset: Iterable[Dict], **kwargs
    ) -> Iterator[SampleForecast]:
        """Forecast all entries with a single ``ag_model.predict`` call.

        For every entry the scaled series is extended by an empty forecast
        window whose features are built from the scaled history. The feature
        frames of all entries are concatenated, predicted at once and split
        back into one equal-sized chunk per entry.

        Yields:
            One forecast per entry (via ``self._to_forecast``), with the
            predictions multiplied by the scale returned from
            ``self.scaling``.
        """
        # TODO clean up
        # TODO optimize
        bookkeeping = []  # (scale, forecast_start, item_id) per entry
        feature_frames = []

        for entry in dataset:
            item_id = entry.get(FieldName.ITEM_ID, None)
            series, scale = self.scaling(to_pandas(entry))

            # The forecast window starts one step after the history ends.
            forecast_start = series.index[-1] + series.index.freq
            horizon_index = pd.date_range(
                forecast_start,
                freq=series.index.freq,
                periods=self.prediction_length,
            )
            empty_window = pd.Series(
                [None] * self.prediction_length,
                index=horizon_index,
            )

            feature_frames.append(
                get_features_dataframe(
                    empty_window,
                    time_features=self.time_features,
                    lag_indices=self.lag_indices,
                    past_data=series,
                )
            )
            bookkeeping.append((scale, forecast_start, item_id))

        all_features = pd.concat(feature_frames)
        predictions = self.ag_model.predict(all_features)

        # One equally sized chunk of predictions per entry.
        chunks = np.split(predictions, len(feature_frames))
        for chunk, (scale, forecast_start, item_id) in zip(chunks,
                                                           bookkeeping):
            yield self._to_forecast(
                scale * chunk,
                forecast_start,
                item_id=item_id,
            )

Exemple #16

0

Afficher le fichier

def compute_time_features(
    entry: Dict,
    time_features: List[TimeFeature],
    pred_length: int = 0,
    dtype=np.float32,
):
    """Evaluate the time features over an entry's index, stacked row-wise.

    The index of ``entry`` (built with the frequency of ``entry["start"]``)
    is extended by ``pred_length`` further periods when ``pred_length`` is
    positive. Each feature in ``time_features`` is called with that index
    and the results are stacked with ``np.vstack`` and cast to ``dtype``.
    """
    assert pred_length >= 0
    index = to_pandas(entry, freq=entry["start"].freq).index

    if pred_length > 0:
        # Periods following the last observed one.
        last_period = index[-1]
        future = pd.period_range(
            last_period + 1,
            last_period + pred_length,
            freq=index.freq,
        )
        index = index.union(future)

    stacked = np.vstack([feature(index) for feature in time_features])
    return stacked.astype(dtype)

Exemple #17

0

Afficher le fichier

def fit_predict_arima(
    training_data: ListDataset,
    horizon: int = 12,
    output_file: str = None,
    output_residuals: bool = True,
) -> pd.DataFrame:
    """ for each time series in the training_data individually:
            1) automatically discovers the order of an ARIMA model via
               ``pm.auto_arima``
            2) fits the discovered model
            3) makes predictions ``horizon`` steps into the future

        optionally writes predictions/in-sample residuals to an output file

    Arguments:
        training_data {ListDataset} -- training data

    Keyword Arguments:
        horizon {int} -- prediction length (default: {12})
        output_file {str} -- file to save predictions to (default: {None})
        output_residuals {bool} -- whether to output the residuals of the
            in-sample predictions. If True, the in-sample residuals are
            prepended to the out-of-sample predictions. Thus, if the in-sample
            data contains 24 timesteps and the out-of-sample data contains 6
            timesteps, the output data frame will contain 30 rows (timesteps).
            Only takes effect when ``output_file`` is given
            (default: {True})

    Returns:
        pd.DataFrame -- dataframe of point predictions from individually
            fitted ARIMA models, each column represents a series and each row
            a point in time
    """

    fits = []
    for train_series in training_data:
        fits.append(
            pm.auto_arima(
                to_pandas(train_series),
                suppress_warnings=True,
                error_action='ignore',
            )
        )

    forecasts = [fit.predict(n_periods=horizon) for fit in fits]
    preds = pd.DataFrame(forecasts).T

    # Nothing to write: hand back the plain out-of-sample predictions.
    if not output_file:
        return preds

    if output_residuals:
        in_sample_errors = []
        for fit, series in zip(fits, training_data):
            in_sample_errors.append(
                fit.predict_in_sample() - series['target'])
        residuals = pd.DataFrame(in_sample_errors).T
        preds = pd.concat([residuals, preds])
    preds.to_csv(output_file, index=False)

    return preds

Exemple #18

0

Afficher le fichier

Fichier : predictor.py Projet : yifeim/gluon-ts

    def _predict_serial(
        self, dataset: Iterable[Dict], **kwargs
    ) -> Iterator[SampleForecast]:
        """Forecast the entries of ``dataset`` one at a time.

        Each entry is scaled with ``self.scaling`` and extended by an empty
        forecast window. Without ``self.auto_regression`` the whole window is
        predicted in one call; otherwise it is predicted timestamp by
        timestamp, with earlier predictions included in the past data of
        later ones.

        Yields:
            One forecast per entry (via ``self._to_forecast``), with the
            predictions multiplied by the scale returned from
            ``self.scaling``.
        """
        for entry in dataset:
            series, scale = self.scaling(to_pandas(entry))

            # Timestamps to forecast, starting one step after the history.
            forecast_index = pd.date_range(
                series.index[-1] + series.index.freq,
                freq=series.index.freq,
                periods=self.prediction_length,
            )

            forecast_series = pd.Series(
                [None] * len(forecast_index),
                index=forecast_index,
            )

            # ``Series.append`` was removed in pandas 2.0; ``pd.concat`` is
            # the equivalent that works across pandas versions.
            full_series = pd.concat([series, forecast_series])

            if not self.auto_regression:  # predict all at once
                df = get_features_dataframe(
                    forecast_series,
                    time_features=self.time_features,
                    lag_indices=self.lag_indices,
                    past_data=series,
                )
                full_series[forecast_series.index] = self.ag_model.predict(df)

            else:  # predict step by step
                for idx in forecast_series.index:
                    df = get_features_dataframe(
                        # single-row slice for the step being predicted
                        forecast_series[idx:idx],
                        time_features=self.time_features,
                        lag_indices=self.lag_indices,
                        # everything strictly before the current step,
                        # including predictions written in earlier iterations
                        past_data=full_series[:idx][:-1],
                    )
                    full_series[idx] = self.ag_model.predict(df).item()

            yield self._to_forecast(
                scale * full_series[forecast_index].values.astype(self.dtype),
                forecast_index[0],
                item_id=entry.get(FieldName.ITEM_ID, None),
            )

Exemple #19

0

Afficher le fichier

 def train(self, training_data: Dataset) -> TabularPredictor:
     """Fit ``self.task`` as a regression problem on scaled feature frames.

     Each entry is converted with ``to_pandas`` and scaled with
     ``self.scaling`` (only the first element of its result is used); a
     feature dataframe is then built with the configured time features and
     lag indices. ``self.kwargs`` is forwarded to ``fit``.

     Returns:
         A ``TabularPredictor`` carrying the fitted model and this
         estimator's configuration.
     """
     feature_frames = []
     for entry in training_data:
         scaled_series = self.scaling(to_pandas(entry))[0]
         feature_frames.append(
             get_features_dataframe(
                 series=scaled_series,
                 time_features=self.time_features,
                 lag_indices=self.lag_indices,
             )
         )
     combined = pd.concat(feature_frames)

     ag_model = self.task.fit(
         combined,
         label="target",
         problem_type="regression",
         **self.kwargs,
     )

     return TabularPredictor(
         ag_model=ag_model,
         freq=self.freq,
         prediction_length=self.prediction_length,
         time_features=self.time_features,
         lag_indices=self.lag_indices,
         scaling=self.scaling,
         batch_size=self.batch_size,
     )

Exemple #20

0

Afficher le fichier

def test_autogluon_tabular():
    """Check forecast length and start date of ``LocalTabularPredictor``.

    Two weekly ("W-SUN") series of six values each are forecast two steps
    ahead; every forecast must contain two steps per sample and start one
    frequency step after the last timestamp of its series.

    Returns:
        The list of forecasts.
    """
    freq = "W-SUN"
    prediction_length = 2

    # Target values of the two series in the dataset.
    targets = (
        [1089.2, 1078.91, 1099.88, 35790.55, 34096.95, 34906.95],
        [1099.2, 1098.91, 1069.88, 35990.55, 34076.95, 34766.95],
    )
    entries = [
        {
            "start": pd.Timestamp("1750-01-04 00:00:00", freq="W-SUN"),
            "target": np.array(values),
        }
        for values in targets
    ]
    dataset = ListDataset(entries, freq="W-SUN")

    predictor = LocalTabularPredictor(
        freq=freq,
        prediction_length=prediction_length,
    )
    forecasts = list(predictor.predict(dataset))

    for entry, forecast in zip(dataset, forecasts):
        history = to_pandas(entry)
        step = pd.tseries.frequencies.to_offset(freq)
        expected_start = history.index[-1] + step
        assert forecast.samples.shape[1] == prediction_length
        assert forecast.start_date == expected_start

    return forecasts

Exemple #21

0

Afficher le fichier

 def __init__(self,
              dataset=None,
              custom_dataset=None,
              start=None,
              freq=None,
              prediction_length=None,
              learning_length=None,
              context_length=100,
              cardinality=None):
     """Set up train/test datasets from a ready dataset or a raw array.

     With ``dataset`` given, the splits, frequency and prediction length are
     read from it, the learning length is the length of its first training
     series and the cardinality is ``[1]``.

     Otherwise, with ``custom_dataset`` given, every row becomes one series
     starting at ``start``: the test set uses the full rows, the train set
     drops the last ``prediction_length`` columns. The remaining settings
     are taken from the arguments.

     When both are ``None`` no attribute is set.
     """
     if dataset is not None:
         first_train = next(iter(dataset.train))
         self.learning_length = len(to_pandas(first_train))
         metadata = dataset.metadata
         self.prediction_length = metadata.prediction_length
         self.freq = metadata.freq
         self.test_ds = dataset.test
         self.train_ds = dataset.train
         self.context_length = context_length
         self.cardinality = [1]
     elif custom_dataset is not None:
         self.freq = freq
         self.start = start
         self.learning_length = learning_length
         self.prediction_length = prediction_length
         self.context_length = context_length
         self.cardinality = cardinality

         def as_entries(rows):
             # Attach the "target" and "start" fields to every row.
             return [{'target': row, 'start': start} for row in rows]

         # Train split: cut off the last window of length
         # "prediction_length".
         self.train_ds = ListDataset(
             as_entries(custom_dataset[:, :-prediction_length]),
             freq=freq,
         )
         # Test split: the whole dataset.
         self.test_ds = ListDataset(as_entries(custom_dataset), freq=freq)

Exemple #22

0

Afficher le fichier

Fichier : deepAR2_0825_01_train.py Projet : PandoraLS/python_toys

import pandas as pd
import matplotlib.pyplot as plt
# Load the tweet-volume CSV and index it by its parsed timestamps.
csv_path = '/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/Twitter_volume_AMZN.csv'
df = pd.read_csv(csv_path, header=0, sep=',')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.set_index(['timestamp'])

# Notes on label slicing of the series:
# - df.value[:"2015-04-22 20:47:53"] includes the final timestamp
#   [2015-04-22 20:47:53].
# - df.value[:"2015-04-23 20:47:53"]: when the given timestamp lies beyond
#   the range of the data, whatever data exists is returned.
# - df.index[0] is the start timestamp.
# "start" is the starting timestamp and "target" holds the series values for
# the timestamps; this entry format is fixed. Here df.index holds the
# timestamps and df.value the corresponding values.
data = common.ListDataset(
    [{
        'start': df.index[0],
        'target': df.value[:"2015-04-22 21:00:00"],
    }],
    freq='H',
)

estimator = deepar.DeepAREstimator(
    freq='H',
    prediction_length=24,
    trainer=Trainer(epochs=50),
)

predictor = estimator.train(training_data=data)

# Persist the trained predictor.
model_dir = Path("/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/model_save")
predictor.serialize(model_dir)

# Plot the last 60 training points next to the forecast with its intervals.
for train_entry, predict_result in zip(data, predictor.predict(data)):
    history = to_pandas(train_entry)
    history[-60:].plot(linewidth=2)
    predict_result.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.show()

# Output the prediction results.
prediction = next(predictor.predict(data))
print(prediction.mean)
prediction.plot(output_file='graph.png')

Exemple #23

0

Afficher le fichier

Fichier : rolling_dataset.py Projet : vishalbelsare/gluon-ts

def generate_rolling_dataset(
    dataset: Dataset,
    strategy,
    start_time: pd.Timestamp,
    end_time: Optional[pd.Timestamp] = None,
) -> Dataset:
    """
    Build an augmented dataset in which every time series has been rolled
    according to the supplied parameters.

    Rolling is applied to the data inside the window between *start_time* and
    *end_time* of each time series. When *end_time* is omitted, all datapoints
    from *start_time* to the end of the series are rolled. How the data is
    rolled is decided by the strategy.

    The examples below use this dataset holding a single time series:

    >>> ds = [{
    ...     "target": np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]),
    ...     "start": pd.Timestamp('2000-1-1-01', freq='1H')
    ... }]

    Calling generate_rolling_dataset on it like this:

    >>> rolled = generate_rolling_dataset(
    ...     dataset=ds,
    ...     strategy = StepStrategy(prediction_length=2),
    ...     start_time = pd.Timestamp('2000-1-1-06', '1H'),
    ...     end_time = pd.Timestamp('2000-1-1-10', '1H')
    ... )

    gives the following dataset (only target values shown for brevity)::

        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        [1, 2, 3, 4, 5, 6, 7, 8, 9]
        [1, 2, 3, 4, 5, 6, 7, 8]
        [1, 2, 3, 4, 5, 6, 7]

    i.e. the maximum number of rolls possible between *start_time* and
    *end_time*. The StepStrategy only cuts the last value of the target for
    as long as there are enough values after *start_time* to perform
    predictions on.

    Without an end time all datapoints from *start_time* onwards are rolled
    over, giving::

        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        [1, 2, 3, 4, 5, 6, 7, 8, 9]
        [1, 2, 3, 4, 5, 6, 7, 8]
        [1, 2, 3, 4, 5, 6, 7]

    The step_size of the strategy can be changed:

    >>> strategy = StepStrategy(prediction_length=2, step_size=2)

    This puts fewer values in the output which, when prediction_length
    matches step_size, ensures that each prediction is made on unique/new
    data. With the strategy above the output is::

        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        [1, 2, 3, 4, 5, 6, 7, 8]

    and, without an end time and with step_size=2::

        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        [1, 2, 3, 4, 5, 6, 7, 8, 9]
        [1, 2, 3, 4, 5, 6, 7]

    Parameters
    ----------
    dataset
        Dataset to generate the rolling forecasting datasets from
    strategy
        The strategy to use when rolling; its ``get_windows`` method is
        called with the data inside the rolling window
    start_time
        The start of the window where rolling forecasts should be applied;
        its ``freq`` is used when converting each item to a series
    end_time
        The end time of the window where rolling should be applied

    Returns
    -------
    Dataset
        The augmented dataset, as a list of items
    """
    assert dataset, "a dataset to perform rolling evaluation on is needed"
    assert start_time, "a pandas Timestamp object is needed for the start time"
    assert strategy, """a strategy to use when rolling is needed, for example
        gluonts.dataset.rolling_dataset.StepStrategy"""
    if end_time:
        assert end_time > start_time, "end time has to be after the start time"

    rolled_items = []
    for item in dataset:
        series = to_pandas(item, start_time.freq)
        # Everything strictly before start_time stays fixed in every roll.
        history = series[:start_time][:-1].to_numpy()
        rolling_window = series[start_time:end_time]

        for window in strategy.get_windows(rolling_window):
            rolled = item.copy()
            target = np.concatenate([history, window.to_numpy()])
            rolled[FieldName.TARGET] = target
            # Cut the other features down to the new target length.
            rolled = truncate_features(rolled, len(rolled[FieldName.TARGET]))
            rolled_items.append(rolled)

    return rolled_items

Exemple #24

0

Afficher le fichier

import mxnet as mx
from mxnet import gluon
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json

from gluonts.dataset.repository.datasets import get_dataset, dataset_recipes
from gluonts.dataset.util import to_pandas

# List the names of all datasets known to the GluonTS repository.
print(f"Available datasets: {list(dataset_recipes.keys())}")

# Load the "m4_hourly" dataset (regenerate=False is passed to get_dataset).
dataset = get_dataset("m4_hourly", regenerate=False)

# Plot the first series of the training split.
entry = next(iter(dataset.train))
train_series = to_pandas(entry)
train_series.plot()
plt.grid(which="both")
plt.legend(["train series"], loc="upper left")
plt.show()

# Plot the first series of the test split and mark the last training
# timestamp with a red vertical line.
entry = next(iter(dataset.test))
test_series = to_pandas(entry)
test_series.plot()
plt.axvline(train_series.index[-1], color='r')  # end of train dataset
plt.grid(which="both")
plt.legend(["test series", "end of train series"], loc="upper left")
plt.show()

from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.trainer import Trainer

Exemple #25

0

Afficher le fichier

Fichier : estimator.py Projet : vishalbelsare/gluon-ts

    def train(
        self,
        training_data: Dataset,
        validation_data: Optional[Dataset] = None,
    ) -> TabularPredictor:
        """
        Fit an AutoGluon tabular model on per-series feature dataframes.

        Every dataset entry is converted to a pandas series, passed through
        ``self.scaling`` and turned into a feature dataframe with
        ``get_features_dataframe``. The validation frame is chosen as follows:

        * ``validation_data`` given: its feature frames are used for tuning;
        * otherwise, ``self.last_k_for_val`` set: the last ``k`` rows of every
          training frame are held out for tuning;
        * otherwise no tuning data is passed and AutoGluon splits on its own.

        In the first two cases ``auto_stack`` is forced to ``False``.

        Parameters
        ----------
        training_data
            Dataset used to build the training frame.
        validation_data
            Optional dataset used to build the tuning frame.

        Returns
        -------
        TabularPredictor
            Predictor wrapping the fitted AutoGluon model together with the
            estimator's feature and scaling configuration.
        """

        def build_feature_frames(dataset: Dataset) -> list:
            # One feature dataframe per entry, computed on the scaled series
            # (first element of whatever self.scaling returns).
            return [
                get_features_dataframe(
                    series=self.scaling(to_pandas(entry))[0],
                    time_features=self.time_features,
                    lag_indices=self.lag_indices,
                )
                for entry in dataset
            ]

        kwargs_override = {}
        dfs = build_feature_frames(training_data)

        if validation_data is not None or self.last_k_for_val is not None:
            kwargs_override["auto_stack"] = False
            logger.warning(
                "Auto Stacking is turned off "
                "as validation dataset is provided before input into Tabular Predictor."
            )

        if validation_data is not None:
            logger.info("Validation dataset is directly provided.")
            train_df = pd.concat(dfs)
            val_df = pd.concat(build_feature_frames(validation_data))
        elif self.last_k_for_val is not None:
            logger.info(
                f"last_k_for_val is provided, choosing last {self.last_k_for_val} of each time series as validation set."
            )
            k = self.last_k_for_val
            # Split every series individually so the hold-out is always the
            # most recent k rows of that series.
            train_df = pd.concat([frame.iloc[:-k, :] for frame in dfs])
            val_df = pd.concat([frame.iloc[-k:, :] for frame in dfs])
        else:
            # The original message concatenated two literals without a
            # separating space ("automatically,Note"); the space is restored.
            logger.info(
                "No validation dataset is provided, will let TabularPredictor do the splitting automatically, "
                "Note that this might break the time order of time series data."
            )
            train_df = pd.concat(dfs)
            val_df = None

        # Only the problem definition differs between quantile and point
        # forecasting; the fit call itself is shared.
        if self.quantiles_to_predict is not None:
            problem_kwargs = {
                "problem_type": "quantile",
                "quantile_levels": self.quantiles_to_predict,
            }
        else:
            problem_kwargs = {
                "problem_type": "regression",
                "eval_metric": self.eval_metric,
            }

        ag_model = AutogluonTabularPredictor(
            label="target", **problem_kwargs
        ).fit(
            train_df,
            tuning_data=val_df,
            # kwargs_override wins over user-supplied kwargs on conflict.
            **{**self.kwargs, **kwargs_override},
        )

        return TabularPredictor(
            ag_model=ag_model,
            freq=self.freq,
            prediction_length=self.prediction_length,
            time_features=self.time_features,
            lag_indices=self.lag_indices,
            scaling=self.scaling,
            batch_size=self.batch_size,
            quantiles_to_predict=self.quantiles_to_predict,
        )

Exemple #26

0

Afficher le fichier

def quick_start_tutorial():
    """Run the quick-start workflow: load data, train, forecast, evaluate.

    Steps, in order:

    1. Print the available dataset recipes, load "m4_hourly" and plot the
       first train and test series.
    2. Build small random custom train/test ``ListDataset`` objects (they
       are constructed for illustration and not used further).
    3. Train a ``SimpleFeedForwardEstimator`` on the m4_hourly train split.
    4. Produce sample forecasts for the test split, print a few summaries,
       plot the first forecast and print/plot the evaluation metrics.

    The function returns nothing; all results are printed or plotted.
    """
    # ---- Provided datasets -------------------------------------------
    print(f"Available datasets: {list(dataset_recipes.keys())}")

    dataset = get_dataset("m4_hourly", regenerate=True)

    first_train_entry = next(iter(dataset.train))

    plt.figure()
    train_series = to_pandas(first_train_entry)
    train_series.plot()
    plt.grid(which="both")
    plt.legend(["train series"], loc="upper left")

    first_test_entry = next(iter(dataset.test))

    plt.figure()
    test_series = to_pandas(first_test_entry)
    test_series.plot()
    # Red vertical line at the last timestamp of the train series.
    train_end = train_series.index[-1]
    plt.axvline(train_end, color="r")
    plt.grid(which="both")
    plt.legend(["test series", "end of train series"], loc="upper left")

    plt.show()

    # ---- Custom datasets ---------------------------------------------
    num_series = 10
    num_timesteps = 100
    prediction_length = 24
    freq = "1H"
    custom_dataset = np.random.normal(size=(num_series, num_timesteps))
    # The same start is shared here; it may differ per series.
    start = pd.Timestamp("01-01-2019", freq=freq)

    # Train entries drop the final `prediction_length` steps of each row.
    train_entries = [
        {"target": row, "start": start}
        for row in custom_dataset[:, :-prediction_length]
    ]
    train_ds = ListDataset(train_entries, freq=freq)

    # Test entries keep every row in full.
    test_entries = [{"target": row, "start": start} for row in custom_dataset]
    test_ds = ListDataset(test_entries, freq=freq)

    # ---- Train an existing model (Estimator) -------------------------
    trainer = Trainer(
        ctx="cpu",
        epochs=5,
        learning_rate=1e-3,
        num_batches_per_epoch=100,
    )
    estimator = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        prediction_length=dataset.metadata.prediction_length,
        context_length=100,
        freq=dataset.metadata.freq,
        trainer=trainer,
    )

    predictor = estimator.train(dataset.train)

    # ---- Visualize and evaluate forecasts ----------------------------
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test,
        predictor=predictor,
        num_samples=100,  # sample paths drawn per forecast
    )

    forecasts = list(forecast_it)
    tss = list(ts_it)

    # First series returned by the evaluation helper, and its first five
    # values flattened to a 1-D numpy array.
    ts_entry = tss[0]
    leading_values = np.array(ts_entry[:5]).reshape(-1)
    print(leading_values)

    # Same five values, read straight from the first test entry.
    dataset_test_entry = next(iter(dataset.test))
    print(dataset_test_entry["target"][:5])

    forecast_entry = forecasts[0]

    print(f"Number of sample paths: {forecast_entry.num_samples}")
    print(f"Dimension of samples: {forecast_entry.samples.shape}")
    print(f"Start date of the forecast window: {forecast_entry.start_date}")
    print(f"Frequency of the time series: {forecast_entry.freq}")

    print(f"Mean of the future window:\n {forecast_entry.mean}")
    print(f"0.5-quantile (median) of the future window:\n {forecast_entry.quantile(0.5)}")

    def plot_prob_forecasts(ts_entry, forecast_entry):
        # Plot the last 150 observations together with the forecast and
        # its 50% / 90% prediction intervals.
        plot_length = 150
        prediction_intervals = (50.0, 90.0)
        interval_labels = [
            f"{k}% prediction interval"
            for k in reversed(prediction_intervals)
        ]
        legend = ["observations", "median prediction"] + interval_labels

        fig, ax = plt.subplots(1, 1, figsize=(10, 7))
        ts_entry[-plot_length:].plot(ax=ax)
        forecast_entry.plot(prediction_intervals=prediction_intervals, color="g")
        plt.grid(which="both")
        plt.legend(legend, loc="upper left")
        plt.show()

    plot_prob_forecasts(ts_entry, forecast_entry)

    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(
        iter(tss), iter(forecasts), num_series=len(dataset.test)
    )

    print(json.dumps(agg_metrics, indent=4))
    print(item_metrics.head())

    item_metrics.plot(x="MSIS", y="MASE", kind="scatter")
    plt.grid(which="both")

    plt.show()

Exemple #27

0

Afficher le fichier

Fichier : TrainingHarness.py Projet : MichaelAshton/time-series-prediction

    def cv_harness_trainer(self):
        """Run the cross-validation loop for every configured model.

        For each ``{model_name: (model, model_kwarg)}`` mapping found under
        ``self.model[self.type_time_series]``, the first training series is
        handed to ``self._internal_cv_trainer`` and the accumulated
        predictions and scores are written as CSV files below
        ``<out_path>/saved_models``.

        Returns
        -------
        tuple
            ``(y_pred_list_df, score_list_df)`` built after the last model
            processed, or ``(None, None)`` when no model was iterated.

        Notes
        -----
        ``self.val_data`` is not consumed here: ``_internal_cv_trainer``
        only takes a single array ``X`` and creates its own splits.
        """
        list_y_pred = []
        list_score = []

        # Defined up front so the final return cannot hit an unbound local
        # when the model collection is empty.
        y_pred_list_df = None
        score_list_df = None

        models = self.model
        type_time_series = self.type_time_series
        n_splits = self.n_splits
        out_path = self.out_path
        freq = self.freq
        prediction_length = self.prediction_length
        start_date = self.start_date

        # Only the first entry of the training dataset is cross-validated;
        # its values do not depend on the model, so convert once.
        train_entry = next(iter(self.train_data))
        X_train = to_pandas(train_entry).values

        for models_ in models.get(type_time_series):

            for model_name, value in models_.items():
                model = value[0]
                model_kwarg = value[1]

                self.__log.info(
                    "Starting generic cv train loop for Type_of_time_series={}, mlmodel={}, modelkwarg={}"
                    .format(type_time_series, model_name, model_kwarg))

                self.__log.info("Regression")
                splits = TimeSeriesSplit(n_splits=n_splits)

                # start the cross validation loop
                # (the callee's array parameter is named ``X``; the former
                # ``X_train=``/``X_test=`` keywords referenced an undefined
                # ``X_test`` and did not match its signature)
                self._internal_cv_trainer(
                    models=model,
                    model_name=model_name,
                    X=X_train,
                    ss=splits,
                    n_splits=n_splits,
                    list_y_pred=list_y_pred,
                    list_score=list_score,
                    out_path=out_path,
                    start_date=start_date,
                    prediction_length=prediction_length,
                    freq=freq,
                    type_time_series=type_time_series,
                )

                # output the dataframe of predicted vals with index as sample numbers
                y_pred_list_df = pd.concat(list_y_pred, axis=1)

                # drop columns repeated across models (e.g. "index")
                y_pred_list_df = y_pred_list_df.loc[
                    :, ~y_pred_list_df.columns.duplicated()]
                y_pred_list_df.set_index("index", inplace=True)

                y_pred_list_df_path_csv = os.path.join(
                    out_path,
                    "saved_models",
                    "y_pred_list_df.csv",
                )
                y_pred_list_df.to_csv(y_pred_list_df_path_csv)

                # output the dataframe of scores with index as sample numbers
                score_list_df = pd.concat(list_score, axis=1)

                # drop columns repeated across models
                score_list_df = score_list_df.loc[
                    :, ~score_list_df.columns.duplicated()]

                score_list_df_path_csv = os.path.join(
                    out_path,
                    "saved_models",
                    f"{type_time_series}_{model_name}_regression_score_list_df.csv",
                )
                score_list_df.to_csv(score_list_df_path_csv)

        return y_pred_list_df, score_list_df

Exemple #28

0

Afficher le fichier

Fichier : TrainingHarness.py Projet : MichaelAshton/time-series-prediction

    def _internal_cv_trainer(
        self,
        model_name: str,
        models: object,
        type_time_series: str,
        X: np.ndarray,
        ss: object,
        n_splits: int,
        list_y_pred: list,
        list_score: list,
        out_path: str,
        freq: str,
        prediction_length: int,
        start_date: str,
    ):
        """Cross-validation training loop for a single regression model.

        For every split produced by ``ss`` the training part of ``X`` is
        wrapped into two ``ListDataset`` objects (one without the last
        ``prediction_length`` values, one complete) and passed to
        ``self._cv_train_model_other``. Predictions, the matching true
        values and the two returned scores are collected per split, then
        stored in dataframes appended to ``list_y_pred`` / ``list_score``.
        The score dataframe is also written to
        ``<out_path>/saved_models``.

        Parameters
        ----------
        model_name : str
            Name of the model; used as prefix of column and file names.
        models : object
            Model object forwarded to ``_cv_train_model_other``.
        type_time_series : str
            Type of time series being trained (e.g. teams or by position);
            used in the output file name.
        X : np.ndarray
            Array of time series values.
        ss : object
            Split iterator (e.g. scikit-learn ``TimeSeriesSplit``) exposing
            ``split(X=...)``.
        n_splits : int
            Number of splits; only used for logging here.
        list_y_pred : list
            Output list; the prediction dataframe is appended to it.
        list_score : list
            Output list; the score dataframe is appended to it.
        out_path : str
            Base directory of generated models and scores.
        freq : str
            Frequency passed to ``pd.Timestamp`` for the series start,
            e.g. '1D', '2H', '3S'.
        prediction_length : int
            Length of the prediction horizon.
        start_date : str
            Start date of the time series.

        Returns
        -------
        None
            Results are delivered through ``list_y_pred``, ``list_score``
            and the CSV file.
        """

        # Plain int: ``np.int`` was only an alias of the builtin and has
        # been removed from NumPy.
        split_num = 0
        y_test_indices = []

        y_pred_df = pd.DataFrame()
        y_pred_ = []
        y_true_ = []

        score_df = pd.DataFrame()
        score_1 = []
        score_2 = []

        for train_index, _ in ss.split(X=X):

            split_num += 1
            self.__log.info("%%--%%")
            self.__log.info("Cross fold: %i of %i", split_num, n_splits)

            # Workaround: the split's own test part is not used. The train
            # part is itself divided into "all but the last
            # prediction_length values" (train) and "everything" (test).
            train_values = X[train_index].reshape(1, -1)
            start = pd.Timestamp(start_date, freq)
            # NOTE(review): the ListDataset frequency is hard-coded to
            # '1H' although ``freq`` is available -- confirm intent.
            X_train = ListDataset(
                [{'target': x, 'start': start}
                 for x in train_values[:, :-prediction_length]],
                freq='1H')
            X_test = ListDataset(
                [{'target': x, 'start': start} for x in train_values],
                freq='1H')

            y_pred_temp, score_1_temp, score_2_temp = self._cv_train_model_other(
                models=models,
                X_train=X_train,
                X_test=X_test,
                split_num=split_num,
                out_path=out_path,
                model_name=model_name,
                type_time_series=type_time_series)

            self.__log.info("Score_1_temp={}".format(score_1_temp))

            # Timestamps and true values of the held-out tail.
            dataset_pd = to_pandas(next(iter(X_test)))
            y_test = dataset_pd[-prediction_length:].index
            y_test_indices.append(y_test.values)
            y_pred_.append(y_pred_temp)
            y_true_.append(dataset_pd.loc[y_test].values)

            # keep track of the scores during loops
            score_1.append([score_1_temp])
            score_2.append([score_2_temp])

        prefix = f"{model_name}_regression"

        y_pred_df[prefix] = np.concatenate(y_pred_).ravel()
        y_pred_df["index"] = np.concatenate(y_test_indices).ravel()
        y_pred_df["y_true_val"] = np.concatenate(y_true_).ravel()

        # Per-split scores; per the column names score_1 is presumably the
        # MSE and score_2 the RMSE -- verify against _cv_train_model_other.
        mse_col = f"{prefix}_mse"
        rmse_col = f"{prefix}_rmse"
        score_df[mse_col] = np.concatenate(score_1).ravel()
        score_df[rmse_col] = np.concatenate(score_2).ravel()

        # Global statistics, each computed from its own metric column
        # (``global_mse_med`` used to be taken from the rmse column).
        for metric, column in (("mse", mse_col), ("rmse", rmse_col)):
            values = score_df[column]
            score_df[f"{prefix}_global_{metric}_ave"] = values.mean()
            score_df[f"{prefix}_global_{metric}_med"] = values.median()
            score_df[f"{prefix}_global_{metric}_std"] = values.std()

        # File name keeps the historical double underscore before
        # "cv_score_list" so existing consumers still find it.
        path_score_list_path_csv = os.path.join(
            out_path,
            "saved_models",
            f"{type_time_series}_{model_name}_{split_num}_regression__cv_score_list.csv",
        )

        score_df.to_csv(path_score_list_path_csv)

        list_y_pred.append(y_pred_df)
        list_score.append(score_df)

Exemple #29

0

Afficher le fichier

Fichier : deepAR.py Projet : BillyRobertson/PRISMO-trading-research-and-execution-system

)

# Create the testing dataset used for plotting: a single series taken from
# the `target_asset` column of `test_data`, starting at its first index value.
testing_data_plots = ListDataset(
    [{"start": test_data[target_asset].index[0],
      "target": test_data[target_asset]}],
    freq = "1d"
)
# Create a DeepAR estimator (daily frequency, `pred_len` steps ahead,
# 100 training epochs) and train it on `training_data`.
estimator = DeepAREstimator(freq="1d", prediction_length=pred_len, trainer=Trainer(epochs=100))
predictor = estimator.train(training_data=training_data)

### OPTIONAL: PLOT PREDICTION RESULTS
# Plot the last 60 observations of each plotting series with its forecast
# and the 50% / 90% prediction intervals.
# NOTE(review): the observed series come from `testing_data_plots` while the
# forecasts are computed on `testing_data` (defined outside this excerpt) --
# confirm that both datasets are aligned entry by entry.
for test_entry, forecast in zip(testing_data_plots, predictor.predict(testing_data)):
    to_pandas(test_entry)[-60:].plot(linewidth=2)
    forecast.plot(color='g', prediction_intervals=[50.0, 90.0])

### GENERATE FORECASTS
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
# Returns two iterators: the forecasts and the corresponding time series.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=testing_data,  # test dataset
    predictor=predictor,  # predictor
    num_samples=100,  # number of sample paths we want for evaluation
)


###### VERY SIMPLE TRADING STRATEGY
#      Signal: If Forecast > Current Price, Buy
#              If Forecast < Current Price, Short

Exemple #30

0

Afficher le fichier

    freq="H")

# Configure a DeepAR estimator for hourly data: 24-step forecasts from a
# 100-step context, with static categorical and dynamic real features
# enabled and 100 sample paths per prediction. `cardinality` has one entry
# per static categorical feature (here two features, with 2 and 1 categories).
estimator = deepar.DeepAREstimator(prediction_length=24,
                                   context_length=100,
                                   use_feat_static_cat=True,
                                   use_feat_dynamic_real=True,
                                   num_parallel_samples=100,
                                   cardinality=[2, 1],
                                   freq="H",
                                   trainer=Trainer(ctx="cpu",
                                                   epochs=200,
                                                   learning_rate=1e-3))
# Train on `train_data` (built outside this excerpt) to obtain a predictor.
predictor = estimator.train(training_data=train_data)

# For every test entry, draw its last 100 observations and the forecast with
# 50% / 90% prediction intervals; all entries share the current pyplot figure.
for test_entry, forecast in zip(test_data, predictor.predict(test_data)):
    to_pandas(test_entry)[-100:].plot(figsize=(12, 5), linewidth=2)
    forecast.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.legend([
    "past observations", "median prediction", "90% prediction interval",
    "50% prediction interval"
])
plt.show()

# Run prediction again and keep only the first forecast: print its mean
# and save its plot to 'graph.png'.
prediction = next(predictor.predict(test_data))
print(prediction.mean)
prediction.plot(output_file='graph.png')

# Persist the trained predictor to disk.
# NOTE(review): the target directory is a hard-coded absolute path.
predictor.serialize(
    Path("/home/root/mxnetTS/GluonTS-Learning-in-Action/chapter-2/model"))
# To reload it later:
# predictor = Predictor.deserialize(Path("/home/root/mxnetTS/GluonTS-Learning-in-Action/chapter-2/model"))