Esempio n. 1
0
def build_deepar_model():
    """Train a DeepAR model on the "exchange_rate" dataset and plot a forecast.

    Side effects:
        * writes ``dataset.png`` with a plot of the first training series;
        * prints the keys of the first train/test entries and the metadata;
        * writes ``deepar-model.png`` with the last 150 observations of the
          first test series overlaid with its forecast.

    Returns:
        None.
    """
    # get the financial data "exchange_rate"
    gluon_data = get_dataset("exchange_rate", regenerate=True)
    train_data = next(iter(gluon_data.train))
    test_data = next(iter(gluon_data.test))
    meta_data = gluon_data.metadata

    # data set visualisation
    fig, ax = plt.subplots(1, 1, figsize=(11, 8))
    to_pandas(train_data).plot(ax=ax)
    ax.grid(which="both")
    ax.legend(["train data"], loc="upper left")
    plt.savefig("dataset.png")

    # visualize various members of the 'gluon_data.*'
    print(train_data.keys())
    print(test_data.keys())
    print(meta_data)

    # convert dataset into an object recognised by GluonTS
    training_data = common.ListDataset(gluon_data.train, freq=meta_data.freq)
    testing_data = common.ListDataset(gluon_data.test, freq=meta_data.freq)

    # create an Estimator with DeepAR
    # an object of Trainer() class is used to customize Estimator
    estimator = deepar.DeepAREstimator(
        freq=meta_data.freq,
        prediction_length=meta_data.prediction_length,
        trainer=Trainer(ctx="cpu", epochs=100, learning_rate=1e-4))

    # create a Predictor by training the Estimator with training dataset
    predictor = estimator.train(training_data=training_data)

    # make predictions
    forecasts, test_series = make_evaluation_predictions(
        dataset=testing_data,
        predictor=predictor,
        num_samples=10)

    # Only the first series is plotted, so pull just the first item from each
    # iterator instead of materialising (and computing) every forecast.
    first_series = next(iter(test_series))
    first_forecast = next(iter(forecasts))

    # visualise forecasts
    prediction_intervals = (50.0, 90.0)
    legend = ["actual data", "median forecast"
              ] + [f"{k}% forecast interval"
                   for k in prediction_intervals][::-1]
    fig, ax = plt.subplots(1, 1, figsize=(11, 8))
    first_series[-150:].plot(ax=ax)  # plot the time series
    first_forecast.plot(prediction_intervals=prediction_intervals,
                        color='r')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
    plt.savefig("deepar-model.png")
def test_symbol_and_array(hybridize: bool):
    """Smoke-test DeepAR training with a zero-and-one inflated Beta output.

    Covers cases like the one reported in issue 1211, where the Inflated Beta
    outputs called a method available on arrays but not on symbols. The test
    passes as long as a very short training run raises no exception.

    Args:
        hybridize: forwarded to ``Trainer(hybridize=...)``.
    """
    start = pd.to_datetime("1999-01-04")
    targets = (
        [0, 0.0460043, 0.263906, 0.4103112, 1],
        [1, 0.65815564, 0.44982578, 0.58875054, 0],
    )
    entries = [{"target": target, "start": start} for target in targets]
    dataset = common.ListDataset(entries, freq="W-MON", one_dim_target=True)

    # Keep the run tiny: one epoch of two batches is enough to hit the code path.
    short_trainer = Trainer(
        epochs=1,
        num_batches_per_epoch=2,
        hybridize=hybridize,
    )
    estimator = deepar.DeepAREstimator(
        freq="W",
        prediction_length=2,
        context_length=2,
        batch_size=1,
        scaling=False,
        distr_output=ZeroAndOneInflatedBetaOutput(),
        trainer=short_trainer,
    )

    estimator.train(dataset)
Esempio n. 3
0
def build_ff_model():
    """Train a simple feed-forward forecaster on Twitter volume data and plot it.

    Downloads the NAB ``Twitter_volume_AMZN.csv`` file, trains a
    ``SimpleFeedForwardEstimator`` on the values up to the label
    ``"2015-04-05 00:00:00"``, and plots the last 100 training points together
    with the forecast; the forecast plot is written to ``ff-model.png``.

    Returns:
        None.
    """
    # get the csv file as a dataframe
    source_url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
    frame = pd.read_csv(source_url, header=0, index_col=0)

    # convert the raw data into an object recognised by GluonTS
    # start: the starting index of the dataframe
    # target: the actual time-series data that we want to model
    # freq: the frequency with which the data is collected
    series_entry = {
        "start": frame.index[0],
        "target": frame.value[:"2015-04-05 00:00:00"],
    }
    dataset = common.ListDataset([series_entry], freq="5min")

    # create an Estimator with simple feed forward model
    # an object of Trainer() class is used to customize Estimator
    trainer = Trainer(ctx="cpu", epochs=100, learning_rate=1e-3)
    estimator = simple_feedforward.SimpleFeedForwardEstimator(
        freq="5min",
        prediction_length=100,
        trainer=trainer,
    )

    # create a Predictor by training the Estimator with training dataset
    predictor = estimator.train(training_data=dataset)

    # get predictions for the whole forecast horizon
    forecasts = predictor.predict(dataset)
    for entry, forecast in zip(dataset, forecasts):
        # plot only the last 100 timestamps of the training dataset
        history = to_pandas(entry)
        history[-100:].plot()
        # plot the forecasts from the model
        forecast.plot(output_file='ff-model.png', color='r')
Esempio n. 4
0
def prepare_data_univariate(index, values, freq):
    """Wrap one univariate series in a single-entry ``common.ListDataset``.

    Args:
        index: indexable collection whose first element is used as "start".
        values: the series values, passed through as "target".
        freq: frequency string forwarded to ``ListDataset``.

    Returns:
        The ``ListDataset`` containing the single series entry.
    """
    entry = {"start": index[0], "target": values}
    return common.ListDataset([entry], freq=freq)
Esempio n. 5
0
deepar算法实现,依赖包 mxnet mxnet-mkl gluon gluonts,pathlib
'''

import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from gluonts.model import deepar
from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model.predictor import Predictor

# Load the data
df = pd.read_csv('/home/zhouxi/pig.csv', header=0, index_col=0)
data = common.ListDataset(
    [{
        # "start" has to be the timestamp of the first target value. The
        # target slice begins at the first row, so use df.index[0]; the
        # previous df.index[100] shifted every timestamp by 100 rows.
        "start": df.index[0],
        "target": df.price[:"2018-12-05 00:00:00"],
    }],
    freq="D")
train_data = data

# Train the model
estimator = deepar.DeepAREstimator(freq="D", prediction_length=10)
predictor = estimator.train(training_data=data)

# Predict and plot: last 60 observations plus the forecast intervals
for test_entry, forecast in zip(train_data, predictor.predict(train_data)):
    to_pandas(test_entry)[-60:].plot(linewidth=2)
    forecast.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.show()
from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model.predictor import Predictor
from gluonts.trainer import Trainer
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
# Load the CSV and index it by its parsed "timestamp" column.
csv_path = '/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/Twitter_volume_AMZN.csv'
df = pd.read_csv(csv_path, header=0, sep=',')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.set_index('timestamp')

# print(df.value[:"2015-04-22 20:47:53"]) # the final timestamp [2015-04-22 20:47:53] is included
# print(df.value[:"2015-04-23 20:47:53"]) # a bound beyond the data range just returns all available data
# print("start timestamp", df.index[0]) # "start" is the first timestamp, "target" holds the values of the series
# The entry layout ("start" + "target") is the fixed format ListDataset expects:
# df.index holds the timestamps and df.value the value at each timestamp.
# NOTE(review): confirm that freq 'H' matches the sampling interval of the CSV.
data = common.ListDataset(
    [{
        'start': df.index[0],
        'target': df.value[:"2015-04-22 21:00:00"],
    }],
    freq='H',
)

estimator = deepar.DeepAREstimator(
    freq='H',
    prediction_length=24,
    trainer=Trainer(epochs=50),
)

predictor = estimator.train(training_data=data)

# Persist the trained predictor, then plot history and forecast.
predictor.serialize(
    Path("/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/model_save")
)
for train_entry, predict_result in zip(data, predictor.predict(data)):
    to_pandas(train_entry)[-60:].plot(linewidth=2)
    predict_result.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
Esempio n. 7
0
# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import pandas as pd

from pathlib import Path
from gluonts.model import deepar
from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model.predictor import Predictor

# Download the Twitter volume series and wrap it in a GluonTS dataset.
url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
df = pd.read_csv(url, header=0, index_col=0)
# NOTE(review): confirm that freq "H" matches the sampling interval of the CSV.
data = common.ListDataset([{"start": df.index[0],
                            "target": df.value[:"2015-04-23 00:00:00"]}],
                          freq="H")
# The statements below refer to the dataset as ``train_data``; that name was
# never bound in this script, which raised NameError.
train_data = data

estimator = deepar.DeepAREstimator(freq="H", prediction_length=24)
predictor = estimator.train(training_data=train_data)

# Plot the last 60 observations together with the forecast intervals.
for test_entry, forecast in zip(train_data, predictor.predict(train_data)):
    # Slice (not scalar index) so the result is still a plottable Series.
    to_pandas(test_entry)[-60:].plot(linewidth=2)
    forecast.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.show()

# Print the mean forecast of the first (only) series and save its plot.
prediction = next(iter(predictor.predict(train_data)))
print(prediction.mean)
prediction.plot(output_file='graph.png')

predictor.serialize(Path("/home/root/mxnetTS/GluonTS-Learning-in-Action/chapter-1/model"))
    def train_and_predict(code, start_date, end_date, data_path, predict_path):
        """Train DeepAR on one stock's closing prices and save a short forecast.

        Reads ``<data_path>/<code>.csv``, keeps the date (``DT``) and closing
        price (``TCLOSE``) columns, fills calendar days missing from the file
        with the most recent earlier closing price, trains a daily-frequency
        DeepAR model and writes the last five training rows followed by the
        forecast to a CSV under ``predict_path``.

        The output file is ``red_<code>.csv`` when the smallest sampled value
        occurs before the largest one and their relative difference is at
        least 0.099; otherwise it is ``green_<code>.csv``.

        Args:
            code: identifier used to build the input and output file names.
            start_date: not used by this function.
            end_date: date string in ``%Y%m%d`` format; gap filling covers the
                days strictly between the first row's date and this date.
            data_path: directory containing ``<code>.csv``.
            predict_path: directory the result CSV is written to.

        Returns:
            False when the CSV has fewer than 360 rows (nothing is trained),
            True after the forecast file has been written.
        """
        predict_days = 2
        csv_path = os.path.join(data_path, '{code}.csv'.format(code=code))
        df = pd.read_csv(csv_path)

        # skip series with fewer than 360 rows of training data
        if len(df) < 360:
            return False

        # Name the columns positionally, keep DT as index and TCLOSE as label,
        # ordered by DT. Plain assignment is used because
        # ``set_axis(..., inplace=True)`` is rejected by pandas >= 2.0.
        df.columns = [
            'DT', 'CODE', 'NAME', 'TCLOSE', 'HIGH', 'LOW', 'TOPEN', 'LCLOSE',
            'CHG', 'PCHG', 'TURNOVER', 'VOTURNOVER', 'VATURNOVER', 'TCAP',
            'MCAP'
        ]
        df = df[['DT', 'TCLOSE']].set_index('DT')
        df = df.iloc[df.index.argsort()]

        # fill the missing DT and label (TCLOSE) with the last available
        # exchange day's value
        first_day = datetime.strptime(df.index[0], "%Y-%m-%d")
        span_days = (datetime.strptime(end_date, "%Y%m%d") - first_day).days
        all_dt = [(first_day + timedelta(days=i)).strftime('%Y-%m-%d')
                  for i in range(1, span_days)]
        miss_data = []
        value = df.TCLOSE[df.index[0]]
        for dt in all_dt:
            if dt in df.index:
                value = df.TCLOSE[dt]
            else:
                miss_data.append([dt, value])
        miss_df = pd.DataFrame(miss_data, columns=['DT', 'TCLOSE'])
        miss_df.set_index(['DT'], inplace=True)
        miss_df = miss_df.iloc[miss_df.index.argsort()]

        # merge real and filled rows and switch to a datetime index
        new_df = pd.concat([df, miss_df], axis=0)
        new_df = new_df.iloc[new_df.index.argsort()]
        new_df['timestamp'] = pd.to_datetime(new_df.index)
        new_df.set_index(['timestamp'], inplace=True)
        new_df = new_df.iloc[new_df.index.argsort()]
        train_data = new_df

        # build the training dataset for deepar
        data = common.ListDataset([{
            'start': train_data.index[0],
            'target': train_data.TCLOSE[:]
        }],
                                  freq='1d')

        # train the model, on GPU when mxnet reports one
        if mxnet.test_utils.list_gpus():
            trainer = Trainer(ctx='gpu', epochs=100)
        else:
            trainer = Trainer(epochs=100)
        estimator = deepar.DeepAREstimator(freq='1d',
                                           prediction_length=predict_days,
                                           trainer=trainer)
        predictor = estimator.train(training_data=data)

        # predict the future data: one sample path for the single series
        forecast = next(iter(predictor.predict(data, 1)))
        samples = forecast.samples
        max_value, min_value = samples.max(), samples.min()
        max_id, min_id = samples.argmax(), samples.argmin()
        predict_x = [(forecast.start_date +
                      timedelta(days=i)).strftime('%Y-%m-%d')
                     for i in range(predict_days)]
        predict_y = samples[0]
        predict_df = pd.DataFrame(list(zip(pd.to_datetime(predict_x),
                                           predict_y)),
                                  columns=['DT', 'TCLOSE'])
        predict_df['timestamp'] = predict_df['DT']
        predict_df.set_index('timestamp', inplace=True)

        # last five training rows give context ahead of the forecast rows
        train_df = train_data.loc[train_data.index[-5:]]
        train_df['DT'] = pd.to_datetime(train_df.index)
        output_df = pd.concat([train_df, predict_df], axis=0)

        # "red" marks a low followed by a high with a relative rise >= 0.099
        rising = (min_id < max_id
                  and (max_value - min_value) / min_value >= 0.099)
        prefix = 'red' if rising else 'green'
        output_df.to_csv(
            os.path.join(predict_path,
                         '{prefix}_{code}.csv'.format(prefix=prefix,
                                                      code=code)))
        return True
Esempio n. 9
0
#df.drop(columns=['label', 'high', 'low', 'volume', 'notional', 'numberOfTrades', "marketHigh", "marketLow", "marketAverage", "marketNotional", "marketNumberOfTrades", "open", "close", "marketOpen", "marketClose", "changeOverTime", "marketChangeOverTime"])
#prices = df.filter(['date','average', 'marketClose'])
'''
train_ds = ListDataset([{
  "start": df.index[0], 
  "target": df.marketClose[:"2020-06-02 04:29:00"]
  }], freq="1min")
test_ds = ListDataset([{
  "start": df.index[-390], 
  "target": df.marketClose["2020-06-05 09:30:00": '2020-06-05 15:59:00' ]
  }], freq="1min")
'''

# One-minute series built from ``df``: marketClose up to the given label.
data = common.ListDataset(
    [{
        "start": df.index[0],
        "target": df.marketClose[:"2020-06-08 15:59:00"],
    }],
    freq="1min",
)

# One-minute series built from ``new_data``: Close without its last value.
lots_of_data = common.ListDataset(
    [{
        "start": new_data.index[0],
        "target": new_data.Close[:-1],
    }],
    freq="1min",
)

# Two-layer DeepAR estimator with a 390-step horizon; training is left
# disabled below.
trainer = Trainer(ctx="cpu", epochs=10, num_batches_per_epoch=75)
estimator = deepar.DeepAREstimator(
    freq="1min",
    prediction_length=390,
    num_layers=2,
    trainer=trainer,
)
#predictor = estimator.train(training_data=data)
Esempio n. 10
0
from gluonts.dataset import common
from gluonts.model import deepar

import pandas as pd

# Download the Twitter volume series (5-minute frequency) as a dataframe.
url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
df = pd.read_csv(url, header=0, index_col=0)
# Train on the values up to 2015-04-05. The previous bound
# "2015.txt-04-05 00:00:00" was a garbled label that sorts after every
# "2015-..." index entry, so the slice silently selected the whole series.
data = common.ListDataset([{"start": df.index[0],
                            "target": df.value[:"2015-04-05 00:00:00"]}],
                            freq="5min")

estimator = deepar.DeepAREstimator(freq="5min", prediction_length=12)
predictor = estimator.train(training_data=data)

# Print the mean forecast of the single series and save its plot.
prediction = next(predictor.predict(data))
print(prediction.mean)
prediction.plot(output_file='graph.png')

if __name__ == "__main__":
    pass