def anomaly_gluonts(lista_datos, num_fut, desv_mse=0, train=True, name='model-name'):
    lista_puntos = np.arange(0, len(lista_datos), 1)
    df, df_train, df_test = create_train_test(lista_puntos, lista_datos)

    data_list = [{"start": "01-01-2012 04:05:00",
                  "target": df_train['valores'].values}]
    dataset = ListDataset(data_iter=data_list, freq="5min")

    trainer = Trainer(epochs=15)
    estimator = deepar.DeepAREstimator(freq="5min",
                                       prediction_length=len(df_test['valores']),
                                       trainer=trainer)
    predictor = estimator.train(training_data=dataset)
    prediction = next(predictor.predict(dataset))

    engine = engine_output_creation('gluonts')
    engine.alerts_creation(prediction.mean.tolist(), df_test)
    engine.debug_creation(prediction.mean.tolist(), df_test)
    print('test length ' + str(df_test.shape) + ' versus prediction length '
          + str(len(prediction.mean.tolist())))
    engine.metrics_generation(df_test['valores'].values,
                              prediction.mean.tolist())

    ############## ANOMALY FINISHED
    print("Anomaly finished. Start forecasting")
    ############## FORECAST START
    data_list = [{"start": "01-01-2012 04:05:00",
                  "target": df['valores'].values}]
    dataset = ListDataset(data_iter=data_list, freq="5min")

    trainer = Trainer(epochs=15)
    estimator = deepar.DeepAREstimator(freq="5min",
                                       prediction_length=num_fut,
                                       trainer=trainer)
    predictor = estimator.train(training_data=dataset)
    prediction = next(predictor.predict(dataset))

    engine.forecast_creation(prediction.mean.tolist(), len(lista_datos), num_fut)
    return engine.engine_output
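A minimal usage sketch, assuming the helpers this module relies on elsewhere (create_train_test, engine_output_creation) are importable from the host project; the input series here is synthetic and the horizon is arbitrary:

import numpy as np

# Hypothetical invocation: 'values' stands in for a real metric series.
values = list(np.sin(np.arange(300) / 10.0))
output = anomaly_gluonts(values, num_fut=12)
print(output)  # engine output with alerts, debug info, metrics and the forecast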
def test_symbol_and_array(hybridize: bool):
    # Tests for cases like the one presented in issue 1211, in which the
    # Inflated Beta outputs used a method only available to arrays and not
    # to symbols. We simply go through a short training to ensure no
    # exceptions are raised.
    data = [
        {
            "target": [0, 0.0460043, 0.263906, 0.4103112, 1],
            "start": pd.to_datetime("1999-01-04"),
        },
        {
            "target": [1, 0.65815564, 0.44982578, 0.58875054, 0],
            "start": pd.to_datetime("1999-01-04"),
        },
    ]
    dataset = common.ListDataset(data, freq="W-MON", one_dim_target=True)

    trainer = Trainer(epochs=1, num_batches_per_epoch=2, hybridize=hybridize)
    estimator = deepar.DeepAREstimator(
        freq="W",
        prediction_length=2,
        trainer=trainer,
        distr_output=ZeroAndOneInflatedBetaOutput(),
        context_length=2,
        batch_size=1,
        scaling=False,
    )
    estimator.train(dataset)
def build_deepar_model():
    # get the financial dataset "exchange_rate"
    gluon_data = get_dataset("exchange_rate", regenerate=True)
    train_data = next(iter(gluon_data.train))
    test_data = next(iter(gluon_data.test))
    meta_data = gluon_data.metadata

    # dataset visualisation
    fig, ax = plt.subplots(1, 1, figsize=(11, 8))
    to_pandas(train_data).plot(ax=ax)
    ax.grid(which="both")
    ax.legend(["train data"], loc="upper left")
    plt.savefig("dataset.png")

    # inspect various members of 'gluon_data.*'
    print(train_data.keys())
    print(test_data.keys())
    print(meta_data)

    # convert the dataset into an object recognised by GluonTS
    training_data = common.ListDataset(gluon_data.train, freq=meta_data.freq)
    testing_data = common.ListDataset(gluon_data.test, freq=meta_data.freq)

    # create an Estimator with DeepAR;
    # a Trainer() object is used to customise the Estimator
    estimator = deepar.DeepAREstimator(
        freq=meta_data.freq,
        prediction_length=meta_data.prediction_length,
        trainer=Trainer(ctx="cpu", epochs=100, learning_rate=1e-4))

    # create a Predictor by training the Estimator on the training dataset
    predictor = estimator.train(training_data=training_data)

    # make predictions
    forecasts, test_series = make_evaluation_predictions(dataset=testing_data,
                                                         predictor=predictor,
                                                         num_samples=10)

    # visualise the forecasts
    prediction_intervals = (50.0, 90.0)
    legend = ["actual data", "median forecast"] + \
        [f"{k}% forecast interval" for k in prediction_intervals][::-1]
    fig, ax = plt.subplots(1, 1, figsize=(11, 8))
    list(test_series)[0][-150:].plot(ax=ax)  # plot the time series
    list(forecasts)[0].plot(prediction_intervals=prediction_intervals, color='r')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
    plt.savefig("deepar-model.png")
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt

from gluonts.model import deepar
from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model.predictor import Predictor

# load the data
df = pd.read_csv('/home/zhouxi/pig.csv', header=0, index_col=0)
data = common.ListDataset([{
    "start": df.index[100],
    "target": df.price[:"2018-12-05 00:00:00"]
}], freq="D")
train_data = data

# train the model
estimator = deepar.DeepAREstimator(freq="D", prediction_length=10)
predictor = estimator.train(training_data=data)

# predict and plot
for test_entry, forecast in zip(train_data, predictor.predict(train_data)):
    to_pandas(test_entry)[-60:].plot(linewidth=2)
    forecast.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.show()

prediction = next(predictor.predict(train_data))
print(prediction.mean)
prediction.plot(output_file='graph.png')

# save the model
predictor.serialize(Path("/home/zhouxi/my_product/ai_test/lstm/model/"))
import matplotlib.pyplot as plt

from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model import deepar
from gluonts.model.predictor import Predictor
from gluonts.trainer import Trainer

train_data = common.FileDataset(
    "/home/root/mxnetTS/GluonTS-Learning-in-Action/chapter-2/data/train",
    freq="H")
test_data = common.FileDataset(
    "/home/root/mxnetTS/GluonTS-Learning-in-Action/chapter-2/data/val",
    freq="H")

estimator = deepar.DeepAREstimator(
    prediction_length=24,
    context_length=100,
    use_feat_static_cat=True,
    use_feat_dynamic_real=True,
    num_parallel_samples=100,
    cardinality=[2, 1],
    freq="H",
    trainer=Trainer(ctx="cpu", epochs=200, learning_rate=1e-3))
predictor = estimator.train(training_data=train_data)

for test_entry, forecast in zip(test_data, predictor.predict(test_data)):
    to_pandas(test_entry)[-100:].plot(figsize=(12, 5), linewidth=2)
    forecast.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.legend([
    "past observations", "median prediction", "90% prediction interval",
    "50% prediction interval"
])
plt.show()
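Because this snippet enables use_feat_static_cat and use_feat_dynamic_real with cardinality=[2, 1], every record in the train/val directories must carry those fields. A sketch of writing one JSON-lines record with made-up values, following GluonTS's standard dataset field names:

import json
import os

os.makedirs("data/train", exist_ok=True)
record = {
    "start": "2019-01-01 00:00:00",
    "target": [12.0, 14.5, 13.2, 15.1],            # observed series
    "feat_static_cat": [1, 0],                     # two static categorical features, matching cardinality=[2, 1]
    "feat_dynamic_real": [[0.0, 1.0, 0.5, 0.25]],  # one dynamic real feature aligned with target
}
with open("data/train/data.json", "w") as fp:
    fp.write(json.dumps(record) + "\n")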
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt

from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model import deepar
from gluonts.trainer import Trainer

csv_path = '/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/Twitter_volume_AMZN.csv'
df = pd.read_csv(csv_path, header=0, sep=',')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index(['timestamp'], inplace=True)
# print(df.value[:"2015-04-22 20:47:53"])  # the final timestamp, [2015-04-22 20:47:53], is included
# print(df.value[:"2015-04-23 20:47:53"])  # if the given timestamp exceeds the data range, only the available data is returned
# print("start timestamp", df.index[0])

# 'start' is the first timestamp; 'target' holds the series values at each timestamp
data = common.ListDataset([{'start': df.index[0],
                            'target': df.value[:"2015-04-22 21:00:00"]}],
                          freq='H')  # this data format is fixed
# here df.index holds the timestamps and df.value the corresponding values

estimator = deepar.DeepAREstimator(
    freq='H',
    prediction_length=24,
    trainer=Trainer(epochs=50)
)
predictor = estimator.train(training_data=data)
predictor.serialize(Path("/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/model_save"))

for train_entry, predict_result in zip(data, predictor.predict(data)):
    to_pandas(train_entry)[-60:].plot(linewidth=2)
    predict_result.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.show()

# print the prediction results
prediction = next(predictor.predict(data))
print(prediction.mean)
prediction.plot(output_file='graph.png')
def forecast_dataset(dataset,
                     epochs=100,
                     learning_rate=1e-3,
                     num_samples=100,
                     model="SimpleFeedForward",
                     r_method="ets",
                     alpha=0,
                     distrib="Gaussian"):
    if distrib == "Gaussian":
        distr_output = GaussianOutput()
    elif distrib == "Laplace":
        distr_output = LaplaceOutput()
    elif distrib == "PiecewiseLinear":
        distr_output = PiecewiseLinearOutput(num_pieces=2)
    elif distrib == "Uniform":
        distr_output = UniformOutput()
    elif distrib == "Student":
        distr_output = StudentTOutput()
    else:
        distr_output = None

    if model != "GaussianProcess":
        ctx = mx.Context("gpu")
    else:
        ctx = mx.Context("cpu")

    # Trainer
    trainer = Trainer(epochs=epochs,
                      learning_rate=learning_rate,
                      num_batches_per_epoch=100,
                      ctx=ctx,
                      hybridize=(model[0] != "c"))

    # Estimator (if machine learning model)
    if model == "SimpleFeedForward":  # 10 s/epoch for context 60*24
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[10],
            prediction_length=dataset.prediction_length,
            context_length=dataset.context_length,
            freq=dataset.freq,
            trainer=trainer,
            distr_output=distr_output)
    elif model == "cSimpleFeedForward":  # 10 s/epoch for context 60*24
        estimator = CustomSimpleFeedForwardEstimator(
            prediction_length=dataset.prediction_length,
            context_length=dataset.context_length,
            freq=dataset.freq,
            trainer=trainer,
            num_cells=40,
            alpha=alpha,
            distr_output=distr_output,
            distr_output_type=distrib)
    elif model == "CanonicalRNN":  # 80 s/epoch for context 60*24, same for 60*1
        estimator = canonical.CanonicalRNNEstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output,
        )
    elif model == "DeepAr":
        estimator = deepar.DeepAREstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output,
        )
    elif model == "DeepFactor":  # 120 s/epoch with one long time series, 1.5 s with 183 series
        estimator = deep_factor.DeepFactorEstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output,
        )
    elif model == "DeepState":  # very slow on CPU
        estimator = deepstate.DeepStateEstimator(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            cardinality=[1],
            use_feat_static_cat=False)
    elif model == "GaussianProcess":  # CPU / GPU problem
        estimator = gp_forecaster.GaussianProcessEstimator(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            cardinality=1)
    elif model == "NPTS":
        estimator = npts.NPTSEstimator(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length)
    elif model == "MQCNN":
        estimator = seq2seq.MQCNNEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            quantiles=[0.005, 0.05, 0.25, 0.5, 0.75, 0.95, 0.995])
    elif model == "MQRNN":
        estimator = seq2seq.MQRNNEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            quantiles=[0.005, 0.05, 0.25, 0.5, 0.75, 0.95, 0.995])
    elif model == "RNN2QR":  # must be investigated
        estimator = seq2seq.RNN2QRForecaster(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            cardinality=dataset.cardinality,
            embedding_dimension=1,
            encoder_rnn_layer=1,
            encoder_rnn_num_hidden=1,
            decoder_mlp_layer=[1],
            decoder_mlp_static_dim=1)
    elif model == "SeqToSeq":  # must be investigated
        estimator = seq2seq.Seq2SeqEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            cardinality=[1],
            embedding_dimension=1,
            decoder_mlp_layer=[1],
            decoder_mlp_static_dim=1,
            encoder=Seq2SeqEncoder())
    elif model == "Transformer":  # makes the machine lag the first time it runs
        estimator = transformer.TransformerEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer)
    else:
        estimator = None

    # Predictor (created directly for non-ML models, from the estimator otherwise)
    if model == "Prophet":
        predictor = prophet.ProphetPredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
        )
    elif model == "R":
        predictor = r_forecast.RForecastPredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            method_name=r_method)
    elif model == "SeasonalNaive":
        predictor = seasonal_naive.SeasonalNaivePredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            season_length=24)
    else:
        predictor = estimator.train(dataset.train_ds)
        if model[0] != "c":
            predictor.serialize(Path("temp"))
            predictor = Predictor.deserialize(
                Path("temp"), ctx=mx.cpu(0))  # fix for DeepState

    # Evaluate
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=num_samples,  # number of sample paths we want for evaluation
    )

    return list(forecast_it), list(ts_it)
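A sketch of how forecast_dataset might be called. The wrapper class below is hypothetical; it only mirrors the attributes the function reads (train_ds, test_ds, freq, prediction_length, context_length, cardinality). Note the function as written selects a GPU context for every model except "GaussianProcess", so running this on a CPU-only machine would require editing that branch:

import numpy as np
from gluonts.dataset.common import ListDataset

class DatasetWrapper:
    # hypothetical container for the attributes forecast_dataset expects
    def __init__(self, train_ds, test_ds, freq, prediction_length,
                 context_length, cardinality=None):
        self.train_ds = train_ds
        self.test_ds = test_ds
        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = context_length
        self.cardinality = cardinality

target = np.sin(np.arange(500) / 24.0)  # placeholder series
train_ds = ListDataset([{"start": "2020-01-01", "target": target[:-24]}], freq="H")
test_ds = ListDataset([{"start": "2020-01-01", "target": target}], freq="H")

forecasts, series = forecast_dataset(
    DatasetWrapper(train_ds, test_ds, freq="H",
                   prediction_length=24, context_length=72),
    epochs=10,
    model="DeepAr",       # the dispatch above matches this exact spelling
    distrib="Student")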
timesteps = df['Date']
start = timesteps[0]
custom_dataset = np.array(df['KRW/USD'])

# NUMPY PHASE
N = 1  # number of time series
T = len(custom_dataset)  # number of time steps
prediction_length = 100
freq = "1D"

# CONVERSION TO GLUON PHASE
from gluonts.dataset.common import ListDataset

# train dataset: usually shorter than the test dataset by "prediction length"
train_ds = ListDataset([{'target': custom_dataset, 'start': start}], freq=freq)
# test dataset: use the whole dataset; add "target" and "start" fields
test_ds = ListDataset([{'target': custom_dataset, 'start': start}], freq=freq)

estimator = deepar.DeepAREstimator(freq=freq,
                                   prediction_length=prediction_length)
predictor = estimator.train(training_data=train_ds)

prediction = next(predictor.predict(train_ds))
print(prediction.mean)
prediction.plot(output_file='graph.png')

if __name__ == "__main__":
    pass
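As written, train_ds and test_ds wrap the identical series, so the model is evaluated on data it was trained on. The usual GluonTS convention, which the comments above gesture at, is to hold the last prediction_length points out of the training target; a sketch:

# conventional split: hold the last `prediction_length` points out of training
train_ds = ListDataset(
    [{'target': custom_dataset[:-prediction_length], 'start': start}],
    freq=freq)
test_ds = ListDataset(
    [{'target': custom_dataset, 'start': start}],  # full series, including the held-out tail
    freq=freq)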
"Name": "test:smape", "Regex": r"gluonts\[metric-sMAPE\]: (\S+)" }, { "Name": "test:wmape", "Regex": r"gluonts\[metric-wMAPE\]: (\S+)" }, ] estimator = deepar.DeepAREstimator( prediction_length=gluonts_datasets.metadata.prediction_length, freq=gluonts_datasets.metadata.freq, cardinality=[ gluonts_datasets.metadata.feat_static_cat[0].cardinality, gluonts_datasets.metadata.feat_static_cat[1].cardinality, gluonts_datasets.metadata.feat_static_cat[2].cardinality ], use_feat_static_cat=True, # use_feat_dynamic_real=True, #use_feat_dynamic_cat = True, # use_feat_static_real=True, #time_features= time_features, trainer=Trainer(epochs=epochs)) estimator = deepstate.DeepStateEstimator( prediction_length=gluonts_datasets.metadata.prediction_length, freq=gluonts_datasets.metadata.freq, cardinality=[ gluonts_datasets.metadata.feat_static_cat[0].cardinality, gluonts_datasets.metadata.feat_static_cat[1].cardinality, gluonts_datasets.metadata.feat_static_cat[2].cardinality ],
df = pd.read_csv(csv_path, header=0, sep=',')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index(['timestamp'], inplace=True)
# print(df.value[:"2015-04-22 20:47:53"])  # the final timestamp, [2015-04-22 20:47:53], is included
# print(df.value[:"2015-04-23 20:47:53"])  # if the given timestamp exceeds the data range, only the available data is returned
# print("start timestamp", df.index[0])

# 'start' is the first timestamp; 'target' holds the series values at each timestamp
data = common.ListDataset([{
    'start': df.index[0],
    'target': df.value[:"2015-04-22 21:00:00"]
}], freq='H')  # this data format is fixed
# here df.index holds the timestamps and df.value the corresponding values

estimator = deepar.DeepAREstimator(freq='H', prediction_length=24)
predictor = Predictor.deserialize(
    Path(
        "/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/model_save"
    ))
# predictor.serialize(Path("/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/model_save"))
print("data:", data)
print('....' * 5)
print(predictor.predict(data))
print('####' * 5)

for train_entry, predict_result in zip(data, predictor.predict(data)):
    print(to_pandas(train_entry)[:60])
    print('-------' * 4)
    # print the last 60 values; each entry is hourly, starting from 2015-02-26 21:00:00
    print(to_pandas(train_entry)[-60:])
from gluonts.dataset import common
from gluonts.model import deepar
import pandas as pd

url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
df = pd.read_csv(url, header=0, index_col=0)
data = common.ListDataset([{
    "start": df.index[0],
    "target": df.value[:"2015-04-05 00:00:00"]
}], freq="5min")

estimator = deepar.DeepAREstimator(freq="5min", prediction_length=12)
predictor = estimator.train(training_data=data)

prediction = next(predictor.predict(data))
print(prediction.mean)
prediction.plot(output_file='graph.png')
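To quantify accuracy rather than just plotting, the forecast can be backtested with GluonTS's evaluation utilities; a minimal sketch using the same pre-0.6 import layout as the snippet above:

from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions

# generate forecasts against the tail of each series and score them;
# the last prediction_length points of each series are the evaluation target
forecast_it, ts_it = make_evaluation_predictions(
    dataset=data,
    predictor=predictor,
    num_samples=100,  # sample paths per series
)
agg_metrics, item_metrics = Evaluator()(ts_it, forecast_it, num_series=len(data))
print(agg_metrics["sMAPE"], agg_metrics["MASE"])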
def train_and_predict(code, start_date, end_date, data_path, predict_path):
    predict_days = 2
    csv = os.path.join(data_path, '{code}.csv'.format(code=code))
    df = pd.read_csv(csv)

    # skip if training data length < 360
    if len(df) < 360:
        return False

    # set DT as the index, TCLOSE as the label, and sort by DT ascending
    df.set_axis([
        'DT', 'CODE', 'NAME', 'TCLOSE', 'HIGH', 'LOW', 'TOPEN', 'LCLOSE',
        'CHG', 'PCHG', 'TURNOVER', 'VOTURNOVER', 'VATURNOVER', 'TCAP', 'MCAP'
    ], axis='columns', inplace=True)
    df.drop([
        'CODE', 'NAME', 'HIGH', 'LOW', 'TOPEN', 'LCLOSE', 'CHG', 'PCHG',
        'TURNOVER', 'VOTURNOVER', 'VATURNOVER', 'TCAP', 'MCAP'
    ], axis=1, inplace=True)
    df.set_index(['DT'], inplace=True)
    df = df.iloc[df.index.argsort()]

    # fill missing DT/TCLOSE entries with the last available exchange day's value
    all_dt = [(datetime.strptime(df.index[0], "%Y-%m-%d") +
               timedelta(days=i)).__format__('%Y-%m-%d')
              for i in range(1, (datetime.strptime(end_date, "%Y%m%d") -
                                 datetime.strptime(df.index[0], "%Y-%m-%d")).days)]
    miss_data = []
    value = df.TCLOSE[df.index[0]]
    for dt in all_dt:
        if dt in df.index:
            value = df.TCLOSE[dt]
        else:
            miss_data.append([dt, value])
    miss_df = pd.DataFrame(miss_data, columns=['DT', 'TCLOSE'])
    miss_df.set_index(['DT'], inplace=True)
    miss_df = miss_df.iloc[miss_df.index.argsort()]
    new_df = pd.concat([df, miss_df], axis=0)
    new_df = new_df.iloc[new_df.index.argsort()]
    new_df['timestamp'] = pd.to_datetime(new_df.index)
    new_df.set_index(['timestamp'], inplace=True)
    new_df = new_df.iloc[new_df.index.argsort()]
    train_data = new_df

    # build the training dataset for DeepAR
    data = common.ListDataset([{
        'start': train_data.index[0],
        'target': train_data.TCLOSE[:]
    }], freq='1d')

    # train the model, on GPU if one is available
    if len(mxnet.test_utils.list_gpus()):
        estimator = deepar.DeepAREstimator(freq='1d',
                                           prediction_length=predict_days,
                                           trainer=Trainer(ctx='gpu', epochs=100))
    else:
        estimator = deepar.DeepAREstimator(freq='1d',
                                           prediction_length=predict_days,
                                           trainer=Trainer(epochs=100))
    predictor = estimator.train(training_data=data)

    # predict the future values
    predict = predictor.predict(data, 1)
    predict_list = list(predict)
    max_v = predict_list[0].samples.max()
    min_v = predict_list[0].samples.min()
    max_id = predict_list[0].samples.argmax()
    min_id = predict_list[0].samples.argmin()
    predict_x = [(predict_list[0].start_date +
                  timedelta(days=i)).__format__('%Y-%m-%d')
                 for i in range(0, predict_days + 1)]
    predict_y = predict_list[0].samples[0]
    predict_df = pd.DataFrame(zip(pd.to_datetime(predict_x), predict_y),
                              columns=['DT', 'TCLOSE'])
    predict_df['timestamp'] = predict_df['DT']
    predict_df.set_index('timestamp', inplace=True)
    train_df = train_data.loc[train_data.index[-5:]]
    train_df['DT'] = pd.to_datetime(train_df.index)
    output_df = pd.concat([train_df, predict_df], axis=0)
    if min_id < max_id and (max_v - min_v) / min_v >= 0.099:
        output_df.to_csv(
            os.path.join(predict_path, 'red_{code}.csv'.format(code=code)))
    else:
        output_df.to_csv(
            os.path.join(predict_path, 'green_{code}.csv'.format(code=code)))
    return True
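A usage sketch; the stock code and directories are made up. end_date must use the '%Y%m%d' format expected by the strptime call above, while start_date is accepted but never read in the body:

ok = train_and_predict(code='600519',
                       start_date='20200101',  # accepted but unused above
                       end_date='20201231',    # parsed with "%Y%m%d"
                       data_path='./data',
                       predict_path='./predict')
print('predicted' if ok else 'skipped: fewer than 360 rows')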
data = common.ListDataset([{
    "start": df.index[0],
    "target": df.marketClose[:"2020-06-08 15:59:00"]
}], freq="1min")

lots_of_data = common.ListDataset([{
    "start": new_data.index[0],
    "target": new_data.Close[:-1]
}], freq="1min")

trainer = Trainer(epochs=10, ctx="cpu", num_batches_per_epoch=75)
estimator = deepar.DeepAREstimator(freq="1min",
                                   prediction_length=390,
                                   trainer=trainer,
                                   num_layers=2)
# predictor = estimator.train(training_data=data)

trial_estimator = SimpleFeedForwardEstimator(num_hidden_dimensions=[10],
                                             prediction_length=390,
                                             context_length=780,
                                             freq="1min",
                                             trainer=Trainer(
                                                 ctx="cpu",
                                                 epochs=5,
                                                 learning_rate=1e-30,
                                                 hybridize=False,
                                                 num_batches_per_epoch=100))
predictor = estimator.train(lots_of_data)
# model
# estimator = SimpleFeedForwardEstimator(
#     num_hidden_dimensions=[100],
#     prediction_length=prediction_length,
#     context_length=T - prediction_length,
#     freq=freq,
#     trainer=Trainer(ctx="cpu", epochs=1000, learning_rate=1e-4, num_batches_per_epoch=1),
# )

from gluonts.model import deepar

estimator = deepar.DeepAREstimator(freq=freq,
                                   num_layers=4,
                                   num_cells=1,
                                   prediction_length=prediction_length,
                                   trainer=Trainer(ctx="cpu",
                                                   epochs=500,
                                                   learning_rate=1e-4,
                                                   num_batches_per_epoch=35,
                                                   minimum_learning_rate=0))
predictor = estimator.train(training_data=train_ds)

from gluonts.evaluation.backtest import make_evaluation_predictions

forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,  # test dataset
    predictor=predictor,  # predictor
    num_samples=1000,  # number of sample paths we want for evaluation
)
# the return values are generators