def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """Multivariate time series forecasting with a Vector Auto-Regressive model,
    saving the forecasts and the test split to CSV.

    :param df: pandas.DataFrame, index: time, columns: sensor id, content: data.
    :param n_forwards: a tuple of forecast horizons (in steps ahead).
    :param n_lags: the order of the VAR model.
    :param test_ratio: fraction of samples held out as the test set.
    :return: ([prediction DataFrame per horizon], df_test)
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train, df_test = df[:n_train], df[n_train:]

    # Normalize with train-set statistics only, to avoid test-set leakage.
    scaler = StandardScaler(mean=df_train.values.mean(), std=df_train.values.std())
    data = scaler.transform(df_train.values)
    var_model = VAR(data)
    var_result = var_model.fit(n_lags)
    max_n_forwards = np.max(n_forwards)

    # Do forecasting: slide a window of n_lags observations over the data so
    # that every test timestep receives a prediction at every horizon.
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    start = n_train - n_lags - max_n_forwards + 1
    for input_ind in range(start, n_sample - n_lags):
        prediction = var_result.forecast(
            scaler.transform(df.values[input_ind:input_ind + n_lags]), max_n_forwards)
        for i, n_forward in enumerate(n_forwards):
            result_ind = input_ind - n_train + n_lags + n_forward - 1
            if 0 <= result_ind < n_test:
                result[i, result_ind, :] = prediction[n_forward - 1, :]

    df_predicts = []
    for i, n_forward in enumerate(n_forwards):
        df_predict = pd.DataFrame(scaler.inverse_transform(result[i]),
                                  index=df_test.index, columns=df_test.columns)
        df_predicts.append(df_predict)
        # BUG FIX: the original wrote every horizon to the same "./df_predict.csv",
        # so only the last horizon's forecast survived on disk. Write one file
        # per horizon instead.
        df_predict.to_csv('./df_predict_{}.csv'.format(n_forward), sep=',', index=False)
    df_test.to_csv("./df_test.csv", sep=',', index=False)
    return df_predicts, df_test
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """Multivariate time series forecasting using Vector Auto-Regressive Model.

    :param df: pandas.DataFrame, index: time, columns: sensor id, content: data.
    :param n_forwards: a tuple of horizons.
    :param n_lags: the order of the VAR model.
    :param test_ratio: fraction of samples held out as the test set.
    :return: [list of prediction in different horizon], df_test
    """
    num_samples, num_outputs = df.shape
    num_test = int(round(num_samples * test_ratio))
    num_train = num_samples - num_test
    df_train, df_test = df[:num_train], df[num_train:]

    # Fit the VAR on the normalized training portion only.
    scaler = StandardScaler(mean=df_train.values.mean(), std=df_train.values.std())
    fitted = VAR(scaler.transform(df_train.values)).fit(n_lags)
    max_horizon = np.max(n_forwards)

    # Slide an n_lags-wide window across the series; each window yields
    # forecasts up to max_horizon steps ahead, which are scattered into the
    # slot of the test timestep they predict.
    forecasts = np.zeros(shape=(len(n_forwards), num_test, num_outputs))
    first_window = num_train - n_lags - max_horizon + 1
    for window_start in range(first_window, num_samples - n_lags):
        window = scaler.transform(df.values[window_start:window_start + n_lags])
        predicted = fitted.forecast(window, max_horizon)
        for horizon_idx, horizon in enumerate(n_forwards):
            target = window_start - num_train + n_lags + horizon - 1
            if 0 <= target < num_test:
                forecasts[horizon_idx, target, :] = predicted[horizon - 1, :]

    # De-normalize each horizon's forecasts back into a DataFrame aligned
    # with the test split.
    df_predicts = [
        pd.DataFrame(scaler.inverse_transform(forecasts[horizon_idx]),
                     index=df_test.index, columns=df_test.columns)
        for horizon_idx in range(len(n_forwards))
    ]
    return df_predicts, df_test
def test_transform_df(self):
    """transform() on a DataFrame should z-score the values with the
    configured mean/std and return them wrapped in a DataFrame."""
    df = pd.DataFrame([[35., 0.], [0., 17.5], [70., 35.]])
    expected_result = np.array([[0., -1.], [-1, -0.5], [1., 0.]])
    scaler = StandardScaler(mean=35., std=35.)
    result = scaler.transform(df)
    # np.testing reports an element-wise diff on failure, unlike the
    # original assertTrue(np.array_equal(...)) which only says True/False.
    np.testing.assert_array_equal(expected_result, result.values)
def test_transform(self):
    """transform() on a plain ndarray should z-score the values with the
    configured mean/std and return an ndarray."""
    data = np.array([[35., 0.], [0., 17.5], [70., 35.]])
    expected_result = np.array([[0., -1.], [-1, -0.5], [1., 0.]])
    scaler = StandardScaler(mean=35., std=35.)
    result = scaler.transform(data)
    # np.testing reports an element-wise diff on failure, unlike the
    # original assertTrue(np.array_equal(...)) which only says True/False.
    np.testing.assert_array_equal(expected_result, result)
def setup_dataloader(
    arr3d,
    seq_len,
    horizon,
    length_dict,
    train_batch_size,
    val_batch_size,
    test_batch_size,
    scale,
    features,
    logger,
    seq_sampling,
):
    """Split arr3d into train/val/test along axis 0, z-score feature channel 0
    using train-set statistics, and wrap each split in a SpatioTemporalDataLoader.

    :param arr3d: 3-D array; axis 0 is time, axis 2 holds feature channels and
        channel 0 is the one that gets normalized. Axis 1 is presumably the
        sensor/node axis -- TODO confirm against caller.
    :param seq_len: input sequence length passed to each loader.
    :param horizon: prediction horizon passed to each loader.
    :param length_dict: dict with 'train_length', 'val_length', 'test_length'
        (number of timesteps per split).
    :param train_batch_size: batch size for the training loader.
    :param val_batch_size: batch size for the validation loader.
    :param test_batch_size: batch size for the test loader.
    :param scale: forwarded to StandardScaler.
    :param features: forwarded to each SpatioTemporalDataLoader.
    :param logger: logger used to report split sizes.
    :param seq_sampling: forwarded to each SpatioTemporalDataLoader.
    :return: dict with keys 'train_loader', 'val_loader', 'test_loader', 'scaler'.
    """
    train_length = length_dict['train_length']
    val_length = length_dict['val_length']
    test_length = length_dict['test_length']
    # Test split is the tail of the series; validation sits right after the
    # training portion, falling back to the test split when no validation
    # timesteps were requested.
    test_arr3d = arr3d[-test_length:]
    val_arr3d = arr3d[train_length:train_length + val_length] if val_length > 0 else test_arr3d
    train_arr3d = arr3d[:train_length]
    # Channel 0 of each split: the values used to fit and apply normalization.
    train_arr2d = train_arr3d[:, :, 0]
    val_arr2d = val_arr3d[:, :, 0]
    test_arr2d = test_arr3d[:, :, 0]
    # Copy before normalizing in place so the caller's arrays stay untouched.
    train_z_arr3d = train_arr3d.copy()
    val_z_arr3d = val_arr3d.copy()
    test_z_arr3d = test_arr3d.copy()
    # Scaler statistics come from the training split only (no test leakage).
    scaler = StandardScaler(mean=train_arr2d.mean(), std=train_arr2d.std(), scale=scale)
    train_z_arr3d[:, :, 0] = scaler.transform(train_arr2d)
    val_z_arr3d[:, :, 0] = scaler.transform(val_arr2d)
    test_z_arr3d[:, :, 0] = scaler.transform(test_arr2d)
    dataloaders = {}
    # Only the training loader shuffles; evaluation order must stay stable.
    dataloaders['test_loader'] = \
        SpatioTemporalDataLoader(test_z_arr3d, test_batch_size, seq_len,
                                 horizon, shuffle=False, features=features,
                                 seq_sampling=seq_sampling)
    assert dataloaders[
        'test_loader'].num_batch > 0, 'num_batch for test dataset should be > 0'
    dataloaders['val_loader'] = \
        SpatioTemporalDataLoader(val_z_arr3d, val_batch_size, seq_len,
                                 horizon, shuffle=False, features=features,
                                 seq_sampling=seq_sampling)
    dataloaders['train_loader'] = \
        SpatioTemporalDataLoader(train_z_arr3d, train_batch_size, seq_len,
                                 horizon, shuffle=True, features=features,
                                 seq_sampling=seq_sampling)
    dataloaders['scaler'] = scaler
    logger.info('[train] | # timesteps: {:06d} | # samples: {:06d} | # batches: {:06d}'.\
        format(train_length,
               dataloaders['train_loader'].size,
               dataloaders['train_loader'].num_batch))
    logger.info('[validation] | # timesteps: {:06d} | # samples: {:06d} | # batches: {:06d}'.\
        format(val_length,
               dataloaders['val_loader'].size,
               dataloaders['val_loader'].num_batch))
    logger.info('[test] | # timesteps: {:06d} | # samples: {:06d} | # batches: {:06d}'.\
        format(test_length,
               dataloaders['test_loader'].size,
               dataloaders['test_loader'].num_batch))
    return dataloaders