# Shared imports assumed by the forecasting snippets in this section (the
# original module headers are not shown; note that fbprophet is packaged as
# `prophet` in newer releases).
import os
from math import sqrt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from fbprophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.api import (ExponentialSmoothing, Holt,
                                 SimpleExpSmoothing)


def prophet(input_df, kunag, matnr, n, sps):
    """Rolling one-step Prophet forecast over the last n points."""
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        train = train.copy()
        train['ds'] = train.date
        train['y'] = train.quantity
        data = train[["ds", "y"]]
        # seasonality_prior_scale modulates the strength of the seasonality
        # model: larger values allow the model to fit larger seasonal
        # fluctuations, smaller values dampen the seasonality. It can also
        # be set for individual seasonalities via add_seasonality.
        m = Prophet(yearly_seasonality=False, seasonality_prior_scale=sps)
        m.fit(data)
        future_data = m.make_future_dataframe(periods=1, freq="W")
        forecast = m.predict(future_data)
        lst.append(forecast["yhat"].iloc[-1])  # one-step-ahead forecast
    y_hat_avg['pred_column'] = lst
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

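# Every snippet here leans on a train_test_split(df, kunag, matnr, n) helper
# defined elsewhere in the repo. A minimal sketch of what it presumably does
# (filter one customer/material series, order it by date, hold out the last
# n points) follows; the actual implementation may differ.
def train_test_split_sketch(input_df, kunag, matnr, n):
    """Return (train, test): all but the last n points, and the last n."""
    series = input_df[(input_df["kunag"] == kunag) &
                      (input_df["matnr"] == matnr)].sort_values("date")
    return series.iloc[:-n], series.iloc[-n:]
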
def moving_average(input_df, kunag, matnr, n, roll):
    """Rolling one-step moving-average forecast, plotted against the test set."""
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        # the mean of the last `roll` observations is the next-step forecast
        lst.append(train['quantity'].rolling(roll).mean().iloc[-1])
    y_hat_avg['pred_column'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'],
             label='moving_avg_forecast', marker='.')
    plt.legend(loc='best')
    plt.title("moving_avg_forecast")
    plt.show()
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

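# Quick sanity check (synthetic data) that the rolling forecast above is
# simply the mean of the last `roll` observations:
_demo = pd.Series([4.0, 8.0, 6.0, 10.0])
assert _demo.rolling(2).mean().iloc[-1] == (6.0 + 10.0) / 2
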
def naive(input_df, kunag, matnr, n):
    # path = '/home/rahul/Downloads/bharat/time_series1/arima010'
    # index = str(kunag) + "_" + str(matnr)
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        # naive forecast: repeat the last observed value
        lst.append(int(train['quantity'].iloc[-1]))
    y_hat['pred_column'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat.set_index("date")['pred_column'], label='Naive Forecast',
             marker='.')
    plt.legend(loc='best')
    plt.title("Naive Forecast")
    plt.show()
    # plt.savefig(path + 'Graph_{}.png'.format(index), format="PNG")
    rms = sqrt(mean_squared_error(test1.quantity, y_hat.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat.pred_column)
    return y_hat, rms, mae

def sarima(input_df, kunag, matnr, n, p, d, q):
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    order = (p, d, q)
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit1 = sm.tsa.statespace.SARIMAX(train["quantity"],
                                         order=order,
                                         enforce_stationarity=False,
                                         enforce_invertibility=False,
                                         trend="n").fit()
        lst.append(fit1.forecast(1).iloc[-1])  # one-step-ahead forecast
    y_hat_avg['pred_column'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'],
             label='SARIMA' + "_" + str(order), marker='.')
    plt.legend(loc='best')
    plt.title("SARIMA")
    plt.show()
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def holts_linear(input_df, kunag, matnr, n, sl, ss):
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit1 = Holt(np.asarray(train['quantity'])).fit(smoothing_level=sl,
                                                       smoothing_slope=ss)
        lst.append(fit1.forecast(1)[0])  # one-step-ahead forecast
    y_hat_avg['pred_column'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'],
             label='Holts linear', marker='.')
    plt.legend(loc='best')
    plt.title("Holts linear")
    plt.show()
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def holts_winter(input_df, kunag, matnr, n):
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit1 = ExponentialSmoothing(np.asarray(train['quantity']),
                                    seasonal_periods=4,
                                    trend='add',
                                    seasonal='add').fit()
        lst.append(fit1.forecast(1)[0])  # one-step-ahead forecast
    y_hat_avg['pred_column'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'],
             label='Holts Winter', marker='.')
    plt.legend(loc='best')
    plt.title("Holts Winter")
    plt.show()
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def ses(input_df, kunag, matnr, n, alpha):
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit2 = SimpleExpSmoothing(np.asarray(train['quantity'])).fit(
            smoothing_level=alpha, optimized=False)
        lst.append(fit2.forecast(1)[0])  # SES forecasts are flat
    y_hat_avg['pred_column'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'], label='SES',
             marker='.')
    plt.legend(loc='best')
    plt.title("SES")
    plt.show()
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

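# A minimal, self-contained sketch of the same rolling one-step SES
# evaluation on synthetic data (no dependency on the repo's
# train_test_split helper; relies on the shared imports above):
values = np.array([12.0, 15.0, 11.0, 14.0, 13.0, 16.0, 12.0, 15.0])
n_test, alpha_demo = 3, 0.4
preds = []
for i in range(n_test, 0, -1):
    hist = values[:-i]  # everything before the point being predicted
    fit = SimpleExpSmoothing(hist).fit(smoothing_level=alpha_demo,
                                       optimized=False)
    preds.append(fit.forecast(1)[0])  # one-step-ahead forecast
print(preds, values[-n_test:])
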
def plot(df, kunag, matnr, n, folder):
    # p, d, q, epoch, batch, order, lstm_units and verb are free variables
    # here: they must be defined at module scope before this is called.
    train, test1 = train_test_split(df, kunag, matnr, 16)
    plt.figure(figsize=(18, 10))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    output1, rms1, mae1 = arima(df, kunag, matnr, 16, p, d, q)
    output2, mae2 = lstm(df, kunag, matnr, epoch, batch, order, lstm_units,
                         verb, folder, train_=True)
    y_hat_avg1 = output1
    # print("y_hat_avg1 :", y_hat_avg1)
    plt.plot(y_hat_avg1.set_index("date")['pred_column'], label='0,1,1',
             marker='.')
    y_hat_avg2 = output2
    plt.plot(y_hat_avg2[0], label='lstm_200_1', marker='.')
    plt.legend(loc='best')
    plt.title("Arima_lstm Comparison")
    index = str(kunag) + "-" + str(matnr)
    plt.savefig(folder + "/" + 'Graph_{}.png'.format(index), format="PNG")
    return mae1, mae2

def moving_average(input_df, kunag, matnr, n, roll):
    path = "/home/rahul/Downloads/bharat/time_series1/model_graphs/"
    folder = path + "/moving_average/" + "window_" + str(roll)
    if not os.path.exists(folder):
        os.makedirs(folder)
    index = str(kunag) + "-" + str(matnr)
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        # the mean of the last `roll` observations is the next-step forecast
        lst.append(train['quantity'].rolling(roll).mean().iloc[-1])
    y_hat_avg['pred_column'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'],
             label='moving_avg_forecast', marker='.')
    plt.legend(loc='best')
    plt.title("moving_avg_forecast")
    plt.savefig(folder + "/" + 'Graph_{}.png'.format(index), format="PNG")
    plt.show()
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def croston_tsb(df, kunag, matnr, n, alpha, beta):
    extra_periods = 1
    lst = []
    test1 = train_test_split(df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        ts, test = train_test_split(df, kunag, matnr, i)
        d = np.array(ts.quantity)  # transform the input into a numpy array
        cols = len(d)              # historical period length
        # append np.nan to the demand array to cover future periods
        d = np.append(d, [np.nan] * extra_periods)
        # level (a), probability (p) and forecast (f)
        a, p, f = np.full((3, cols + extra_periods), np.nan)
        # initialization at the first nonzero demand
        first_occurence = np.argmax(d[:cols] > 0)
        a[0] = d[first_occurence]
        p[0] = 1 / (1 + first_occurence)
        f[0] = p[0] * a[0]
        # create all the t+1 forecasts
        for t in range(0, cols):
            if d[t] > 0:
                a[t + 1] = alpha * d[t] + (1 - alpha) * a[t]
                p[t + 1] = beta * 1 + (1 - beta) * p[t]
            else:
                a[t + 1] = a[t]
                p[t + 1] = (1 - beta) * p[t]
            f[t + 1] = p[t + 1] * a[t + 1]
        # future forecasts beyond t+1 (a no-op when extra_periods == 1)
        a[cols + 1:cols + extra_periods] = a[cols]
        p[cols + 1:cols + extra_periods] = p[cols]
        f[cols + 1:cols + extra_periods] = f[cols]
        lst.append(f[-1])  # one-step-ahead TSB forecast
        # per-period diagnostics (demand, forecast, probability, level, error)
        df1_ct = pd.DataFrame.from_dict({
            "Demand": d,
            "Forecast": f,
            "Period": p,
            "Level": a,
            "Error": d - f
        })
        df1_ct = df1_ct.dropna()
        df1_ct['date'] = ts["date"]
    y_hat_avg['Forecast'] = lst
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.Forecast))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.Forecast)
    plt.figure(figsize=(12, 8))
    plt.plot(ts.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['Forecast'][-16:],
             label='Croston TSB', marker='.')
    plt.legend(loc='best')
    plt.title("Croston TSB")
    plt.show()
    return y_hat_avg, rms, mae

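# Worked toy trace of the TSB recursion above (alpha = beta = 0.5,
# demand = [0, 2]): initialization uses the first nonzero demand, then each
# step smooths the level on demand periods and decays the demand
# probability on zero-demand periods.
d_demo = np.array([0.0, 2.0])
alpha_d = beta_d = 0.5
a_d = [2.0]             # level, initialized at the first nonzero demand
p_d = [1 / (1 + 1)]     # P(demand), first occurrence at index 1 -> 0.5
f_d = [a_d[0] * p_d[0]]  # forecast = level * probability -> 1.0
# t = 0 (zero demand): level unchanged, probability decays
a_d.append(a_d[0])                                  # 2.0
p_d.append((1 - beta_d) * p_d[0])                   # 0.25
f_d.append(a_d[1] * p_d[1])                         # 0.5
# t = 1 (demand of 2): both level and probability are smoothed
a_d.append(alpha_d * d_demo[1] + (1 - alpha_d) * a_d[1])  # 2.0
p_d.append(beta_d * 1 + (1 - beta_d) * p_d[1])            # 0.625
f_d.append(a_d[2] * p_d[2])                               # 1.25
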
def get_preprocessed_data():
    """Obtain the preprocessed data."""
    tickers = ['snp', 'nyse', 'djia', 'nikkei', 'hangseng', 'ftse', 'dax',
               'aord']
    closing_data = preprocess.load_data(tickers)
    time_series = preprocess.preprocess_data(closing_data)
    training_test_data = preprocess.train_test_split(time_series,
                                                     train_test_ratio=0.8)
    return training_test_data

def main():
    # load data
    df = pd.read_csv('data/dr250_active.csv')

    # preprocess data
    df = filter_actions(df)
    user_items = get_user_items(df)
    k = 20
    user_items_train, test_users = train_test_split(
        user_items, split_method='leave_k_out', k=k)

    # build model (see https://github.com/benfred/implicit)
    model = implicit.als.AlternatingLeastSquares(factors=10,
                                                 regularization=0.1,
                                                 iterations=30)
    alpha = 2  # scale raw interactions into confidence weights
    item_users_train = user_items_train.transpose()
    model.fit(item_users_train * alpha)

    # validate model
    N = 100
    mean_model_acc, mean_benchmark_acc = validate(model, user_items,
                                                  user_items_train,
                                                  test_users, N=N)

def prophet(input_df, kunag, matnr, n, sps):
    path = "/home/rahul/Downloads/bharat/time_series1/model_graphs/prophet/"
    folder = path + "sps_" + str(sps)
    if not os.path.exists(folder):
        os.makedirs(folder)
    index = str(kunag) + "-" + str(matnr)
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        train = train.copy()
        train['ds'] = train.date
        train['y'] = train.quantity
        data = train[["ds", "y"]]
        # seasonality_prior_scale modulates the strength of the seasonality
        # model (see the comment in the variant above).
        m = Prophet(yearly_seasonality=True, seasonality_prior_scale=sps)
        m.fit(data)
        future_data = m.make_future_dataframe(periods=1, freq="W")
        forecast = m.predict(future_data)
        lst.append(forecast["yhat"].iloc[-1])  # one-step-ahead forecast
    y_hat_avg['prediction'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['prediction'], label='prophet',
             marker='.')
    plt.legend(loc='best')
    plt.title("prophet")
    plt.savefig(folder + "/" + 'Graph_{}.png'.format(index), format="PNG")
    plt.show()
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.prediction))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.prediction)
    return y_hat_avg, rms, mae

def average_forecast(input_df, kunag, matnr, n):
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        # forecast with the mean of the whole training history
        lst.append(train['quantity'].mean())
    y_hat_avg['pred_column'] = lst
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def sarima(input_df, kunag, matnr, n, p, d, q):
    path = "/home/rahul/Downloads/bharat/time_series1/model_graphs/sarima/"
    order = (p, d, q)
    folder = path + str(order)
    if not os.path.exists(folder):
        os.makedirs(folder)
    index = str(kunag) + "-" + str(matnr)
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit1 = sm.tsa.statespace.SARIMAX(train["quantity"],
                                         order=order,
                                         enforce_stationarity=False,
                                         enforce_invertibility=False,
                                         trend="n").fit()
        lst.append(fit1.forecast(1).iloc[-1])  # one-step-ahead forecast
    y_hat_avg['pred_column'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'],
             label='SARIMA' + "_" + str(order), marker='.')
    plt.legend(loc='best')
    plt.title("SARIMA")
    plt.savefig(folder + "/" + 'Graph_{}.png'.format(index), format="PNG")
    plt.show()
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def naive(input_df, kunag, matnr, n):
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        # naive forecast: repeat the last observed value
        lst.append(int(train['quantity'].iloc[-1]))
    y_hat['pred_column'] = lst
    rms = sqrt(mean_squared_error(test1.quantity, y_hat.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat.pred_column)
    return y_hat, rms, mae

def holts_linear(input_df, kunag, matnr, n, sl, ss):
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit1 = Holt(np.asarray(train['quantity'])).fit(smoothing_level=sl,
                                                       smoothing_slope=ss)
        lst.append(fit1.forecast(1)[0])  # one-step-ahead forecast
    y_hat_avg['pred_column'] = lst
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def ses(input_df, kunag, matnr, n, sl):
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit2 = SimpleExpSmoothing(np.asarray(train['quantity'])).fit(
            smoothing_level=sl, optimized=False)
        lst.append(fit2.forecast(1)[0])  # SES forecasts are flat
    y_hat_avg['pred_column'] = lst
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def holts_winter(input_df, kunag, matnr, n, sp):
    path = "/home/rahul/Downloads/bharat/time_series1/model_graphs/"
    folder = path + "/holts_winter/" + str(sp)
    if not os.path.exists(folder):
        os.makedirs(folder)
    index = str(kunag) + "-" + str(matnr)
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit1 = ExponentialSmoothing(np.asarray(train['quantity']),
                                    seasonal_periods=sp,
                                    trend='add',
                                    seasonal='add').fit()
        lst.append(fit1.forecast(1)[0])  # one-step-ahead forecast
    y_hat_avg['pred_column'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'],
             label='Holts Winter', marker='.')
    plt.legend(loc='best')
    plt.title("Holts Winter")
    plt.savefig(folder + "/" + 'Graph_{}.png'.format(index), format="PNG")
    plt.show()
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def ses(input_df, kunag, matnr, n, alpha):
    path = "/home/rahul/Downloads/bharat/time_series1/model_graphs/"
    folder = path + "/ses/" + "sl_" + str(alpha)
    if not os.path.exists(folder):
        os.makedirs(folder)
    index = str(kunag) + "-" + str(matnr)
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit2 = SimpleExpSmoothing(np.asarray(train['quantity'])).fit(
            smoothing_level=alpha, optimized=False)
        lst.append(fit2.forecast(1)[0])  # SES forecasts are flat
    y_hat_avg['pred_column'] = lst
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train')
    plt.plot(test1.set_index("date")['quantity'], label='Test')
    plt.plot(y_hat_avg.set_index("date")['pred_column'], label='SES',
             marker=".")
    plt.legend(loc='best')
    plt.title("SES")
    plt.savefig(folder + "/" + 'Graph_{}.png'.format(index), format="PNG")
    plt.show()
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def holts_winter(input_df, kunag, matnr, n):
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit1 = ExponentialSmoothing(np.asarray(train['quantity']),
                                    seasonal_periods=4,
                                    trend='add',
                                    seasonal='add').fit()
        lst.append(fit1.forecast(1)[0])  # one-step-ahead forecast
    y_hat_avg['pred_column'] = lst
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

def sarima(input_df, kunag, matnr, n, p, d, q):
    lst = []
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    order = (p, d, q)
    for i in range(n, 0, -1):
        train, test = train_test_split(input_df, kunag, matnr, i)
        fit1 = sm.tsa.statespace.SARIMAX(train["quantity"],
                                         order=order,
                                         enforce_stationarity=False,
                                         enforce_invertibility=False,
                                         trend="n").fit()
        lst.append(fit1.forecast(1).iloc[-1])  # one-step-ahead forecast
    y_hat_avg['pred_column'] = lst
    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae

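# Self-contained illustration (synthetic data; relies on the shared imports
# above) of the one-step SARIMAX forecast used inside the rolling loops:
y_demo = pd.Series([20.0, 22.0, 19.0, 23.0, 21.0, 24.0, 20.0, 25.0])
res_demo = sm.tsa.statespace.SARIMAX(y_demo, order=(0, 1, 1),
                                     enforce_stationarity=False,
                                     enforce_invertibility=False,
                                     trend="n").fit(disp=False)
next_step = res_demo.forecast(1).iloc[-1]  # forecast for the next period
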
def fit(self, X, y, sample_weight=None):
    X, X_valid, y, y_valid = pre.train_test_split(X, y)
    self.model = Sequential()
    for layer in self.layers:
        if self.verbose >= 1:
            print("Adding " + str(layer['type']) + "(" +
                  str(layer['params']) + ") ...")
        self.model.add(layer["type"](**layer["params"]))
    self.model.compile(optimizer=self.optimizer, loss=self.loss)
    # nb_epoch is the Keras 1.x keyword (renamed to epochs in Keras 2)
    self.model.fit(X, y,
                   batch_size=self.batch_size,
                   nb_epoch=self.nb_epoch,
                   shuffle=self.shuffle,
                   verbose=self.verbose,
                   validation_data=(X_valid, y_valid),
                   callbacks=self.callbacks)
    self._on = True
    return self

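# Hypothetical usage of the wrapper above: self.layers is a list of dicts,
# each holding a Keras layer class under "type" and its constructor kwargs
# under "params". The spec below is illustrative, not taken from the source.
from keras.layers import Dense
example_layers = [
    {"type": Dense, "params": {"units": 64, "activation": "relu",
                               "input_dim": 10}},
    {"type": Dense, "params": {"units": 1}},
]
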
# Additional imports assumed for the LSTM snippet below.
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, model_from_json
from keras.layers import LSTM, Dense, Dropout


def lstm(df, kunag, matnr, epoch, batch, order, lstm_units, verb, folder,
         train_=True):
    train, test = train_test_split(df, kunag, matnr, 16)
    dataframe = n_series(df, kunag, matnr)
    index = str(kunag) + "_" + str(matnr)
    test_points = 16
    df_testing_complete = dataframe[-16:]
    test_predictions = []
    df = dataframe[:-test_points]
    df_training_complete = df
    df_training_processed = df_training_complete.iloc[:, 1:2].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    df_training_scaled = scaler.fit_transform(df_training_processed)
    for k in range(0, test_points):
        if (k == 0) and train_:
            print("Training model for " + str(index) + "...")
            df = dataframe[:-test_points + k]
            df_training_complete = df
            df_training_processed = df_training_complete.iloc[:, 1:2].values
            scaler = MinMaxScaler(feature_range=(0, 1))
            df_training_scaled = scaler.fit_transform(df_training_processed)
            # build the supervised training set from a sliding window of
            # length `order`
            features_set = []
            labels = []
            for i in range(order + 1, df.shape[0]):
                features_set.append(df_training_scaled[i - order:i, 0])
                labels.append(df_training_scaled[i, 0])
            features_set, labels = np.array(features_set), np.array(labels)
            features_set = np.reshape(
                features_set,
                (features_set.shape[0], features_set.shape[1], 1))
            # print(features_set.size)
            model = Sequential()
            model.add(LSTM(units=lstm_units,
                           return_sequences=False,
                           input_shape=(features_set.shape[1], 1)))
            model.add(Dropout(0.2))
            model.add(Dense(units=1))
            model.compile(optimizer='adam', loss='mean_squared_error')
            model.fit(features_set, labels,
                      epochs=epoch, batch_size=batch, verbose=verb)
            # serialize the model architecture to JSON
            model_json = model.to_json()
            with open("pretrained/weights/model_(" + str(index) + ").json",
                      "w") as json_file:
                json_file.write(model_json)
            # serialize weights to HDF5
            model.save_weights("pretrained/weights/model_(" + str(index) +
                               ").h5")
            print("Saved model to disk")
            # load json and create model
            json_file = open("pretrained/weights/model_(" + str(index) +
                             ").json", 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            loaded_model = model_from_json(loaded_model_json)
            # load weights into new model
            loaded_model.load_weights("pretrained/weights/model_(" +
                                      str(index) + ").h5")
            print("Loaded model from disk")
            loaded_model.compile(loss='mean_squared_error',
                                 optimizer='adam',
                                 metrics=['accuracy'])
        elif k == 0 and not train_:
            # load json and create model
            json_file = open("pretrained/weights/model_(" + str(index) +
                             ").json", 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            loaded_model = model_from_json(loaded_model_json)
            # load weights into new model
            loaded_model.load_weights("pretrained/weights/model_(" +
                                      str(index) + ").h5")
            print("Loaded model from disk")
            # evaluate loaded model on test data
            loaded_model.compile(loss='mean_squared_error',
                                 optimizer='adam',
                                 metrics=['accuracy'])
        df_testing_processed = df_testing_complete.iloc[k:k + 1, 1:2].values
        df_total = pd.concat((df_training_complete['quantity'],
                              df_testing_complete['quantity']), axis=0)
        test_inputs = df_total[len(df_total) - len(df_testing_complete) -
                               order + k:].values
        test_inputs = test_inputs.reshape(-1, 1)
        test_inputs = scaler.transform(test_inputs)
        test_features = []
        test_features.append(test_inputs[1:order + 1, 0])
        test_features = np.array(test_features)
        test_features = np.reshape(
            test_features,
            (test_features.shape[0], test_features.shape[1], 1))
        predictions = loaded_model.predict(test_features)
        predictions = scaler.inverse_transform(predictions)
        test_predictions.append(predictions)
    test_predictions_1 = [i[0][0] for i in test_predictions]
    df_c = pd.DataFrame(data=test_predictions_1)
    test.reset_index(inplace=True)
    # both frames now carry a fresh RangeIndex, so a plain concat aligns
    # each forecast with its test row
    pred = pd.concat([test, df_c], axis=1)
    pred.set_index("date", inplace=True)
    pred.drop(['index', 'quantity'], axis=1, inplace=True)
    test.drop(['index'], axis=1, inplace=True)
    mae = mean_absolute_error(test.quantity, pred[0])
    rms = sqrt(mean_squared_error(test.quantity, pred[0]))
    print("mae :", mae)
    print("rms :", rms)
    dataframe.set_index('date', inplace=True)
    train.set_index('date', inplace=True)
    test.set_index('date', inplace=True)
    # plt.figure(figsize=(16, 8))
    # plt.plot(dataframe, marker='.', color='blue')
    # plt.plot(train, marker='.', color='blue', label="Train")
    # plt.plot(test, marker='.', color='orange', label="Test")
    # plt.plot(pred, marker=".", color='green', label="Prediction")
    # plt.xlabel("time")
    # plt.ylabel('quantity')
    # plt.legend(loc='best')
    # plt.title("batch : " + str(batch) + "_" + "epochs : " + str(epoch))
    # plt.savefig(folder + "/" + 'Graph_{}_{}_{}_{}.png'.format(
    #     index, batch, epoch, lstm_units), format="PNG")
    return pred, mae

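# A minimal sketch of the sliding-window supervised set the LSTM code above
# builds: each sample is `order` consecutive scaled values and the label is
# the value that follows (the original starts its loop at order + 1, so it
# skips the first usable window).
series_demo = np.arange(10, dtype=float)  # toy scaled series
order_demo = 3
X_demo = np.array([series_demo[i - order_demo:i]
                   for i in range(order_demo, len(series_demo))])
y_demo2 = np.array([series_demo[i]
                    for i in range(order_demo, len(series_demo))])
X_demo = X_demo.reshape((X_demo.shape[0], X_demo.shape[1], 1))
# shape: (samples, timesteps, features), as the LSTM layer expects
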
def main():
    idx = pd.IndexSlice
    date_col = 'start_date'
    target = pd.read_hdf(cfg.data_target_file)
    data = pd.read_hdf(cfg.data_cov_file)
    train_start_date = cfg.train_start_date
    end_date = cfg.end_date
    time_index = pd.date_range(train_start_date, end_date, freq='1D')
    existing_dates = [str(t[2]).split(" ")[0] for t in target.index]
    unique_dates = list(set(existing_dates))
    target = target.loc[idx[:, :, unique_dates], :]
    data = data.loc[idx[unique_dates], :]
    cv_path = cfg.rootpath_cv
    forecast_path = cfg.forecast_rootpath
    target_var = cfg.target_var
    val_years = cfg.val_years
    test_years = cfg.test_years
    val_train_range = cfg.val_train_range
    test_train_range = cfg.test_train_range
    past_years = cfg.past_kyears
    val_range = cfg.val_range
    val_freq = cfg.val_freq
    test_start_date = cfg.test_start_date
    test_time_index_all = pd.date_range(test_start_date, end_date, freq='7D')
    # create the train-validation sets
    for year in val_years:
        for num_forecast in range(1, 2):
            preprocess.train_val_split(cv_path, data, target, target_var,
                                       year, num_forecast,
                                       train_range=val_train_range,
                                       past_years=past_years,
                                       test_range=val_range,
                                       test_freq=val_freq,
                                       n_jobs=20)
    # create the train-test sets
    for year in test_years:
        for num_forecast in range(1, 2):
            preprocess.train_test_split(forecast_path, data, target,
                                        target_var, test_time_index_all,
                                        year, num_forecast,
                                        train_range=test_train_range,
                                        past_years=past_years,
                                        n_jobs=20)

def main():
    # read in data and train/test split
    labels_dir = '../data/genki4k/labels.txt'
    cropped_dir = '../data/genki4k/files/cropped'

    # read in data
    data_sample, labels, headpose = read_raw_data(cropped_dir, labels_dir)
    # visualize_imgs(data_sample)

    # train-test split
    (train_data, train_labels, train_headpose,
     test_data, test_labels, test_headpose) = train_test_split(
        data_sample, labels, headpose)

    # get HOG features
    train_features = get_hog_features(train_data)
    test_features = get_hog_features(test_data)
    print("got features")

    # feed features into PCA, keeping 95% of the variance
    pca = PCA(n_components=0.95)
    pca.fit(train_features)
    train_features = pca.transform(train_features)
    test_features = pca.transform(test_features)

    # initialize support vector classifier with linear kernel
    svc = SVC(kernel='linear', probability=False, C=5)

    # k-fold cross-validation on base images
    # k = 5
    # scores_cross_validation_svm(svc, train_data, train_labels, k)
    # evaluate_performance(svc, train_data, train_labels, test_data,
    #                      test_labels)

    # k-fold cross-validation with HOG features
    k = 5
    scores_cross_validation_svm(svc, train_features, train_labels, k,
                                hog_features=True)
    evaluate_performance(svc, train_features, train_labels, test_features,
                         test_labels, hog_features=True)

    # save the trained model
    filename = 'trained_model.sav'
    pickle.dump(svc, open(filename, 'wb'))

    # save the PCA transformation
    pca_filename = 'trained_feature_transform.sav'
    pickle.dump(pca, open(pca_filename, 'wb'))

# Train ####################################################################
if is_train:
    for seed in SEED:
        # use the context manager to operate in the data directory
        with cd(Name + f'-{seed}'):
            pickle.dump(sym_params, open("sym_params.sav", "wb"))
            logfile = open('log.txt', 'w+')
            resultfile = open('result.txt', 'w+')
            if os.path.exists('test.sav'):
                logfile.write('Did not calculate symfunctions.\n')
            else:
                data_dict = snn2sav(db, Name, elements, params_set,
                                    element_energy=element_energy)
                train_dict = train_test_split(data_dict, 1 - test_percent,
                                              seed=seed)
                train_val_split(train_dict, 1 - val_percent, seed=seed)
            logfile.flush()
            train_dict = torch.load('final_train.sav')
            val_dict = torch.load('final_val.sav')
            test_dict = torch.load('test.sav')
            scaling = get_scaling(train_dict, fp_scale_method,
                                  e_scale_method)
            n_nodes = hp['n_nodes']
            activations = hp['activations']
            lr = hp['lr']
            model = MultiLayerNet(N_sym, n_nodes, activations, nelem,
                                  scaling=scaling)

def split_ratings_matrix(self):
    """Split the matrix of user ratings according to the ratio of the
    dataset reserved for evaluation."""
    train, test = train_test_split(self.ratings_matrix,
                                   self.test_split_ratio)
    return train, test