# xgboost with redefine: refit on the full history each step, with concept dummies as features
import time

import numpy as np
import pandas as pd

import functions
# project helpers used below (defined elsewhere): one_hot_encoding, manual_preprocessing,
# xgboost_forecast, smape, plot_save


def main(iteration, name):
    smape_dict = {}
    print("xgboost with redefine is running")
    start = time.perf_counter()

    # loading the data
    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    # 70/30 train/test split
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]
    setback = len(train)

    predictions = []
    ground_truth = []
    bkp = None
    for i in range(len(test)):
        # get breakpoints for train
        history = functions.ada_preprocessing(train)
        # save the final set of breakpoints
        if i == len(test) - 1:
            bkp = history["concept"]
        history = one_hot_encoding(history)

        # add new test observation to train series
        train.append(test[i])
        # pass the last row of the history dataframe so the same concept dummies are extracted
        test_df = manual_preprocessing(train, history.tail(1))
        ground_truth.append(train[-1])

        # training data = history
        prediction = xgboost_forecast(history, test_df.loc[:, "t-1":])
        predictions.append(prediction)

    end = time.perf_counter()
    print("Time spent on xgboost with redefine: {:.2f}m".format((end - start) / 60))

    error = smape(np.asarray(predictions), np.asarray(ground_truth))
    smape_dict[name] = error
    # print("SMAPE: {:.4f}".format(error))
    # plot_save(predictions, ground_truth, bkp, "results/xgboost/redefine/"+name, setback)

    # saving the dictionary containing errors
    dict_path = "results/xgboost/redefine/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
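# smape() is used throughout these scripts but not shown here. A minimal sketch of the
# standard symmetric MAPE it presumably implements (the project's own definition may
# differ, e.g. in scaling or zero handling):
import numpy as np


def smape_sketch(predictions, actuals):
    """Symmetric mean absolute percentage error, in percent."""
    predictions = np.asarray(predictions, dtype=float).ravel()
    actuals = np.asarray(actuals, dtype=float).ravel()
    denominator = (np.abs(actuals) + np.abs(predictions)) / 2.0
    # avoid division by zero when both prediction and actual are zero
    ratio = np.where(denominator == 0, 0.0, np.abs(predictions - actuals) / denominator)
    return 100.0 * np.mean(ratio)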
# xgboost with retrain: walk-forward validation on the lagged values only (concept columns dropped)
import time

import pandas as pd

import functions
# project helpers used below (defined elsewhere): walk_forward_validation


def main(iteration, name):
    print("xgboost with retrain is running")
    smape_dict = {}

    # loading the data
    # print("xgboost with retrain is alive")
    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    # note: this is only used to get the lagged values; the concept columns and the rest are dropped subsequently
    data = functions.ada_preprocessing(data)
    data = data.loc[:, "t":"t-5"]

    # train/test split
    n = len(data)
    train, test = data[:int(0.7 * n)], data[int(0.7 * n):]

    # fitting and plotting with concept
    start = time.perf_counter()
    error, y, yhat = walk_forward_validation(train, test)
    end = time.perf_counter()
    print("Time spent on xgboost with retrain: {:.2f}s".format(end - start))

    smape_dict[name] = error
    # print("SMAPE: {:.4f}".format(error))

    # plt.plot(y, label="Expected", color="black")
    # plt.plot(yhat, label="Predicted", color="red")
    # plt.legend()
    # plt.title(name)

    # saving the plots
    # image_path = "results/xgboost/retrain/"+name+".png"
    # plt.savefig(image_path)
    # plt.clf()
    # plt.show()

    # saving the dictionary containing errors
    dict_path = "results/xgboost/retrain/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
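# walk_forward_validation() and the underlying XGBoost fit are not shown here. A minimal
# sketch of what a walk-forward loop over the "t".."t-5" lag frame could look like,
# refitting an XGBoost regressor at every step (function name, model settings and error
# computation are illustrative guesses, not the project's actual implementation):
import numpy as np
import pandas as pd
from xgboost import XGBRegressor


def walk_forward_validation_sketch(train: pd.DataFrame, test: pd.DataFrame):
    history = train.copy()
    y_true, y_hat = [], []
    for _, row in test.iterrows():
        # fit on everything seen so far: target is "t", features are the lags "t-1".."t-5"
        model = XGBRegressor(objective="reg:squarederror", n_estimators=100)
        model.fit(history.loc[:, "t-1":"t-5"], history["t"])
        # one-step-ahead forecast for the current test row
        pred = model.predict(row.loc["t-1":"t-5"].to_frame().T)[0]
        y_hat.append(pred)
        y_true.append(row["t"])
        # the observed row becomes part of the training history for the next step
        history = pd.concat([history, row.to_frame().T])
    y_true = np.asarray(y_true, dtype=float)
    y_hat = np.asarray(y_hat, dtype=float)
    # symmetric MAPE of the one-step forecasts
    error = 100.0 * np.mean(np.abs(y_hat - y_true) / ((np.abs(y_true) + np.abs(y_hat)) / 2.0))
    return error, y_true, y_hat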
# exploratory script: window features and breakpoint preprocessing on simulated series
import matplotlib.pyplot as plt
import pandas as pd

import create_simdata
import functions

lin1_abrupt = create_simdata.linear1_abrupt()
lin1_abrupt = functions.preprocess_timeseries(lin1_abrupt)

series = pd.DataFrame({"t": lin1_abrupt})
series = functions.autocorrelations_in_window(10, series)
series = functions.partial_autocorrelations_in_window(10, series)
series = functions.features_in_window(10, series)
series = functions.oscillation_behaviour_in_window(10, series)

timeseries = create_simdata.linear1_abrupt()
timeseries = functions.ada_preprocessing(timeseries, delay_correction=2)

nonlinear2_abrupt_raw = create_simdata.nonlinear3_abrupt()
nonlinear2_abrupt = functions.preprocess_timeseries(nonlinear2_abrupt_raw, windowsize=20)
plt.plot(nonlinear2_abrupt)

lin1_abrupt = functions.preprocess_timeseries(lin1_abrupt)
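# ada_preprocessing() / preprocess_timeseries() are project helpers; the scripts above and
# below consume their output via columns "t", "t-1", ..., "t-5" plus a "concept" column.
# A minimal sketch of how such a lag frame could be built from a raw series (illustrative
# only; the real helper also runs drift detection to add the concept/breakpoint columns):
import pandas as pd


def lag_frame_sketch(values, lags=5):
    frame = pd.DataFrame({"t": values})
    for k in range(1, lags + 1):
        frame["t-{}".format(k)] = frame["t"].shift(k)
    # the first `lags` rows have incomplete lag history and are dropped
    return frame.dropna().reset_index(drop=True)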
# xgboost with discard: once a new concept has gathered enough points, drop the older data
import time

import numpy as np
import pandas as pd

import functions
# project helpers used below (defined elsewhere): manual_preprocessing, xgboost_forecast,
# is_enough, smape, plot_save


def main(iteration, name):
    smape_dict = {}
    print("xgboost with discard is running")
    start = time.perf_counter()

    # loading the data
    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    # 70/30 train/test split
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]
    setback = len(train)

    # get breakpoints for train set
    history = functions.ada_preprocessing(train)
    # note the last concept that appeared
    last_num_concepts = max(list(history["concept"]))

    predictions = []
    points = 0
    bkp = None
    for i in range(len(test)):
        # add new test observation to train series
        train.append(test[i])
        # pass all the values available in the series up to and including the new test point
        test_df = manual_preprocessing(train)

        # training data = history
        prediction = xgboost_forecast(history.loc[:, "t":"t-5"], test_df.loc[:, "t-1":"t-5"])
        predictions.append(prediction)

        # new dataframe with the predicted test observation already appended
        history = functions.ada_preprocessing(train)
        if i == len(test) - 1:
            bkp = history["concept"]

        # note the real concept for the test observation
        new_num_concepts = max(list(history["concept"]))

        # if the number of concepts changed, check whether we have enough datapoints for the new concept
        if new_num_concepts > last_num_concepts:
            # if we have at least 20 points for the new concept, keep them and drop the rest of the data
            points = is_enough(history)
            if points >= 20:
                history = history.tail(points)
                last_num_concepts = new_num_concepts
                points = 0
        # otherwise just keep using the same dataset

    end = time.perf_counter()
    print("Time spent on xgboost with discard: {:.2f}m".format((end - start) / 60))

    error = smape(np.asarray(predictions), np.asarray(test))
    smape_dict[name] = error
    # print("SMAPE: {:.4f}".format(error))
    # plot_save(predictions, ground_truth, bkp, "results/xgboost/discard/"+name, setback)

    # saving the dictionary containing errors
    dict_path = "results/xgboost/discard/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
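# is_enough() is not shown here. Based on how it is used above (history.tail(points) once
# points >= 20), it presumably counts how many of the most recent rows already belong to
# the newest concept. A minimal sketch under that assumption:
import pandas as pd


def is_enough_sketch(history: pd.DataFrame) -> int:
    """Number of trailing rows whose 'concept' equals the newest concept."""
    newest_concept = history["concept"].max()
    count = 0
    # count consecutive rows from the end that belong to the newest concept
    for concept in reversed(history["concept"].to_list()):
        if concept == newest_concept:
            count += 1
        else:
            break
    return count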
# lstm with discard: refit the LSTM only when a new concept has gathered enough points
import time

import numpy as np
import pandas as pd

import functions
# project helpers used below (defined elsewhere): fit_lstm, manual_preprocessing,
# is_enough, smape, plot_save


def main(iteration, name):
    print("lstm with discard is running")
    smape_dict = {}

    # load the data
    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    # 70/30 train/test split
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]
    setback = len(train)

    # get breakpoints for train set
    history = functions.ada_preprocessing(train)
    # note the last concept that appeared
    last_num_concepts = max(list(history["concept"]))

    model = fit_lstm(history.loc[:, "t":"t-5"])

    predictions = []
    points = 0
    bkp = None
    start = time.perf_counter()
    for i in range(0, len(test)):
        # get test observation into the necessary shape
        train.append(test[i])
        test_row = manual_preprocessing(train)
        X = test_row.loc[:, "t-1":"t-5"]
        X_arrays = np.asarray(X)
        test_X = np.hstack(X_arrays).reshape(X.shape[0], 1, X.shape[1])

        # get prediction for the new test observation
        prediction = model.predict(test_X)
        predictions.append(prediction)

        # new dataframe with the predicted test observation already appended
        history = functions.ada_preprocessing(train)
        if i == len(test) - 1:
            bkp = history["concept"]

        # note the real concept for the test observation
        new_num_concepts = max(list(history["concept"]))

        # if the number of concepts changed, check whether we have enough datapoints for the new concept
        if new_num_concepts > last_num_concepts:
            # if we have at least 20 points for the new concept, keep them and drop the rest of the data
            points = is_enough(history)
            if points >= 20:
                print("found {} points from new concept".format(points))
                history = history.tail(points)
                last_num_concepts = new_num_concepts
                points = 0
                # retrain the model on the discarded (new-concept-only) history
                model = fit_lstm(history.loc[:, "t":"t-5"])
        # otherwise just keep using the same dataset

    end = time.perf_counter()
    print("Time spent: {:.2f}h".format((end - start) / 3600))

    # inverting predictions to original scale
    # predictions = scaler.inverse_transform(np.asarray(predictions).reshape([-1,1]))

    error = smape(np.asarray(predictions), np.asarray(test))
    smape_dict[name] = error
    print("SMAPE: {:.4f}".format(error))
    # plot_save(np.asarray(predictions), test, bkp, "results/lstm/discard/"+name, setback)

    # saving the dictionary containing errors
    dict_path = "results/lstm/discard/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
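# fit_lstm() is not shown here. A minimal sketch of a Keras LSTM fitted on the lag frame,
# matching the (samples, 1, 5) input shape built above (layer size, epochs and other
# hyperparameters are illustrative guesses, not the project's actual settings):
import pandas as pd
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential


def fit_lstm_sketch(frame: pd.DataFrame):
    # target is the current value "t", features are the lags "t-1".."t-5"
    y = frame["t"].to_numpy()
    X = frame.loc[:, "t-1":"t-5"].to_numpy().reshape(len(frame), 1, 5)
    model = Sequential([
        LSTM(50, input_shape=(1, 5)),
        Dense(1),
    ])
    model.compile(loss="mse", optimizer="adam")
    model.fit(X, y, epochs=50, batch_size=32, verbose=0)
    return model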
# cond_rnn: fit the conditional RNN once on the initial history, then only predict for each new test point
import time

import numpy as np
import pandas as pd

import functions
# project helpers used below (defined elsewhere): manual_preprocessing,
# forecast_preprocessing, fit_cond_rnn, smape, plot_save


def main(iteration, name):
    smape_dict = {}

    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    # 70/30 train/test split
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]
    setback = len(train)

    bkp = None
    predictions = []
    start = time.perf_counter()

    # the first iteration is done outside the loop to avoid retraining the model
    history = functions.ada_preprocessing(train)
    history.drop(["transition", "steps_to_bkp", "steps_since_bkp"], axis=1, inplace=True)

    # get the dataframe for the new test observation
    train.append(test[0])
    test_row = manual_preprocessing(train, history.tail(1))

    # change train and test into the form appropriate for CondRNN
    train_X_input, train_X_aux, test_X_input, test_X_aux, train_y, test_y = forecast_preprocessing(history, test_row)
    model = fit_cond_rnn(train_X_input, train_X_aux, train_y)

    # get prediction for the new test observation
    prediction = model.predict([test_X_input, test_X_aux])
    predictions.append(prediction)

    # the first prediction is done, so the loop starts from 1
    for i in range(1, len(test)):
        # get breakpoints for train dataset
        history = functions.ada_preprocessing(train)
        if i == len(test) - 1:
            bkp = history["concept"]
        history.drop(["transition", "steps_to_bkp", "steps_since_bkp"], axis=1, inplace=True)

        # get the dataframe for the new test observation
        train.append(test[i])
        test_row = manual_preprocessing(train, history.tail(1))

        # change train and test into the form appropriate for CondRNN
        train_X_input, train_X_aux, test_X_input, test_X_aux, train_y, test_y = forecast_preprocessing(history, test_row)

        # get prediction for the new test observation
        prediction = model.predict([test_X_input, test_X_aux])
        predictions.append(prediction)

    end = time.perf_counter()
    print("Time spent on cond_rnn: {:.2f}h".format((end - start) / 3600))

    # inverting predictions to original scale
    # predictions = scaler.inverse_transform(np.asarray(predictions).reshape([-1,1]))

    error = smape(np.asarray(predictions), np.asarray(test))
    smape_dict[name] = error
    # plot_save(np.asarray(predictions), test, bkp, "results/cond_rnn/"+name, setback)

    # saving the dictionary containing errors
    dict_path = "results/cond_rnn/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
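# forecast_preprocessing() is not shown here. Judging by how its outputs are used above,
# it splits each frame into a sequential input (the lagged values) and an auxiliary input
# (the concept information) for the conditional RNN. A minimal sketch under that
# assumption (column handling and shapes are guesses; the project's real helper may differ):
import pandas as pd


def forecast_preprocessing_sketch(history: pd.DataFrame, test_row: pd.DataFrame):
    def split(frame: pd.DataFrame):
        y = frame["t"].to_numpy()
        # lagged values as a (samples, timesteps=5, features=1) sequence
        x_input = frame.loc[:, "t-1":"t-5"].to_numpy().reshape(len(frame), 5, 1)
        # everything from the concept column onwards as the auxiliary condition
        x_aux = frame.loc[:, "concept":].to_numpy().astype("float32")
        return x_input, x_aux, y

    train_X_input, train_X_aux, train_y = split(history)
    test_X_input, test_X_aux, test_y = split(test_row)
    return train_X_input, train_X_aux, test_X_input, test_X_aux, train_y, test_y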
# lstm with oneshot: train the LSTM once on the initial history, then only predict for each new test point
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import functions
# project helpers used below (defined elsewhere): fit_lstm, manual_preprocessing, smape


def main(iteration, name):
    print("lstm with oneshot is running")
    smape_dict = {}

    # loading the data
    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    # 70/30 train/test split
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]

    predictions = []

    # train the model outside the for-loop
    history = functions.ada_preprocessing(train)
    history = history.loc[:, "t":"t-5"]
    model = fit_lstm(history)

    start = time.perf_counter()
    for i in range(0, len(test)):
        print("lstm with oneshot is alive")
        # get test observation into the necessary shape
        train.append(test[i])
        test_row = manual_preprocessing(train)
        X = test_row.loc[:, "t-1":"t-5"]
        X_arrays = np.asarray(X)
        test_X = np.hstack(X_arrays).reshape(X.shape[0], 1, X.shape[1])

        # get prediction for the new test observation
        prediction = model.predict(test_X)
        predictions.append(prediction)

        # get breakpoints for train dataset
        history = functions.ada_preprocessing(train)
        history = history.loc[:, "t":"t-5"]

    end = time.perf_counter()
    print("Time spent: {:.2f}h".format((end - start) / 3600))

    # inverting predictions to original scale
    # predictions = scaler.inverse_transform(np.asarray(predictions).reshape([-1,1]))

    error = smape(np.asarray(predictions), np.asarray(test))
    smape_dict[name] = error
    print("SMAPE: {:.4f}".format(error))

    # plotting expected vs. predicted values and saving the figure
    plt.plot(test, label="expected", color="black")
    plt.plot(np.asarray(predictions).reshape([-1, 1]), label="predicted", color="red")
    plt.title(name)
    plt.legend()
    image_path = "results/lstm/oneshot/" + name + ".png"
    plt.savefig(image_path)
    plt.clf()

    # saving the dictionary containing errors
    dict_path = "results/lstm/oneshot/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
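# manual_preprocessing() is not shown here. Based on its use above, it builds the single
# lagged feature row for the newest observation in the series (and, when a history row is
# passed, presumably copies that row's concept dummies). A minimal sketch of the lag-only
# case (hypothetical; the project's real helper may differ):
import pandas as pd


def manual_preprocessing_sketch(series, lags=5):
    """Return a one-row DataFrame with columns t, t-1, ..., t-5 for the last observation."""
    last = series[-(lags + 1):]  # newest value plus its five predecessors
    row = {"t": last[-1]}
    for k in range(1, lags + 1):
        row["t-{}".format(k)] = last[-(k + 1)]
    return pd.DataFrame([row])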