def main(argv):
    """Predict with the trained model and a specific checkpoint."""
    assert len(argv) == 1
    # Load data from local disk.
    (x_train_dev, y_train_dev), (x_train, y_train), (x_dev, y_dev), (
        x_test, y_test), (series_max, series_min) = load_normalized_data(
            # "orig_day_full_X.xlsx",  # predict the original series
            "vmd_imf10.xlsx",  # predict the VMD IMFs
            seed=123)

    # Create feature columns.
    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        # tf.feature_column.numeric_column("X4"),
        # tf.feature_column.numeric_column("X5"),
        # tf.feature_column.numeric_column("X6"),
        # tf.feature_column.numeric_column("X7"),
        # tf.feature_column.numeric_column("X8"),
        # tf.feature_column.numeric_column("X9"),
        # tf.feature_column.numeric_column("X10"),
        # tf.feature_column.numeric_column("X11"),
        # tf.feature_column.numeric_column("X12"),
        # tf.feature_column.numeric_column("X13"),
        # tf.feature_column.numeric_column("X14"),
        # tf.feature_column.numeric_column("X15"),
        # tf.feature_column.numeric_column("X16"),
        # tf.feature_column.numeric_column("X17"),
        # tf.feature_column.numeric_column("X18"),
        # tf.feature_column.numeric_column("X19"),
    ]

    # Restore the model with the dropout rate set to 0.0.
    model_path = current_path + '/models/imf10/'
    # current_model = 'DNNRegressor_Hidden_Units[7, 13]'   # orig
    # current_model = 'DNNRegressor_Hidden_Units[5, 8]'    # imf1
    # current_model = 'DNNRegressor_Hidden_Units[3]'       # imf2
    # current_model = 'DNNRegressor_Hidden_Units[9, 12]'   # imf3
    # current_model = 'DNNRegressor_Hidden_Units[6, 10]'   # imf4
    # current_model = 'DNNRegressor_Hidden_Units[5, 11]'   # imf5
    # current_model = 'DNNRegressor_Hidden_Units[4, 7]'    # imf6
    # current_model = 'DNNRegressor_Hidden_Units[11, 12]'  # imf7
    # current_model = 'DNNRegressor_Hidden_Units[4]'       # imf8
    # current_model = 'DNNRegressor_Hidden_Units[13, 12]'  # imf9
    current_model = 'DNNRegressor_Hidden_Units[3]'  # imf10
    model_dir = model_path + current_model + '/'
    # model = tf.estimator.Estimator(
    #     model_fn=my_dnn_regression_fn,
    #     model_dir=model_dir,
    #     params={
    #         'feature_columns': feature_columns,
    #         # NOTE: Set the hidden units for predictions
    #         'hidden_units': [7],
    #         'drop_rates': [0.0]
    #     },
    # )
    model = tf.estimator.DNNRegressor(
        # hidden_units=[7, 13],   # orig
        # hidden_units=[5, 8],    # imf1
        # hidden_units=[3],       # imf2
        # hidden_units=[9, 12],   # imf3
        # hidden_units=[6, 10],   # imf4
        # hidden_units=[5, 11],   # imf5
        # hidden_units=[4],       # imf6
        # hidden_units=[11, 12],  # imf7
        # hidden_units=[13, 12],  # imf9
        hidden_units=[3],  # imf10
        feature_columns=feature_columns,
        model_dir=model_dir,
    )

    train_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
        x_train, shuffle=False)
    dev_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
        x_dev, shuffle=False)
    test_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
        x_test, shuffle=False)

    # Use a specific checkpoint file for prediction.
    # checkpoint_path = model_dir + 'model.ckpt-7200'   # orig
    # checkpoint_path = model_dir + 'model.ckpt-29600'  # imf1
    # checkpoint_path = model_dir + 'model.ckpt-50600'  # imf2
    # checkpoint_path = model_dir + 'model.ckpt-74900'  # imf3
    # checkpoint_path = model_dir + 'model.ckpt-30000'  # imf4
    # checkpoint_path = model_dir + 'model.ckpt-73100'  # imf5
    # checkpoint_path = model_dir + 'model.ckpt-81700'  # imf6
    # checkpoint_path = model_dir + 'model.ckpt-13200'  # imf7
    # checkpoint_path = model_dir + 'model.ckpt-32800'  # imf8
    # checkpoint_path = model_dir + 'model.ckpt-11700'  # imf9
    checkpoint_path = model_dir + 'model.ckpt-45600'  # imf10

    # Predict the training set with the specific checkpoint.
    train_pred_results = model.predict(
        input_fn=train_pred_input_fn, checkpoint_path=checkpoint_path)
    # Predict the development set.
    dev_pred_results = model.predict(
        input_fn=dev_pred_input_fn, checkpoint_path=checkpoint_path)
    # Predict the test set.
    test_pred_results = model.predict(
        input_fn=test_pred_input_fn, checkpoint_path=checkpoint_path)

    # Convert the prediction generators to numpy arrays.
    train_predictions = np.array(
        list(p['predictions'] for p in train_pred_results))
    dev_predictions = np.array(
        list(p['predictions'] for p in dev_pred_results))
    test_predictions = np.array(
        list(p['predictions'] for p in test_pred_results))

    # Reshape the predictions to the shape of y.
    train_predictions = train_predictions.reshape(np.array(y_train).shape)
    dev_predictions = dev_predictions.reshape(np.array(y_dev).shape)
    test_predictions = test_predictions.reshape(np.array(y_test).shape)

    # Renormalize the records and predictions:
    # y = (y_norm + 1) * (max - min) / 2 + min.
    y_train = np.multiply(
        y_train + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    train_predictions = np.multiply(
        train_predictions + 1,
        series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    y_dev = np.multiply(
        y_dev + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    dev_predictions = np.multiply(
        dev_predictions + 1,
        series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    y_test = np.multiply(
        y_test + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    test_predictions = np.multiply(
        test_predictions + 1,
        series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]

    # Compute R^2 (coefficient of determination).
    r2_train = r2_score(y_train, train_predictions)
    r2_dev = r2_score(y_dev, dev_predictions)
    r2_test = r2_score(y_test, test_predictions)
    # Compute MSE.
    mse_train = mean_squared_error(y_train, train_predictions)
    mse_dev = mean_squared_error(y_dev, dev_predictions)
    mse_test = mean_squared_error(y_test, test_predictions)
    # Compute MAE.
    mae_train = mean_absolute_error(y_train, train_predictions)
    mae_dev = mean_absolute_error(y_dev, dev_predictions)
    mae_test = mean_absolute_error(y_test, test_predictions)
    # Compute MAPE (in percent).
    mape_train = np.true_divide(
        np.sum(np.abs(np.true_divide(y_train - train_predictions, y_train))),
        y_train.size) * 100
    mape_dev = np.true_divide(
        np.sum(np.abs(np.true_divide(y_dev - dev_predictions, y_dev))),
        y_dev.size) * 100
    mape_test = np.true_divide(
        np.sum(np.abs(np.true_divide(y_test - test_predictions, y_test))),
        y_test.size) * 100

    # print('r2_score_train = {:.10f}'.format(r2_train))
    print('r2_score_dev = {:.10f}'.format(r2_dev))

    dump_train_dev_test_to_excel(
        path=model_path + current_model + '.xlsx',
        y_train=y_train,
        train_pred=train_predictions,
        r2_train=r2_train,
        mse_train=mse_train,
        mae_train=mae_train,
        mape_train=mape_train,
        y_dev=y_dev,
        dev_pred=dev_predictions,
        r2_dev=r2_dev,
        mse_dev=mse_dev,
        mae_dev=mae_dev,
        mape_dev=mape_dev,
        y_test=y_test,
        test_pred=test_predictions,
        r2_test=r2_test,
        mse_test=mse_test,
        mae_test=mae_test,
        mape_test=mape_test)

    plot_rela_pred(
        y_train,
        train_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + current_model + '_train_pred.tif')
    plot_rela_pred(
        y_dev,
        dev_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + current_model + '_dev_pred.tif')
    plot_rela_pred(
        y_test,
        test_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + current_model + '_test_pred.tif')
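
# The inverse min-max renormalization above repeats the same expression six
# times. A minimal helper sketch (hypothetical; `renormalize` is not part of
# the original module) that inverts the scaling
# y_norm = 2 * (y - y_min) / (y_max - y_min) - 1 used by load_normalized_data:
def renormalize(normalized, s_max, s_min):
    """Map min-max normalized values from [-1, 1] back to the original scale."""
    return (normalized + 1) * (s_max - s_min) / 2 + s_min

# Usage sketch: y_train = renormalize(y_train, series_max["Y"], series_min["Y"])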
    train_predictions = np.multiply(train_predictions + 1, sMax - sMin) / 2 + sMin
    train_predictions[train_predictions < 0.0] = 0.0
    dev_y = np.multiply(dev_y + 1, sMax - sMin) / 2 + sMin
    dev_predictions = np.multiply(dev_predictions + 1, sMax - sMin) / 2 + sMin
    dev_predictions[dev_predictions < 0.0] = 0.0
    test_y = np.multiply(test_y + 1, sMax - sMin) / 2 + sMin
    test_predictions = np.multiply(test_predictions + 1, sMax - sMin) / 2 + sMin
    test_predictions[test_predictions < 0.0] = 0.0

    dum_pred_results(
        path=model_path + MODEL_NAME + '.csv',
        train_y=train_y,
        train_predictions=train_predictions,
        dev_y=dev_y,
        dev_predictions=dev_predictions,
        test_y=test_y,
        test_predictions=test_predictions,
        time_cost=time_cost,
    )

    plot_rela_pred(train_y, train_predictions,
                   fig_savepath=model_path + MODEL_NAME + '-TRAIN-PRED.png')
    plot_rela_pred(dev_y, dev_predictions,
                   fig_savepath=model_path + MODEL_NAME + '-DEV-PRED.png')
    plot_rela_pred(test_y, test_predictions,
                   fig_savepath=model_path + MODEL_NAME + '-TEST-PRED.png')
    plot_error_distribution(test_predictions, test_y,
                            model_path + MODEL_NAME + '-ERROR-DSTRI.png')
def my_lstm(path, pattern, HU1, DR1, HL=1, HU2=8, DR2=0.0, LR=0.007,
            EPS=1000, lev=None, EARLY_STOPING=True, MODEL_ID=None,
            loss='mean_squared_error', wavelet=None):
    if wavelet is None and MODEL_ID is None:
        data_path = path + 'data\\' + pattern + '\\'
        model_path = path + 'projects\\lstm-models-history\\' + pattern + '\\history\\'
    elif wavelet is None and MODEL_ID is not None:
        data_path = path + 'data\\' + pattern + '\\'
        model_path = path + 'projects\\lstm-models-history\\' + pattern + '\\history\\s' + str(MODEL_ID) + '\\'
    elif wavelet is not None and MODEL_ID is None:
        data_path = path + 'data\\' + wavelet + '-' + str(lev) + '\\' + pattern + '\\'
        model_path = path + 'projects\\lstm-models-history\\' + wavelet + '-' + str(lev) + '\\' + pattern + '\\history\\'
    elif wavelet is not None and MODEL_ID is not None:
        data_path = path + 'data\\' + wavelet + '-' + str(lev) + '\\' + pattern + '\\'
        model_path = path + 'projects\\lstm-models-history\\' + wavelet + '-' + str(lev) + '\\' + pattern + '\\history\\s' + str(MODEL_ID) + '\\'

    # 1. Import the sampled, normalized data set from disk.
    if MODEL_ID is None:
        train = pd.read_csv(data_path + 'minmax_unsample_train.csv')
        dev = pd.read_csv(data_path + 'minmax_unsample_dev.csv')
        test = pd.read_csv(data_path + 'minmax_unsample_test.csv')
    else:
        train = pd.read_csv(data_path + 'minmax_unsample_train_s' + str(MODEL_ID) + '.csv')
        dev = pd.read_csv(data_path + 'minmax_unsample_dev_s' + str(MODEL_ID) + '.csv')
        test = pd.read_csv(data_path + 'minmax_unsample_test_s' + str(MODEL_ID) + '.csv')

    # Split features from labels.
    train_x = train
    train_y = train.pop('Y')
    train_y = train_y.values
    dev_x = dev
    dev_y = dev.pop('Y')
    dev_y = dev_y.values
    test_x = test
    test_y = test.pop('Y')
    test_y = test_y.values

    # Reshape the input features for the LSTM: (samples, timesteps, features).
    train_x = (train_x.values).reshape(train_x.shape[0], 1, train_x.shape[1])
    dev_x = (dev_x.values).reshape(dev_x.shape[0], 1, dev_x.shape[1])
    test_x = (test_x.values).reshape(test_x.shape[0], 1, test_x.shape[1])

    RE_TRAIN = False
    WARM_UP = False
    # EARLY_STOPING = True
    INITIAL_EPOCH = 6000
    # Seed for initializing weights and biases.
    SEED = 1

    # Set the hyper-parameters.
    # EPS = 1000  # number of epochs                                   ##--1--##
    # LR = 0.007  # learning rate: 0.0001, 0.0003, 0.0007, 0.001,
    #             # 0.003, 0.007, 0.01, 0.03, 0.1                      ##--2--##
    # HU1 = 32    # hidden units of hidden layer 1: [8, 16, 24, 32]
    BS = 512      # batch size                                         ##--3--##
    # HL = 1      # number of hidden layers
    # HU2 = 16    # hidden units of hidden layer 2
    DC = 0.000    # decay rate of the learning rate                    ##--4--##
    # DR1 = 0.7   # dropout rate of hidden layer 1: [0.0, 0.1, ..., 0.9]
    # DR2 = 0.0   # dropout rate of hidden layer 2

    # 2. Build the LSTM model with Keras.
    LEARNING_RATE = LR
    EPOCHS = EPS
    BATCH_SIZE = BS
    if HL == 2:
        HIDDEN_UNITS = [HU1, HU2]
        DROP_RATE = [DR1, DR2]
    else:
        HIDDEN_UNITS = [HU1]
        DROP_RATE = [DR1]
    DECAY_RATE = DC
    if MODEL_ID is None:
        MODEL_NAME = 'LSTM-LR[' + str(LEARNING_RATE) + ']-HU' + str(HIDDEN_UNITS) + '-EPS[' + str(EPOCHS) + ']-BS[' + str(BATCH_SIZE) + ']-DR' + str(DROP_RATE) + '-DC[' + str(DECAY_RATE) + ']-SEED[' + str(SEED) + ']'
    else:
        MODEL_NAME = 'LSTM-S' + str(MODEL_ID) + '-LR[' + str(LEARNING_RATE) + ']-HU' + str(HIDDEN_UNITS) + '-EPS[' + str(EPOCHS) + ']-BS[' + str(BATCH_SIZE) + ']-DR' + str(DROP_RATE) + '-DC[' + str(DECAY_RATE) + ']-SEED[' + str(SEED) + ']'
    # RESUME_TRAINING = True

    def build_model():
        if HL == 2:
            model = keras.Sequential([
                # First hidden layer; return_sequences=True because a second
                # LSTM layer follows.
                layers.LSTM(HIDDEN_UNITS[0], activation=tf.nn.relu,
                            return_sequences=True,
                            input_shape=(train_x.shape[1], train_x.shape[2])),
                layers.Dropout(DROP_RATE[0], noise_shape=None, seed=None),
                layers.LSTM(HIDDEN_UNITS[1], activation=tf.nn.relu,
                            return_sequences=False),
                layers.Dropout(DROP_RATE[1], noise_shape=None, seed=None),
                layers.Dense(1),
            ])
        else:
            model = keras.Sequential([
                layers.LSTM(HIDDEN_UNITS[0], activation=tf.nn.relu,
                            input_shape=(train_x.shape[1], train_x.shape[2])),
                layers.Dropout(DROP_RATE[0], noise_shape=None, seed=None),
                layers.Dense(1),
            ])
        optimizer = keras.optimizers.Adam(LEARNING_RATE, decay=DECAY_RATE)
        if loss == 'mean_squared_error':
            print('Loss function: mean_squared_error')
            model.compile(
                loss='mean_squared_error',
                optimizer=optimizer,
                metrics=['mean_absolute_error', 'mean_squared_error'])
        elif loss == 'custom_loss':
            print('Loss function: custom_loss')
            model.compile(
                loss=custom_loss,
                optimizer=optimizer,
                metrics=['mean_absolute_error', 'mean_squared_error', custom_loss])
        return model

    # Set the restore path for the model's parameters.
    cp_path = model_path + MODEL_NAME + '\\'
    if not os.path.exists(cp_path):
        os.makedirs(cp_path)
    checkpoint_path = model_path + MODEL_NAME + '\\cp.h5'  # keep only the best checkpoint, updated in place
    # checkpoint_path = model_path + 'cp-{epoch:04d}.ckpt'  # keep a checkpoint every `period` epochs
    checkpoint_dir = os.path.dirname(checkpoint_path)
    print('checkpoint dir:{}'.format(checkpoint_dir))
    cp_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_path, save_best_only=True, mode='min',
        save_weights_only=True, verbose=1)

    model = build_model()
    model.summary()  # print a brief description of the model
    """
    # Evaluate before training, or load trained weights and biases.
    loss, mae, mse = model.evaluate(test_x, test_y, verbose=1)
    # Try the model with the initial weights and biases.
    example_batch = train_x[:10]
    example_result = model.predict(example_batch)
    print(example_result)
    """

    # 3. Train the model.
    # Display training progress by printing a single dot per completed epoch.
    class PrintDot(keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs):
            if epoch % 100 == 0:
                print('')
            print('.', end='')

    files = os.listdir(checkpoint_dir)
    from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
    # reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=10, mode='auto')
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', min_lr=0.00001,
                                  factor=0.2, verbose=1, patience=10,
                                  mode='min')
    early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                                   patience=100, restore_best_weights=True)
    if MODEL_ID is None:
        warm_dir = 'LSTM-LR[' + str(LEARNING_RATE) + ']-HU' + str(HIDDEN_UNITS) + '-EPS[' + str(INITIAL_EPOCH) + ']-BS[' + str(BATCH_SIZE) + ']-DR' + str(DROP_RATE) + '-DC[' + str(DECAY_RATE) + ']-SEED[' + str(SEED) + ']'
    else:
        warm_dir = 'LSTM-S' + str(MODEL_ID) + '-LR[' + str(LEARNING_RATE) + ']-HU' + str(HIDDEN_UNITS) + '-EPS[' + str(INITIAL_EPOCH) + ']-BS[' + str(BATCH_SIZE) + ']-DR' + str(DROP_RATE) + '-DC[' + str(DECAY_RATE) + ']-SEED[' + str(SEED) + ']'
    print(os.path.exists(model_path + warm_dir))
    if RE_TRAIN:
        print('retrain the model')
        if EARLY_STOPING:
            history2 = model.fit(train_x, train_y, epochs=EPOCHS,
                                 batch_size=BATCH_SIZE,
                                 validation_data=(dev_x, dev_y), verbose=1,
                                 callbacks=[cp_callback, early_stopping])
        else:
            history2 = model.fit(train_x, train_y, epochs=EPOCHS,
                                 batch_size=BATCH_SIZE,
                                 validation_data=(dev_x, dev_y), verbose=1,
                                 callbacks=[cp_callback])
        hist2 = pd.DataFrame(history2.history)
        hist2.to_csv(model_path + MODEL_NAME + '-HISTORY-TRAIN-TEST.csv')
        hist2['epoch'] = history2.epoch
        # print(hist2.tail())
        plot_history(history2,
                     model_path + MODEL_NAME + '-MAE-ERRORS-TRAINTEST.png',
                     model_path + MODEL_NAME + '-MSE-ERRORS-TRAINTEST.png')
    elif len(files) == 0:
        if os.path.exists(model_path + warm_dir) and WARM_UP:
            print('WARM UP FROM EPOCH ' + str(INITIAL_EPOCH))
            warm_path = model_path + warm_dir + '\\cp.ckpt'
            model.load_weights(warm_path)
            if EARLY_STOPING:
                history2 = model.fit(train_x, train_y,
                                     initial_epoch=INITIAL_EPOCH,
                                     epochs=EPOCHS, batch_size=BATCH_SIZE,
                                     validation_data=(dev_x, dev_y), verbose=1,
                                     callbacks=[cp_callback, early_stopping])
            else:
                history2 = model.fit(train_x, train_y,
                                     initial_epoch=INITIAL_EPOCH,
                                     epochs=EPOCHS, batch_size=BATCH_SIZE,
                                     validation_data=(dev_x, dev_y), verbose=1,
                                     callbacks=[cp_callback])
            hist2 = pd.DataFrame(history2.history)
            hist2.to_csv(model_path + MODEL_NAME + '-HISTORY-TRAIN-TEST.csv')
            hist2['epoch'] = history2.epoch
            # print(hist2.tail())
            plot_history(history2,
                         model_path + MODEL_NAME + '-MAE-ERRORS-TRAINTEST.png',
                         model_path + MODEL_NAME + '-MSE-ERRORS-TRAINTEST.png')
        else:
            print('new train')
            if EARLY_STOPING:
                history2 = model.fit(train_x, train_y, epochs=EPOCHS,
                                     batch_size=BATCH_SIZE,
                                     validation_data=(dev_x, dev_y), verbose=1,
                                     callbacks=[cp_callback, early_stopping])
            else:
                history2 = model.fit(train_x, train_y, epochs=EPOCHS,
                                     batch_size=BATCH_SIZE,
                                     validation_data=(dev_x, dev_y), verbose=1,
                                     callbacks=[cp_callback])
            hist2 = pd.DataFrame(history2.history)
            hist2.to_csv(model_path + MODEL_NAME + '-HISTORY-TRAIN-TEST.csv')
            hist2['epoch'] = history2.epoch
            # print(hist2.tail())
            plot_history(history2,
                         model_path + MODEL_NAME + '-MAE-ERRORS-TRAINTEST.png',
                         model_path + MODEL_NAME + '-MSE-ERRORS-TRAINTEST.png')
    else:
        print('#' * 10 + 'Already Trained')

    model.load_weights(checkpoint_path)
    # loss, mae, mse = model.evaluate(test_x, test_y, verbose=1)
    """
    # Evaluate after training, or load trained weights and biases.
    loss, mae, mse = model.evaluate(test_x, test_y, verbose=1)
    print("Testing set Mean Abs Error: {:5.2f}".format(mae))
    """

    # 4. Predict with the trained model on the unsampled data.
    train_predictions = model.predict(train_x).flatten()
    dev_predictions = model.predict(dev_x).flatten()
    test_predictions = model.predict(test_x).flatten()
    # plt.figure()
    # plt.plot(train_y, c='b')
    # plt.plot(train_predictions, c='r')
    # plt.show()

    # Renormalize the predictions and labels:
    # load the normalization indicators computed on the train-dev set.
    if MODEL_ID is None:
        norm = pd.read_csv(data_path + 'norm_id.csv')
    else:
        norm = pd.read_csv(data_path + 'norm_id_s' + str(MODEL_ID) + '.csv')
    sMax = norm['series_max'][norm.shape[0] - 1]
    sMin = norm['series_min'][norm.shape[0] - 1]
    print('Series min:{}'.format(sMin))
    print('Series max:{}'.format(sMax))
    train_y = np.multiply(train_y + 1, sMax - sMin) / 2 + sMin
    train_predictions = np.multiply(train_predictions + 1, sMax - sMin) / 2 + sMin
    dev_y = np.multiply(dev_y + 1, sMax - sMin) / 2 + sMin
    dev_predictions = np.multiply(dev_predictions + 1, sMax - sMin) / 2 + sMin
    test_y = np.multiply(test_y + 1, sMax - sMin) / 2 + sMin
    test_predictions = np.multiply(test_predictions + 1, sMax - sMin) / 2 + sMin

    print("pattern.find('multi')={}".format(pattern.find('multi')))
    print("pattern.find('one')={}".format(pattern.find('one')))
    # Streamflow cannot be negative; clip negative predictions to zero.
    if pattern.find('one') >= 0:
        print('Decomposition ensemble model')
        train_predictions[train_predictions < 0.0] = 0.0
        dev_predictions[dev_predictions < 0.0] = 0.0
        test_predictions[test_predictions < 0.0] = 0.0
    elif pattern.find('one') < 0 and pattern.find('multi') < 0:
        print('Monoscale model')
        train_predictions[train_predictions < 0.0] = 0.0
        dev_predictions[dev_predictions < 0.0] = 0.0
        test_predictions[test_predictions < 0.0] = 0.0

    dum_pred_results(
        path=model_path + MODEL_NAME + '.csv',
        train_y=train_y,
        train_predictions=train_predictions,
        dev_y=dev_y,
        dev_predictions=dev_predictions,
        test_y=test_y,
        test_predictions=test_predictions)

    plot_rela_pred(train_y, train_predictions,
                   fig_savepath=model_path + MODEL_NAME + '-TRAIN-PRED.png')
    plot_rela_pred(dev_y, dev_predictions,
                   fig_savepath=model_path + MODEL_NAME + '-DEV-PRED.png')
    plot_rela_pred(test_y, test_predictions,
                   fig_savepath=model_path + MODEL_NAME + '-TEST-PRED.png')
    plot_error_distribution(test_predictions, test_y,
                            model_path + MODEL_NAME + '-ERROR-DSTRI.png')
    plt.close('all')
    tf.keras.backend.clear_session()
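
# A minimal usage sketch for my_lstm. The project root, pattern name, and
# hyper-parameters below are hypothetical placeholders, not values from the
# original experiments:
if __name__ == '__main__':
    my_lstm(
        path='D:\\example-project\\',          # hypothetical project root
        pattern='one_step_1_ahead_forecast',   # hypothetical ensemble pattern
        HU1=16,      # hidden units of LSTM layer 1
        DR1=0.3,     # dropout rate after LSTM layer 1
        MODEL_ID=1)  # train the model for sub-signal s1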
def main(argv):
    """Predict based on the trained model and a specific checkpoint."""
    assert len(argv) == 1
    (x_train_dev, y_train_dev), (x_train0, y_train0), (x_dev0, y_dev0), (
        x_test0, y_test0), (series_max, series_min) = load_normalized_data(
            'VMD_IMFS.xlsx', seed=123)

    # data_file = 'ARMA_IMFs_PRED.xlsx'
    # data_file = 'SVR_IMFs_PRED.xlsx'
    # data_file = 'GBR_IMFs_PRED.xlsx'
    data_file = 'DNN_IMFs_PRED.xlsx'
    # print(10 * '-' + ' Data file: {}'.format(data_file))
    # # Load data from local disk.
    # (x_train_dev, y_train_dev), (x_train, y_train), (x_dev, y_dev), (
    #     x_test, y_test), (series_max,
    #                       series_min) = load_normalized_data(
    #                           data_file, seed=123)

    full_data_set = pd.read_excel(par_path_2 + '\\data\\' + data_file)
    # Normalize to [-1, 1] with the min-max indicators of the original series.
    full_norm_set = 2 * (full_data_set - series_min) / (
        series_max - series_min) - 1
    series_len = len(full_norm_set)
    # Reserve the last 541 records for testing.
    train_dev_set = full_norm_set[0:(series_len - 541)]
    y_train_dev = train_dev_set['Y']
    x_train_dev = train_dev_set.drop('Y', axis=1)
    # Get the test set.
    test_set = full_norm_set[(series_len - 541):series_len]
    # Shuffle the data.
    np.random.seed(123)
    # Split the data into train/development subsets.
    x_train = train_dev_set.sample(frac=0.888888889, random_state=123)
    x_dev = train_dev_set.drop(x_train.index)
    # Extract the labels from the feature DataFrames.
    y_train = x_train.pop('Y')
    y_dev = x_dev.pop('Y')
    # print(test_set)
    x_test = test_set
    y_test = x_test.pop('Y')

    # Create feature columns.
    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        tf.feature_column.numeric_column("X4"),
        tf.feature_column.numeric_column("X5"),
        tf.feature_column.numeric_column("X6"),
        tf.feature_column.numeric_column("X7"),
        tf.feature_column.numeric_column("X8"),
        tf.feature_column.numeric_column("X9"),
        tf.feature_column.numeric_column("X10"),
    ]

    # Restore the model with the dropout rate set to 0.0.
    model_path = current_path + '/models/ensemble/'
    current_model = 'DNNRegressor_Hidden_Units[9, 8]'
    model_dir = model_path + current_model + '/'
    # model = tf.estimator.Estimator(
    #     model_fn=my_dnn_regression_fn,
    #     model_dir=model_dir,
    #     params={
    #         'feature_columns': feature_columns,
    #         # NOTE: Set the hidden units for predictions
    #         'hidden_units': [7],
    #         'drop_rates': [0.0]
    #     },
    # )
    model = tf.estimator.DNNRegressor(
        hidden_units=[9, 8],
        feature_columns=feature_columns,
        model_dir=model_dir,
    )

    train_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
        x_train, shuffle=False)
    dev_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
        x_dev, shuffle=False)
    test_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
        x_test, shuffle=False)

    # Use a specific checkpoint file for prediction.
    checkpoint_path = model_dir + 'model.ckpt-22400'
    # Predict the training set with the specific checkpoint.
    train_pred_results = model.predict(
        input_fn=train_pred_input_fn, checkpoint_path=checkpoint_path)
    # Predict the development set.
    dev_pred_results = model.predict(
        input_fn=dev_pred_input_fn, checkpoint_path=checkpoint_path)
    # Predict the test set.
    test_pred_results = model.predict(
        input_fn=test_pred_input_fn, checkpoint_path=checkpoint_path)

    # Convert the prediction generators to numpy arrays.
    train_predictions = np.array(
        list(p['predictions'] for p in train_pred_results))
    dev_predictions = np.array(
        list(p['predictions'] for p in dev_pred_results))
    test_predictions = np.array(
        list(p['predictions'] for p in test_pred_results))

    # Reshape the predictions to the shape of y.
    train_predictions = train_predictions.reshape(np.array(y_train).shape)
    dev_predictions = dev_predictions.reshape(np.array(y_dev).shape)
    test_predictions = test_predictions.reshape(np.array(y_test).shape)

    # Renormalize the records and predictions.
    y_train = np.multiply(
        y_train + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    train_predictions = np.multiply(
        train_predictions + 1,
        series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    y_dev = np.multiply(
        y_dev + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    dev_predictions = np.multiply(
        dev_predictions + 1,
        series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    y_test = np.multiply(
        y_test + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    test_predictions = np.multiply(
        test_predictions + 1,
        series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]

    # Compute R^2 (coefficient of determination).
    r2_train = r2_score(y_train, train_predictions)
    r2_dev = r2_score(y_dev, dev_predictions)
    r2_test = r2_score(y_test, test_predictions)
    # Compute MSE.
    mse_train = mean_squared_error(y_train, train_predictions)
    mse_dev = mean_squared_error(y_dev, dev_predictions)
    mse_test = mean_squared_error(y_test, test_predictions)
    # Compute MAE.
    mae_train = mean_absolute_error(y_train, train_predictions)
    mae_dev = mean_absolute_error(y_dev, dev_predictions)
    mae_test = mean_absolute_error(y_test, test_predictions)
    # Compute MAPE (in percent).
    mape_train = np.true_divide(
        np.sum(np.abs(np.true_divide(y_train - train_predictions, y_train))),
        y_train.size) * 100
    mape_dev = np.true_divide(
        np.sum(np.abs(np.true_divide(y_dev - dev_predictions, y_dev))),
        y_dev.size) * 100
    mape_test = np.true_divide(
        np.sum(np.abs(np.true_divide(y_test - test_predictions, y_test))),
        y_test.size) * 100

    # print('r2_score_train = {:.10f}'.format(r2_train))
    print('r2_score_dev = {:.10f}'.format(r2_dev))

    dump_train_dev_test_to_excel(
        path=model_path + current_model + data_file + '.xlsx',
        y_train=y_train,
        train_pred=train_predictions,
        r2_train=r2_train,
        mse_train=mse_train,
        mae_train=mae_train,
        mape_train=mape_train,
        y_dev=y_dev,
        dev_pred=dev_predictions,
        r2_dev=r2_dev,
        mse_dev=mse_dev,
        mae_dev=mae_dev,
        mape_dev=mape_dev,
        y_test=y_test,
        test_pred=test_predictions,
        r2_test=r2_test,
        mse_test=mse_test,
        mae_test=mae_test,
        mape_test=mape_test)

    plot_rela_pred(
        y_train,
        train_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + current_model + data_file + '_train_pred.tif')
    plot_rela_pred(
        y_dev,
        dev_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + current_model + data_file + '_dev_pred.tif')
    plot_rela_pred(
        y_test,
        test_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + current_model + data_file + '_test_pred.tif')
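
# Conventional TF 1.x entry point for an estimator script (an assumption;
# the module-level code of the original file is not shown here):
if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.app.run(main)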
        mse_dev=mse_dev,
        mae_dev=mae_dev,
        mape_dev=mape_dev,
        y_test=y_test,
        test_pred=test_predictions,
        r2_test=r2_test,
        mse_test=mse_test,
        mae_test=mae_test,
        mape_test=mape_test)

    # print(test_predictions)
    # Plot the predicted series.
    plot_rela_pred(
        y_train,
        train_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + 'SVR_train_pred.png')
    # plot_normreconvert_relation(
    #     y_train,
    #     train_predictions,
    #     series_max,
    #     series_min,
    #     fig_savepath=model_path + "SVR_train_rela.png")
    plot_rela_pred(
        y_dev,
        dev_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + "SVR_dev_pred.png")
def ensemble_optimization(root_path, station, variables, orig_df, pattern,
                          decomposer=None, lev=None, wavelet=None,
                          criterion='RMSE'):
    # Load variables.
    lags_dict = variables['lags_dict']
    full_len = variables['full_len']
    train_len = variables['train_len']
    dev_len = variables['dev_len']
    test_len = variables['test_len']
    if pattern.find('one') < 0 and pattern.find('multi') < 0:
        print('Monoscale pattern')
        leading_time = int(pattern.split('_')[0])
    else:
        print('Decomposition ensemble pattern')
        leading_time = int(pattern.split('_')[2])

    def dict_to_list(dictionary):
        results = []
        for key in dictionary:
            results.append(dictionary[key])
        return results

    if decomposer is None:
        subsignals_num = None
        model_path = root_path + '/' + station + '_orig/projects/lstm-models-history/' + pattern + '/'
        lags = lags_dict['orig']
        leading_time = int(pattern.split('_')[0])
        train_samples_len = train_len - lags - leading_time + 1
    else:
        leading_time = int(pattern.split('_')[2])
        if wavelet is None:
            model_path = root_path + '/' + station + '_' + decomposer + '/projects/lstm-models-history/' + pattern + '/'
            lags = dict_to_list(lags_dict[decomposer])
            train_samples_len = train_len - max(lags) - leading_time + 1
            subsignals_num = lev
            assert lev == len(lags)
        else:
            model_path = root_path + '/' + station + '_' + decomposer + '/projects/lstm-models-history/' + wavelet + '-' + str(lev) + '/' + pattern + '/'
            lags = dict_to_list(lags_dict[wavelet + '-' + str(lev)])
            train_samples_len = train_len - max(lags) - leading_time + 1
            subsignals_num = lev + 1
            assert lev + 1 == len(lags)

    print('Station:{}'.format(station))
    print('Decomposer:{}'.format(decomposer))
    print('Decomposition level:{}'.format(lev))
    print('Prediction pattern:{}'.format(pattern))
    print('Wavelet:{}'.format(wavelet))
    print('full_len:{}'.format(full_len))
    print('train_len:{}'.format(train_len))
    print('dev_len:{}'.format(dev_len))
    print('test_len:{}'.format(test_len))
    print('train_samples_len:{}'.format(train_samples_len))
    print('lags:{}'.format(lags))
    print('Subsignals num:{}'.format(subsignals_num))
    print('Models are developed on {}'.format(criterion))

    if pattern.find('one_model') >= 0 or decomposer is None:
        print("################")
        signal_model = model_path + 'history/'
        criterion_dict = {}
        # Select the optimal model on the development set by RMSE or NMSE.
        for files in os.listdir(signal_model):
            if files.find('.csv') >= 0 and (files.find('HISTORY') < 0
                                            and files.find('metrics') < 0):
                data = pd.read_csv(signal_model + files)
                dev_y = data['dev_y'][0:dev_len]
                dev_pred = data['dev_pred'][0:dev_len]
                if criterion == 'RMSE':
                    criterion_dict[files] = data['rmse_dev'][0]
                elif criterion == 'NMSE':
                    NMSE = normalized_mean_square_error(y_true=dev_y,
                                                        y_pred=dev_pred)
                    criterion_dict[files] = NMSE
        key_min = min(criterion_dict.keys(), key=(lambda k: criterion_dict[k]))
        data = pd.read_csv(signal_model + key_min)
        train_y = data['train_y'][data.shape[0] - train_samples_len:]
        train_pred = data['train_pred'][data.shape[0] - train_samples_len:]
        train_pred[train_pred < 0.0] = 0.0
        train_y = train_y.reset_index(drop=True)
        train_pred = train_pred.reset_index(drop=True)
        train_results = pd.concat([train_y, train_pred], axis=1, sort=False)
        dev_y = data['dev_y'][0:dev_len]
        dev_pred = data['dev_pred'][0:dev_len]
        dev_pred[dev_pred < 0.0] = 0.0
        dev_results = pd.concat([dev_y, dev_pred], axis=1, sort=False)
        test_y = data['test_y'][0:test_len]
        test_pred = data['test_pred'][0:test_len]
        test_pred[test_pred < 0.0] = 0.0
        test_results = pd.concat([test_y, test_pred], axis=1, sort=False)

        # Count predictions exceeding 1.5x and 2x the maximum observed flow.
        max_streamflow = max(orig_df)
        ratio_train = train_pred / max_streamflow
        ratio_dev = dev_pred / max_streamflow
        ratio_test = test_pred / max_streamflow
        rto_train = pd.DataFrame(ratio_train, columns=['train'])['train']
        rto_dev = pd.DataFrame(ratio_dev, columns=['dev'])['dev']
        rto_test = pd.DataFrame(ratio_test, columns=['test'])['test']
        ratio_df = pd.concat([rto_train, rto_dev, rto_test], axis=1)
        ratio1_5 = ratio_df[ratio_df > 1.5]
        ratio2 = ratio_df[ratio_df > 2]
        count_1_5 = pd.concat([
            ratio1_5['train'].value_counts(),
            ratio1_5['dev'].value_counts(),
            ratio1_5['test'].value_counts()
        ], axis=1)
        count_2 = pd.concat([
            ratio2['train'].value_counts(),
            ratio2['dev'].value_counts(),
            ratio2['test'].value_counts()
        ], axis=1)
        count_1_5.to_csv(model_path + 'pred_div_maxtrue_ratio1_5_count.csv')
        count_2.to_csv(model_path + 'pred_div_maxtrue_ratio2_count.csv')
        ratio_df.to_csv(model_path + 'pred_div_maxtrue_ratio.csv')
        print('test_y=\n{}'.format(test_y))
        print('test_pred=\n{}'.format(test_pred))
        train_results.to_csv(model_path + 'model_train_results.csv', index=None)
        dev_results.to_csv(model_path + 'model_dev_results.csv', index=None)
        test_results.to_csv(model_path + 'model_test_results.csv', index=None)
        plot_rela_pred(train_y.values, train_pred.values,
                       model_path + 'train_pred.png')
        plot_rela_pred(dev_y.values, dev_pred.values,
                       model_path + 'dev_pred.png')
        plot_rela_pred(test_y.values, test_pred.values,
                       model_path + 'test_pred.png')

        train_nse = r2_score(y_true=train_y.values, y_pred=train_pred.values)
        dev_nse = r2_score(y_true=dev_y.values, y_pred=dev_pred.values)
        test_nse = r2_score(y_true=test_y.values, y_pred=test_pred.values)
        train_nmse = normalized_mean_square_error(y_true=train_y,
                                                  y_pred=train_pred)
        dev_nmse = normalized_mean_square_error(y_true=dev_y, y_pred=dev_pred)
        test_nmse = normalized_mean_square_error(y_true=test_y,
                                                 y_pred=test_pred)
        train_rmse = math.sqrt(
            mean_squared_error(train_y.values, train_pred.values))
        dev_rmse = math.sqrt(mean_squared_error(dev_y.values, dev_pred.values))
        test_rmse = math.sqrt(
            mean_squared_error(test_y.values, test_pred.values))
        # NRMSE: RMSE normalized by the mean of the observations.
        train_nrmse = math.sqrt(
            mean_squared_error(train_y.values, train_pred.values)) / (
                sum(train_y.values) / len(train_y.values))
        dev_nrmse = math.sqrt(
            mean_squared_error(dev_y.values, dev_pred.values)) / (
                sum(dev_y.values) / len(dev_y.values))
        test_nrmse = math.sqrt(
            mean_squared_error(test_y.values, test_pred.values)) / (
                sum(test_y.values) / len(test_y.values))
        train_mae = mean_absolute_error(y_true=train_y.values,
                                        y_pred=train_pred.values)
        dev_mae = mean_absolute_error(y_true=dev_y.values,
                                      y_pred=dev_pred.values)
        test_mae = mean_absolute_error(y_true=test_y.values,
                                       y_pred=test_pred.values)
        train_mape = np.mean(
            np.abs((train_y.values - train_pred.values) / train_y.values)) * 100
        dev_mape = np.mean(
            np.abs((dev_y.values - dev_pred.values) / dev_y.values)) * 100
        test_mape = np.mean(
            np.abs((test_y.values - test_pred.values) / test_y.values)) * 100
        train_ppts = PPTS(train_y.values, train_pred.values, 5)
        dev_ppts = PPTS(dev_y.values, dev_pred.values, 5)
        test_ppts = PPTS(test_y.values, test_pred.values, 5)
        print('#' * 25 + 'train_ppts:\n{}'.format(train_ppts))
        print('#' * 25 + 'dev_ppts:\n{}'.format(dev_ppts))
        print('#' * 25 + 'test_ppts:\n{}'.format(test_ppts))
        metrics = {
            'optimal': key_min,
            'train_nse': train_nse,
            'train_nmse': train_nmse,
            'train_rmse': train_rmse,
            'train_nrmse': train_nrmse,
            'train_mae': train_mae,
            'train_mape': train_mape,
            'train_ppts': train_ppts,
            'dev_nse': dev_nse,
            'dev_nmse': dev_nmse,
            'dev_rmse': dev_rmse,
            'dev_nrmse': dev_nrmse,
            'dev_mae': dev_mae,
            'dev_mape': dev_mape,
            'dev_ppts': dev_ppts,
            'test_nse': test_nse,
            'test_nmse': test_nmse,
            'test_rmse': test_rmse,
            'test_nrmse': test_nrmse,
            'test_mae': test_mae,
            'test_mape': test_mape,
            'test_ppts': test_ppts,
        }
        metrics = pd.DataFrame(metrics, index=[0])
        metrics.to_csv(model_path + 'model_metrics.csv')
    else:
        # Decomposition ensemble: select the optimal model for each
        # sub-signal, then sum the sub-signal predictions.
        train_ens_pred = pd.DataFrame()
        dev_ens_pred = pd.DataFrame()
        test_ens_pred = pd.DataFrame()
        train_ens_y = pd.DataFrame()
        dev_ens_y = pd.DataFrame()
        test_ens_y = pd.DataFrame()
        subsignal_metrics = pd.DataFrame()
        for i in range(1, subsignals_num + 1):
            sub_signal = 's' + str(i)
            signal_model = model_path + 'history/' + sub_signal + '/'
            criterion_dict = {}
            for files in os.listdir(signal_model):
                if files.find('.csv') >= 0 and (files.find('HISTORY') < 0
                                                and files.find('metrics') < 0):
                    data = pd.read_csv(signal_model + files)
                    dev_y = data['dev_y'][0:dev_len]
                    dev_pred = data['dev_pred'][0:dev_len]
                    if criterion == 'RMSE':
                        criterion_dict[files] = data['rmse_dev'][0]
                    elif criterion == 'NMSE':
                        NMSE = normalized_mean_square_error(y_true=dev_y,
                                                            y_pred=dev_pred)
                        criterion_dict[files] = NMSE
            key_min = min(criterion_dict.keys(),
                          key=(lambda k: criterion_dict[k]))
            data = pd.read_csv(signal_model + key_min)
            train_y = data['train_y'][data.shape[0] - train_samples_len:]
            train_pred = data['train_pred'][data.shape[0] - train_samples_len:]
            train_y = train_y.reset_index(drop=True)
            train_pred = train_pred.reset_index(drop=True)
            dev_y = data['dev_y'][0:dev_len]
            dev_pred = data['dev_pred'][0:dev_len]
            test_y = data['test_y'][0:test_len]
            test_pred = data['test_pred'][0:test_len]
            train_nse = r2_score(y_true=train_y.values,
                                 y_pred=train_pred.values)
            dev_nse = r2_score(y_true=dev_y.values, y_pred=dev_pred.values)
            test_nse = r2_score(y_true=test_y.values, y_pred=test_pred.values)
            train_nmse = normalized_mean_square_error(y_true=train_y,
                                                      y_pred=train_pred)
            dev_nmse = normalized_mean_square_error(y_true=dev_y,
                                                    y_pred=dev_pred)
            test_nmse = normalized_mean_square_error(y_true=test_y,
                                                     y_pred=test_pred)
            train_rmse = math.sqrt(
                mean_squared_error(train_y.values, train_pred.values))
            dev_rmse = math.sqrt(
                mean_squared_error(dev_y.values, dev_pred.values))
            test_rmse = math.sqrt(
                mean_squared_error(test_y.values, test_pred.values))
            train_nrmse = math.sqrt(
                mean_squared_error(train_y.values, train_pred.values)) / (
                    sum(train_y.values) / len(train_y.values))
            dev_nrmse = math.sqrt(
                mean_squared_error(dev_y.values, dev_pred.values)) / (
                    sum(dev_y.values) / len(dev_y.values))
            test_nrmse = math.sqrt(
                mean_squared_error(test_y.values, test_pred.values)) / (
                    sum(test_y.values) / len(test_y.values))
            train_mae = mean_absolute_error(y_true=train_y.values,
                                            y_pred=train_pred.values)
            dev_mae = mean_absolute_error(y_true=dev_y.values,
                                          y_pred=dev_pred.values)
            test_mae = mean_absolute_error(y_true=test_y.values,
                                           y_pred=test_pred.values)
            train_mape = np.mean(
                np.abs((train_y.values - train_pred.values) /
                       train_y.values)) * 100
            dev_mape = np.mean(
                np.abs((dev_y.values - dev_pred.values) / dev_y.values)) * 100
            test_mape = np.mean(
                np.abs((test_y.values - test_pred.values) /
                       test_y.values)) * 100
            train_ppts = PPTS(train_y.values, train_pred.values, 5)
            dev_ppts = PPTS(dev_y.values, dev_pred.values, 5)
            test_ppts = PPTS(test_y.values, test_pred.values, 5)
            print('#' * 25 + 'train_ppts:\n{}'.format(train_ppts))
            print('#' * 25 + 'dev_ppts:\n{}'.format(dev_ppts))
            print('#' * 25 + 'test_ppts:\n{}'.format(test_ppts))
            metrics = {
                'optimal': key_min,
                'train_nse': train_nse,
                'train_nmse': train_nmse,
                'train_rmse': train_rmse,
                'train_nrmse': train_nrmse,
                'train_mae': train_mae,
                'train_mape': train_mape,
                'train_ppts': train_ppts,
                'dev_nse': dev_nse,
                'dev_nmse': dev_nmse,
                'dev_rmse': dev_rmse,
                'dev_nrmse': dev_nrmse,
                'dev_mae': dev_mae,
                'dev_mape': dev_mape,
                'dev_ppts': dev_ppts,
                'test_nse': test_nse,
                'test_nmse': test_nmse,
                'test_rmse': test_rmse,
                'test_nrmse': test_nrmse,
                'test_mae': test_mae,
                'test_mape': test_mape,
                'test_ppts': test_ppts,
            }
            metrics = pd.DataFrame(metrics, index=['s' + str(i)])
            subsignal_metrics = pd.concat([subsignal_metrics, metrics],
                                          sort=False)
            train_ens_pred = pd.concat([train_ens_pred, train_pred], axis=1)
            dev_ens_pred = pd.concat([dev_ens_pred, dev_pred], axis=1)
            test_ens_pred = pd.concat([test_ens_pred, test_pred], axis=1)
            train_ens_y = pd.concat([train_ens_y, train_y], axis=1)
            dev_ens_y = pd.concat([dev_ens_y, dev_y], axis=1)
            test_ens_y = pd.concat([test_ens_y, test_y], axis=1)
        subsignal_metrics.to_csv(model_path + 'subsignals_metrics.csv')
        plot_subsignals_preds(subsignals_y=test_ens_y,
                              subsignals_pred=test_ens_pred,
                              fig_savepath=model_path + 'subsignals_pred.png')

        # Sum the sub-signal predictions to reconstruct the full series,
        # and clip negative predictions to zero.
        train_pred = train_ens_pred.sum(axis=1)
        dev_pred = dev_ens_pred.sum(axis=1)
        test_pred = test_ens_pred.sum(axis=1)
        train_pred[train_pred < 0.0] = 0.0
        dev_pred[dev_pred < 0.0] = 0.0
        test_pred[test_pred < 0.0] = 0.0
        train_pred = train_pred.values
        dev_pred = dev_pred.values
        test_pred = test_pred.values
        print('train_pred len:{}'.format(len(train_pred)))
        train_y = orig_df[(train_len - train_samples_len):train_len]
        print('train_y len:{}'.format(train_y.shape[0]))
        # NOTE: these slices assume dev_len == test_len.
        dev_y = orig_df[train_len:train_len + test_len]
        test_y = orig_df[train_len + test_len:]
        train_y = train_y.reset_index(drop=True)
        dev_y = dev_y.reset_index(drop=True)
        test_y = test_y.reset_index(drop=True)
        train_y = train_y.values
        dev_y = dev_y.values
        test_y = test_y.values

        # Count predictions exceeding 1.5x and 2x the maximum observed flow.
        max_streamflow = max(orig_df)
        ratio_train = train_pred / max_streamflow
        ratio_dev = dev_pred / max_streamflow
        ratio_test = test_pred / max_streamflow
        rto_train = pd.DataFrame(ratio_train, columns=['train'])['train']
        rto_dev = pd.DataFrame(ratio_dev, columns=['dev'])['dev']
        rto_test = pd.DataFrame(ratio_test, columns=['test'])['test']
        ratio_df = pd.concat([rto_train, rto_dev, rto_test], axis=1)
        ratio1_5 = ratio_df[ratio_df > 1.5]
        ratio2 = ratio_df[ratio_df > 2]
        count_1_5 = pd.concat([
            ratio1_5['train'].value_counts(),
            ratio1_5['dev'].value_counts(),
            ratio1_5['test'].value_counts()
        ], axis=1)
        count_2 = pd.concat([
            ratio2['train'].value_counts(),
            ratio2['dev'].value_counts(),
            ratio2['test'].value_counts()
        ], axis=1)
        count_1_5.to_csv(model_path + 'pred_div_maxtrue_ratio1_5_count.csv')
        count_2.to_csv(model_path + 'pred_div_maxtrue_ratio2_count.csv')
        ratio_df.to_csv(model_path + 'pred_div_maxtrue_ratio.csv')

        train_nse = r2_score(y_true=train_y, y_pred=train_pred)
        train_nmse = normalized_mean_square_error(y_true=train_y,
                                                  y_pred=train_pred)
        train_rmse = math.sqrt(mean_squared_error(train_y, train_pred))
        train_nrmse = math.sqrt(mean_squared_error(
            train_y, train_pred)) / (sum(train_y) / len(train_y))
        train_mae = mean_absolute_error(train_y, train_pred)
        train_mape = np.mean(np.abs((train_y - train_pred) / train_y)) * 100
        train_ppts = PPTS(train_y, train_pred, 5)
        dev_nse = r2_score(y_true=dev_y, y_pred=dev_pred)
        dev_nmse = normalized_mean_square_error(y_true=dev_y, y_pred=dev_pred)
        dev_rmse = math.sqrt(mean_squared_error(dev_y, dev_pred))
        dev_nrmse = math.sqrt(mean_squared_error(
            dev_y, dev_pred)) / (sum(dev_y) / len(dev_y))
        dev_mae = mean_absolute_error(dev_y, dev_pred)
        dev_mape = np.mean(np.abs((dev_y - dev_pred) / dev_y)) * 100
        dev_ppts = PPTS(dev_y, dev_pred, 5)
        test_nse = r2_score(y_true=test_y, y_pred=test_pred)
        test_nmse = normalized_mean_square_error(y_true=test_y,
                                                 y_pred=test_pred)
        test_rmse = math.sqrt(mean_squared_error(test_y, test_pred))
        test_nrmse = math.sqrt(mean_squared_error(
            test_y, test_pred)) / (sum(test_y) / len(test_y))
        test_mae = mean_absolute_error(test_y, test_pred)
        test_mape = np.mean(np.abs((test_y - test_pred) / test_y)) * 100
        test_ppts = PPTS(test_y, test_pred, 5)

        model_metrics = {
            'train_nse': train_nse,
            'train_nmse': train_nmse,
            'train_rmse': train_rmse,
            'train_nrmse': train_nrmse,
            'train_mae': train_mae,
            'train_mape': train_mape,
            'train_ppts': train_ppts,
            'dev_nse': dev_nse,
            'dev_nmse': dev_nmse,
            'dev_rmse': dev_rmse,
            'dev_nrmse': dev_nrmse,
            'dev_mae': dev_mae,
            'dev_mape': dev_mape,
            'dev_ppts': dev_ppts,
            'test_nse': test_nse,
            'test_nmse': test_nmse,
            'test_rmse': test_rmse,
            'test_nrmse': test_nrmse,
            'test_mae': test_mae,
            'test_mape': test_mape,
            'test_ppts': test_ppts,
        }
        model_train_results = {
            'train_y': train_y,
            'train_pred': train_pred,
        }
        model_dev_results = {
            'dev_y': dev_y,
            'dev_pred': dev_pred,
        }
        model_test_results = {
            'test_y': test_y,
            'test_pred': test_pred,
        }
        MODEL_METRICS = pd.DataFrame(model_metrics,
                                     index=np.arange(start=0, stop=1, step=1))
        MODEL_TRAIN_RESULTS = pd.DataFrame(
            model_train_results,
            index=np.arange(start=0, stop=train_samples_len, step=1))
        MODEL_DEV_RESULTS = pd.DataFrame(
            model_dev_results,
            index=np.arange(start=0, stop=dev_len, step=1))
        MODEL_TEST_RESULTS = pd.DataFrame(
            model_test_results,
            index=np.arange(start=0, stop=test_len, step=1))
        MODEL_METRICS.to_csv(model_path + 'model_metrics.csv')
        MODEL_TRAIN_RESULTS.to_csv(model_path + 'model_train_results.csv')
        MODEL_DEV_RESULTS.to_csv(model_path + 'model_dev_results.csv')
        MODEL_TEST_RESULTS.to_csv(model_path + 'model_test_results.csv')
        plot_rela_pred(train_y, train_pred, model_path + 'train_pred.png')
        plot_rela_pred(dev_y, dev_pred, model_path + 'dev_pred.png')
        plot_rela_pred(test_y, test_pred, model_path + 'test_pred.png')
    plt.close('all')
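
# A minimal usage sketch for ensemble_optimization. The station name, lag
# settings, split lengths, and data file below are hypothetical placeholders,
# not values from the original project:
if __name__ == '__main__':
    example_variables = {
        'lags_dict': {'vmd': {'IMF1': 20, 'IMF2': 20, 'IMF3': 20}},  # hypothetical lags
        'full_len': 7083,   # hypothetical series length
        'train_len': 6001,  # hypothetical training length
        'dev_len': 541,     # development length (541 as in the scripts above)
        'test_len': 541,    # test length
    }
    orig_series = pd.read_csv('example_station_orig_series.csv')['flow']  # hypothetical file
    ensemble_optimization(root_path='.',
                          station='example_station',
                          variables=example_variables,
                          orig_df=orig_series,
                          pattern='one_step_1_ahead_forecast',
                          decomposer='vmd',
                          lev=3,
                          criterion='RMSE')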