Example #1
def main(argv):
    """ Predict based on the trained model and specfic checkpoints. """
    assert len(argv) == 1

    # Load data from local disk.
    (x_train_dev, y_train_dev), (x_train, y_train), (x_dev, y_dev), (
        x_test, y_test), (series_max, series_min) = load_normalized_data(
            # "orig_day_full_X.xlsx",  # predict the original series
            "vmd_imf10.xlsx",  # predict the VMD IMFs
            seed=123)

    # Create feature columns.
    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        # tf.feature_column.numeric_column("X4"),
        # tf.feature_column.numeric_column("X5"),
        # tf.feature_column.numeric_column("X6"),
        # tf.feature_column.numeric_column("X7"),
        # tf.feature_column.numeric_column("X8"),
        # tf.feature_column.numeric_column("X9"),
        # tf.feature_column.numeric_column("X10"),
        # tf.feature_column.numeric_column("X11"),
        # tf.feature_column.numeric_column("X12"),
        # tf.feature_column.numeric_column("X13"),
        # tf.feature_column.numeric_column("X14"),
        # tf.feature_column.numeric_column("X15"),
        # tf.feature_column.numeric_column("X16"),
        # tf.feature_column.numeric_column("X17"),
        # tf.feature_column.numeric_column("X18"),
        # tf.feature_column.numeric_column("X19"),
    ]

    # Recover the model; the dropout rate is set to 0.0 for prediction.
    model_path = current_path + '/models/imf10/'

    # current_model = 'DNNRegressor_Hidden_Units[7, 13]'  # orig
    # current_model = 'DNNRegressor_Hidden_Units[5, 8]'  # imf1
    # current_model = 'DNNRegressor_Hidden_Units[3]'  # imf2
    # current_model = 'DNNRegressor_Hidden_Units[9, 12]'  # imf3
    # current_model = 'DNNRegressor_Hidden_Units[6, 10]'  # imf4
    # current_model = 'DNNRegressor_Hidden_Units[5, 11]'  # imf5
    # current_model = 'DNNRegressor_Hidden_Units[4, 7]'  # imf6
    # current_model = 'DNNRegressor_Hidden_Units[11, 12]'  # imf7
    # current_model = 'DNNRegressor_Hidden_Units[4]'  # imf8
    # current_model = 'DNNRegressor_Hidden_Units[13, 12]'  # imf9
    current_model = 'DNNRegressor_Hidden_Units[3]'  # imf10
    model_dir = model_path + current_model + '/'

    # model = tf.estimator.Estimator(
    #     model_fn=my_dnn_regression_fn,
    #     model_dir=model_dir,
    #     params={
    #         'feature_columns': feature_columns,
    #         # NOTE: Set the hidden units for predictions
    #         'hidden_units': [7],
    #         'drop_rates': [0.0]
    #     },
    # )

    model = tf.estimator.DNNRegressor(
        # hidden_units=[7, 13], # orig
        # hidden_units=[5, 8], # imf1
        # hidden_units=[3], # imf2
        # hidden_units=[9,12], # imf3
        # hidden_units=[6,10], # imf4
        # hidden_units=[5,11], # imf5
        # hidden_units=[4], # imf6
        # hidden_units=[11,12], # imf7
        # hidden_units=[13,12], # imf9
        hidden_units=[3], # imf10
        feature_columns=feature_columns,
        model_dir=model_dir,
    )

    train_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
        x_train, shuffle=False)
    dev_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
        x_dev, shuffle=False)
    test_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
        x_test, shuffle=False)

    # Use a specific checkpoint file for prediction.
    # checkpoint_path = model_dir + 'model.ckpt-7200' #orig
    # checkpoint_path = model_dir + 'model.ckpt-29600' #imf1
    # checkpoint_path = model_dir + 'model.ckpt-50600' #imf2
    # checkpoint_path = model_dir + 'model.ckpt-74900' #imf3
    # checkpoint_path = model_dir + 'model.ckpt-30000' #imf4
    # checkpoint_path = model_dir + 'model.ckpt-73100' #imf5
    # checkpoint_path = model_dir + 'model.ckpt-81700' #imf6
    # checkpoint_path = model_dir + 'model.ckpt-13200' #imf7
    # checkpoint_path = model_dir + 'model.ckpt-32800' #imf8
    # checkpoint_path = model_dir + 'model.ckpt-11700' #imf9
    checkpoint_path = model_dir + 'model.ckpt-45600' #imf10

    # Predict the training set with the specific checkpoint.
    train_pred_results = model.predict(
        input_fn=train_pred_input_fn, checkpoint_path=checkpoint_path)
    # Predict the development set.
    dev_pred_results = model.predict(
        input_fn=dev_pred_input_fn, checkpoint_path=checkpoint_path)
    # Predict the test set.
    test_pred_results = model.predict(
        input_fn=test_pred_input_fn, checkpoint_path=checkpoint_path)

    # Convert the prediction generators to numpy arrays.
    train_predictions = np.array(
        list(p['predictions'] for p in train_pred_results))
    dev_predictions = np.array(
        list(p['predictions'] for p in dev_pred_results))
    test_predictions = np.array(
        list(p['predictions'] for p in test_pred_results))

    # Reshape the predictions to match the shape of y.
    train_predictions = train_predictions.reshape(np.array(y_train).shape)
    dev_predictions = dev_predictions.reshape(np.array(y_dev).shape)
    test_predictions = test_predictions.reshape(np.array(y_test).shape)

    # Renormalize the records and predictions
    y_train = np.multiply(
        y_train + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    train_predictions = np.multiply(train_predictions + 1, series_max["Y"] -
                                    series_min["Y"]) / 2 + series_min["Y"]
    y_dev = np.multiply(
        y_dev + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    dev_predictions = np.multiply(dev_predictions + 1, series_max["Y"] -
                                  series_min["Y"]) / 2 + series_min["Y"]
    y_test = np.multiply(
        y_test + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    test_predictions = np.multiply(test_predictions + 1, series_max["Y"] -
                                   series_min["Y"]) / 2 + series_min["Y"]

    # compute R square
    r2_train = r2_score(y_train, train_predictions)
    r2_dev = r2_score(y_dev, dev_predictions)
    r2_test = r2_score(y_test, test_predictions)

    # compute MSE
    mse_train = mean_squared_error(y_train, train_predictions)
    mse_dev = mean_squared_error(y_dev, dev_predictions)
    mse_test = mean_squared_error(y_test, test_predictions)

    # compute MAE
    mae_train = mean_absolute_error(y_train, train_predictions)
    mae_dev = mean_absolute_error(y_dev, dev_predictions)
    mae_test = mean_absolute_error(y_test, test_predictions)

    # compute MAPE
    mape_train = np.true_divide(
        np.sum(np.abs(np.true_divide(
            (y_train - train_predictions), y_train))), y_train.size) * 100
    mape_dev = np.true_divide(
        np.sum(np.abs(np.true_divide(
            (y_dev - dev_predictions), y_dev))), y_dev.size) * 100
    mape_test = np.true_divide(
        np.sum(np.abs(np.true_divide(
            (y_test - test_predictions), y_test))), y_test.size) * 100
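    # Note: each MAPE expression above is the element-wise form of
    # np.mean(np.abs((y - predictions) / y)) * 100, spelled out with
    # np.true_divide and np.sum.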

    # Report the R^2 scores on the training and development sets.
    print('r2_score_train = {:.10f}'.format(r2_train))
    print('r2_score_dev = {:.10f}'.format(r2_dev))

    dump_train_dev_test_to_excel(
        path=model_path + current_model + '.xlsx',
        y_train=y_train,
        train_pred=train_predictions,
        r2_train=r2_train,
        mse_train=mse_train,
        mae_train=mae_train,
        mape_train=mape_train,
        y_dev=y_dev,
        dev_pred=dev_predictions,
        r2_dev=r2_dev,
        mse_dev=mse_dev,
        mae_dev=mae_dev,
        mape_dev=mape_dev,
        y_test=y_test,
        test_pred=test_predictions,
        r2_test=r2_test,
        mse_test=mse_test,
        mae_test=mae_test,
        mape_test=mape_test)

    plot_rela_pred(
        y_train,
        train_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + current_model + '_train_pred.tif')

    plot_rela_pred(
        y_dev,
        dev_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + current_model + "_dev_pred.tif")

    plot_rela_pred(
        y_test,
        test_predictions,
        series_max,
        series_min,
        fig_savepath=model_path + current_model + "_test_pred.tif")
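The renormalization step above inverts the [-1, 1] min-max scaling applied when the data were prepared, i.e. x_scaled = 2 * (x - min) / (max - min) - 1. A minimal sketch of that inverse transform as a reusable helper; the function name `denormalize` and the toy values are illustrative, not part of the original project:

import numpy as np

def denormalize(scaled, s_max, s_min):
    # Invert x_scaled = 2 * (x - min) / (max - min) - 1.
    return (scaled + 1.0) * (s_max - s_min) / 2.0 + s_min

# Round-trip check on toy values.
series = np.array([3.0, 7.5, 12.0])
s_min, s_max = series.min(), series.max()
scaled = 2.0 * (series - s_min) / (s_max - s_min) - 1.0
assert np.allclose(denormalize(scaled, s_max, s_min), series)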
Example #2
train_predictions = np.multiply(train_predictions + 1, sMax - sMin) / 2 + sMin
train_predictions[train_predictions < 0.0] = 0.0
dev_y = np.multiply(dev_y + 1, sMax - sMin) / 2 + sMin
dev_predictions = np.multiply(dev_predictions + 1, sMax - sMin) / 2 + sMin
dev_predictions[dev_predictions < 0.0] = 0.0
test_y = np.multiply(test_y + 1, sMax - sMin) / 2 + sMin
test_predictions = np.multiply(test_predictions + 1, sMax - sMin) / 2 + sMin
test_predictions[test_predictions < 0.0] = 0.0

dum_pred_results(
    path=model_path + MODEL_NAME + '.csv',
    train_y=train_y,
    train_predictions=train_predictions,
    dev_y=dev_y,
    dev_predictions=dev_predictions,
    test_y=test_y,
    test_predictions=test_predictions,
    time_cost=time_cost,
)

plot_rela_pred(train_y,
               train_predictions,
               fig_savepath=model_path + MODEL_NAME + '-TRAIN-PRED.png')
plot_rela_pred(dev_y,
               dev_predictions,
               fig_savepath=model_path + MODEL_NAME + "-DEV-PRED.png")
plot_rela_pred(test_y,
               test_predictions,
               fig_savepath=model_path + MODEL_NAME + "-TEST-PRED.png")
plot_error_distribution(test_predictions, test_y,
                        model_path + MODEL_NAME + '-ERROR-DSTRI.png')
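Example #2 adds one step Example #1 lacks: after denormalizing, negative predictions are clipped to zero, since the predicted quantity (streamflow) cannot be negative. A small sketch combining the two steps under the same [-1, 1] scaling assumption; `denormalize_nonnegative` is a hypothetical helper name:

import numpy as np

def denormalize_nonnegative(pred_scaled, s_max, s_min):
    # Map [-1, 1]-scaled predictions back to original units (hypothetical helper),
    # then clip negative values to zero.
    pred = np.multiply(pred_scaled + 1.0, s_max - s_min) / 2.0 + s_min
    pred[pred < 0.0] = 0.0
    return pred

# Illustrative usage: the first prediction would be negative without clipping.
print(denormalize_nonnegative(np.array([-1.2, -0.5, 0.9]), s_max=10.0, s_min=0.0))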
Example #3
def my_lstm(path, pattern, HU1, DR1,
            HL=1,
            HU2=8,
            DR2=0.0,
            LR=0.007,
            EPS=1000,
            lev=None,
            EARLY_STOPPING=True,
            MODEL_ID=None,
            loss='mean_squared_error',
            wavelet=None):
    if wavelet is None and MODEL_ID is None:
        data_path = path + 'data\\' + pattern + '\\'
        model_path = path + 'projects\\lstm-models-history\\' + pattern + '\\history\\'
    elif wavelet is None and MODEL_ID is not None:
        data_path = path + 'data\\' + pattern + '\\'
        model_path = path + 'projects\\lstm-models-history\\' + pattern + '\\history\\s' + str(MODEL_ID) + '\\'
    elif wavelet is not None and MODEL_ID is None:
        data_path = path + 'data\\' + wavelet + '-' + str(lev) + '\\' + pattern + '\\'
        model_path = path + 'projects\\lstm-models-history\\' + wavelet + '-' + str(lev) + '\\' + pattern + '\\history\\'
    elif wavelet is not None and MODEL_ID is not None:
        data_path = path + 'data\\' + wavelet + '-' + str(lev) + '\\' + pattern + '\\'
        model_path = path + 'projects\\lstm-models-history\\' + wavelet + '-' + str(lev) + '\\' + pattern + '\\history\\s' + str(MODEL_ID) + '\\'

    # 1. Load the sampled, normalized data set from disk.
    if MODEL_ID is None:
        train = pd.read_csv(data_path + 'minmax_unsample_train.csv')
        dev = pd.read_csv(data_path + 'minmax_unsample_dev.csv')
        test = pd.read_csv(data_path + 'minmax_unsample_test.csv')
    else:
        train = pd.read_csv(data_path + 'minmax_unsample_train_s' + str(MODEL_ID) + '.csv')
        dev = pd.read_csv(data_path + 'minmax_unsample_dev_s' + str(MODEL_ID) + '.csv')
        test = pd.read_csv(data_path + 'minmax_unsample_test_s' + str(MODEL_ID) + '.csv')

    # Split features from labels
    train_x = train
    train_y = train.pop('Y')
    train_y = train_y.values
    dev_x = dev
    dev_y = dev.pop('Y')
    dev_y = dev_y.values
    test_x = test
    test_y = test.pop('Y')
    test_y = test_y.values
    # Reshape the input features to (samples, timesteps, features) for the LSTM.
    train_x = train_x.values.reshape(train_x.shape[0], 1, train_x.shape[1])
    dev_x = dev_x.values.reshape(dev_x.shape[0], 1, dev_x.shape[1])
    test_x = test_x.values.reshape(test_x.shape[0], 1, test_x.shape[1])

    RE_TRAIN = False
    WARM_UP = False
    INITIAL_EPOCH = 6000
    SEED = 1  # for initializing weights and biases
    # Hyper-parameters; those commented out below are now function arguments.
    # EPS=1000   number of epochs
    # EARLY_STOPPING=True
    # Tuning stage 1:
    # LR=0.007   learning rate: 0.0001, 0.0003, 0.0007, 0.001, 0.003, 0.007, 0.01, 0.03, 0.1
    # Tuning stage 2:
    # HU1=32     hidden units for hidden layer 1: [8, 16, 24, 32]
    BS = 512  # batch size
    # Tuning stage 3:
    # HL=1       number of hidden layers
    # HU2=16     hidden units for hidden layer 2
    DC = 0.000  # decay rate of the learning rate
    # Tuning stage 4:
    # DR1=0.7    dropout rate for hidden layer 1: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    # DR2=0.0    dropout rate for hidden layer 2

    # 2. Build the LSTM model with Keras.
    # Set the hyper-parameters.
    LEARNING_RATE = LR
    EPOCHS = EPS
    BATCH_SIZE = BS
    if HL == 2:
        HIDDEN_UNITS = [HU1, HU2]
        DROP_RATE = [DR1, DR2]
    else:
        HIDDEN_UNITS = [HU1]
        DROP_RATE = [DR1]

    DECAY_RATE = DC
    if MODEL_ID is None:
        MODEL_NAME = 'LSTM-LR['+str(LEARNING_RATE)+']-HU'+str(HIDDEN_UNITS)+'-EPS['+str(EPOCHS)+']-BS['+str(BATCH_SIZE)+']-DR'+str(DROP_RATE)+'-DC['+str(DECAY_RATE)+']-SEED['+str(SEED)+']'
    else:
        MODEL_NAME = 'LSTM-S'+str(MODEL_ID)+'-LR['+str(LEARNING_RATE)+']-HU'+str(HIDDEN_UNITS)+'-EPS['+str(EPOCHS)+']-BS['+str(BATCH_SIZE)+']-DR'+str(DROP_RATE)+'-DC['+str(DECAY_RATE)+']-SEED['+str(SEED)+']'
    # RESUME_TRAINING = True
    def build_model():
        if HL == 2:
            model = keras.Sequential([
                layers.LSTM(HIDDEN_UNITS[0], activation=tf.nn.relu, return_sequences=True,
                            input_shape=(train_x.shape[1], train_x.shape[2])),
                layers.Dropout(DROP_RATE[0], noise_shape=None, seed=None),
                # Second and last LSTM layer, so return_sequences=False.
                layers.LSTM(HIDDEN_UNITS[1], activation=tf.nn.relu, return_sequences=False),
                layers.Dropout(DROP_RATE[1], noise_shape=None, seed=None),
                layers.Dense(1)
            ])
        else:
            model = keras.Sequential([
                layers.LSTM(HIDDEN_UNITS[0], activation=tf.nn.relu,
                            input_shape=(train_x.shape[1], train_x.shape[2])),
                layers.Dropout(DROP_RATE[0], noise_shape=None, seed=None),
                layers.Dense(1)
            ])
        optimizer = keras.optimizers.Adam(LEARNING_RATE, decay=DECAY_RATE)
        if loss == 'mean_squared_error':
            print('Loss function: mean_squared_error')
            model.compile(
                loss='mean_squared_error',
                optimizer=optimizer,
                metrics=['mean_absolute_error', 'mean_squared_error'])
        elif loss == 'custom_loss':
            print('Loss function: custom_loss')
            model.compile(
                loss=custom_loss,
                optimizer=optimizer,
                metrics=['mean_absolute_error', 'mean_squared_error', custom_loss])
        return model
    # Set the checkpoint (weight-restore) path.
    cp_path = model_path + MODEL_NAME + '\\'
    if not os.path.exists(cp_path):
        os.makedirs(cp_path)
    checkpoint_path = model_path + MODEL_NAME + '\\cp.h5'  # keep only the best checkpoint
    # checkpoint_path = model_path + 'cp-{epoch:04d}.ckpt'  # save a checkpoint every x epochs
    checkpoint_dir = os.path.dirname(checkpoint_path)
    print('checkpoint dir: {}'.format(checkpoint_dir))
    cp_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_path, save_best_only=True, mode='min',
        save_weights_only=True, verbose=1)
    model = build_model()
    model.summary()  # print a summary of the model architecture
    """
    # Evaluate before training or load trained weights and biases
    loss, mae, mse = model.evaluate(test_x, test_y, verbose=1)
    # Try the model with initial weights and biases
    example_batch = train_x[:10]
    example_result = model.predict(example_batch)
    print(example_result)
    """
    # 3. Train the model.
    # Display training progress by printing a single dot for each completed epoch.
    class PrintDot(keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs):
            if epoch % 100 == 0:
                print('')
            print('.', end='')

    files = os.listdir(checkpoint_dir)

    from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
    # reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=10, mode='auto')
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', min_lr=0.00001, factor=0.2,
                                  verbose=1, patience=10, mode='min')
    early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                                   patience=100, restore_best_weights=True)

    if MODEL_ID is None:
        warm_dir = 'LSTM-LR[' + str(LEARNING_RATE) + ']-HU' + str(HIDDEN_UNITS) + '-EPS[' + str(INITIAL_EPOCH) + ']-BS[' + str(BATCH_SIZE) + ']-DR' + str(DROP_RATE) + '-DC[' + str(DECAY_RATE) + ']-SEED[' + str(SEED) + ']'
    else:
        warm_dir = 'LSTM-S' + str(MODEL_ID) + '-LR[' + str(LEARNING_RATE) + ']-HU' + str(HIDDEN_UNITS) + '-EPS[' + str(INITIAL_EPOCH) + ']-BS[' + str(BATCH_SIZE) + ']-DR' + str(DROP_RATE) + '-DC[' + str(DECAY_RATE) + ']-SEED[' + str(SEED) + ']'
    print('Warm-up dir exists: {}'.format(os.path.exists(model_path + warm_dir)))
    if RE_TRAIN:
        print('retrain the model')
        if EARLY_STOPPING:
            history2 = model.fit(train_x, train_y, epochs=EPOCHS, batch_size=BATCH_SIZE,
                                 validation_data=(dev_x, dev_y), verbose=1,
                                 callbacks=[cp_callback, early_stopping])
        else:
            history2 = model.fit(train_x, train_y, epochs=EPOCHS, batch_size=BATCH_SIZE,
                                 validation_data=(dev_x, dev_y), verbose=1,
                                 callbacks=[cp_callback])
        hist2 = pd.DataFrame(history2.history)
        hist2.to_csv(model_path + MODEL_NAME + '-HISTORY-TRAIN-TEST.csv')
        hist2['epoch'] = history2.epoch
        plot_history(history2,
                     model_path + MODEL_NAME + '-MAE-ERRORS-TRAINTEST.png',
                     model_path + MODEL_NAME + '-MSE-ERRORS-TRAINTEST.png')
    elif len(files) == 0:
        if os.path.exists(model_path + warm_dir) and WARM_UP:
            print('WARM UP FROM EPOCH ' + str(INITIAL_EPOCH))
            warm_path = model_path + warm_dir + '\\cp.ckpt'
            model.load_weights(warm_path)
            if EARLY_STOPPING:
                history2 = model.fit(train_x, train_y, initial_epoch=INITIAL_EPOCH,
                                     epochs=EPOCHS, batch_size=BATCH_SIZE,
                                     validation_data=(dev_x, dev_y), verbose=1,
                                     callbacks=[cp_callback, early_stopping])
            else:
                history2 = model.fit(train_x, train_y, initial_epoch=INITIAL_EPOCH,
                                     epochs=EPOCHS, batch_size=BATCH_SIZE,
                                     validation_data=(dev_x, dev_y), verbose=1,
                                     callbacks=[cp_callback])
        else:
            print('new train')
            if EARLY_STOPPING:
                history2 = model.fit(train_x, train_y, epochs=EPOCHS, batch_size=BATCH_SIZE,
                                     validation_data=(dev_x, dev_y), verbose=1,
                                     callbacks=[cp_callback, early_stopping])
            else:
                history2 = model.fit(train_x, train_y, epochs=EPOCHS, batch_size=BATCH_SIZE,
                                     validation_data=(dev_x, dev_y), verbose=1,
                                     callbacks=[cp_callback])
        hist2 = pd.DataFrame(history2.history)
        hist2.to_csv(model_path + MODEL_NAME + '-HISTORY-TRAIN-TEST.csv')
        hist2['epoch'] = history2.epoch
        plot_history(history2,
                     model_path + MODEL_NAME + '-MAE-ERRORS-TRAINTEST.png',
                     model_path + MODEL_NAME + '-MSE-ERRORS-TRAINTEST.png')
    else:
        print('#' * 10 + ' Already trained')

    # Load the best weights saved by the checkpoint callback.
    model.load_weights(checkpoint_path)

    """
    # Evaluate after training or load trained weights and biases
    loss, mae, mse = model.evaluate(test_x, test_y, verbose=1)
    print("Testing set Mean Abs Error: {:5.2f} ".format(mae))
    """
    # 4. Predict with the trained model.
    train_predictions = model.predict(train_x).flatten()
    dev_predictions = model.predict(dev_x).flatten()
    test_predictions = model.predict(test_x).flatten()
    # plt.figure()
    # plt.plot(train_y,c='b')
    # plt.plot(train_predictions,c='r')
    # plt.show()
    # Renormalize the predictions and labels: load the series max/min
    # used for the original min-max scaling.
    if MODEL_ID is None:
        norm = pd.read_csv(data_path + 'norm_id.csv')
    else:
        norm = pd.read_csv(data_path + 'norm_id_s' + str(MODEL_ID) + '.csv')
    sMax = norm['series_max'][norm.shape[0] - 1]
    sMin = norm['series_min'][norm.shape[0] - 1]

    print('Series min:{}'.format(sMin))
    print('Series max:{}'.format(sMax))

    train_y = np.multiply(train_y + 1, sMax - sMin) / 2 + sMin
    train_predictions = np.multiply(train_predictions + 1, sMax - sMin) / 2 + sMin
    dev_y = np.multiply(dev_y + 1, sMax - sMin) / 2 + sMin
    dev_predictions = np.multiply(dev_predictions + 1, sMax - sMin) / 2 + sMin
    test_y = np.multiply(test_y + 1, sMax - sMin) / 2 + sMin
    test_predictions = np.multiply(test_predictions + 1, sMax - sMin) / 2 + sMin

    print("pattern.find('multi')={}".format(pattern.find('multi')))
    print("pattern.find('one')={}".format(pattern.find('one')))

    if pattern.find('one')>=0:
        print('decomposition ensemble model!!!!!!!!!!!!!!!!!!!!!!!')
        train_predictions[train_predictions<0.0]=0.0
        dev_predictions[dev_predictions<0.0]=0.0
        test_predictions[test_predictions<0.0]=0.0
    elif pattern.find('one')<0 and pattern.find('multi')<0:
        print('monoscale model$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        train_predictions[train_predictions<0.0]=0.0
        dev_predictions[dev_predictions<0.0]=0.0
        test_predictions[test_predictions<0.0]=0.0


    dum_pred_results(
        path=model_path + MODEL_NAME + '.csv',
        train_y=train_y,
        train_predictions=train_predictions,
        dev_y=dev_y,
        dev_predictions=dev_predictions,
        test_y=test_y,
        test_predictions=test_predictions)

    plot_rela_pred(train_y, train_predictions,
                   fig_savepath=model_path + MODEL_NAME + '-TRAIN-PRED.png')
    plot_rela_pred(dev_y, dev_predictions,
                   fig_savepath=model_path + MODEL_NAME + '-DEV-PRED.png')
    plot_rela_pred(test_y, test_predictions,
                   fig_savepath=model_path + MODEL_NAME + '-TEST-PRED.png')
    plot_error_distribution(test_predictions, test_y,
                            model_path + MODEL_NAME + '-ERROR-DSTRI.png')
    plt.close('all')
    tf.keras.backend.clear_session()


def main(argv):
    """ Predict based on the trained model and specfic checkpoints. """
    assert len(argv) == 1
    (x_train_dev, y_train_dev), (x_train0, y_train0), (x_dev0, y_dev0), (
        x_test0, y_test0), (series_max,
                            series_min) = load_normalized_data('VMD_IMFS.xlsx',
                                                               seed=123)
    # data_file = 'ARMA_IMFs_PRED.xlsx'
    # data_file = 'SVR_IMFs_PRED.xlsx'
    # data_file = 'GBR_IMFs_PRED.xlsx'
    data_file = 'DNN_IMFs_PRED.xlsx'
    # print(10 * '-' + ' Data file: {}'.format(data_file))
    # # Load data from local disk.
    # (x_train_dev, y_train_dev), (x_train, y_train), (x_dev, y_dev), (
    #     x_test, y_test), (series_max,
    #                       series_min) = load_normalized_data(
    #                           data_file, seed=123)

    full_data_set = pd.read_excel(par_path_2 + '\\data\\' + data_file)
    full_norm_set = 2 * (full_data_set - series_min) / (series_max -
                                                        series_min) - 1
    series_len = len(full_norm_set)
    train_dev_set = full_norm_set[0:(series_len - 541)]
    y_train_dev = train_dev_set['Y']
    x_train_dev = train_dev_set.drop('Y', axis=1)
    # Get the test set
    test_set = full_norm_set[(series_len - 541):series_len]
    # Fix the random seed so the shuffled split is reproducible.
    np.random.seed(123)
    # Split the data into training/development subsets.
    x_train = train_dev_set.sample(frac=0.888888889, random_state=123)
    x_dev = train_dev_set.drop(x_train.index)
    # Extract the label from the features dataframe
    y_train = x_train.pop('Y')
    y_dev = x_dev.pop('Y')
    # print(test_set)
    x_test = test_set
    y_test = x_test.pop('Y')

    # Create feature columns.
    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        tf.feature_column.numeric_column("X4"),
        tf.feature_column.numeric_column("X5"),
        tf.feature_column.numeric_column("X6"),
        tf.feature_column.numeric_column("X7"),
        tf.feature_column.numeric_column("X8"),
        tf.feature_column.numeric_column("X9"),
        tf.feature_column.numeric_column("X10"),
    ]

    # Recover the model; the dropout rate is set to 0.0 for prediction.
    model_path = current_path + '/models/ensemble/'
    current_model = 'DNNRegressor_Hidden_Units[9, 8]'
    model_dir = model_path + current_model + '/'

    # model = tf.estimator.Estimator(
    #     model_fn=my_dnn_regression_fn,
    #     model_dir=model_dir,
    #     params={
    #         'feature_columns': feature_columns,
    #         # NOTE: Set the hidden units for predictions
    #         'hidden_units': [7],
    #         'drop_rates': [0.0]
    #     },
    # )

    model = tf.estimator.DNNRegressor(
        hidden_units=[9, 8],
        feature_columns=feature_columns,
        model_dir=model_dir,
    )

    train_pred_input_fn = tf.estimator.inputs.pandas_input_fn(x_train,
                                                              shuffle=False)
    dev_pred_input_fn = tf.estimator.inputs.pandas_input_fn(x_dev,
                                                            shuffle=False)
    test_pred_input_fn = tf.estimator.inputs.pandas_input_fn(x_test,
                                                             shuffle=False)

    # Use a specific checkpoint file for prediction.
    checkpoint_path = model_dir + 'model.ckpt-22400'

    # Predict the training set with the specific checkpoint.
    train_pred_results = model.predict(input_fn=train_pred_input_fn,
                                       checkpoint_path=checkpoint_path)
    # Predict the development set.
    dev_pred_results = model.predict(input_fn=dev_pred_input_fn,
                                     checkpoint_path=checkpoint_path)
    # Predict the test set.
    test_pred_results = model.predict(input_fn=test_pred_input_fn,
                                      checkpoint_path=checkpoint_path)

    # Convert the prediction generators to numpy arrays.
    train_predictions = np.array(
        list(p['predictions'] for p in train_pred_results))
    dev_predictions = np.array(list(p['predictions']
                                    for p in dev_pred_results))
    test_predictions = np.array(
        list(p['predictions'] for p in test_pred_results))

    # Reshape the predictions to match the shape of y.
    train_predictions = train_predictions.reshape(np.array(y_train).shape)
    dev_predictions = dev_predictions.reshape(np.array(y_dev).shape)
    test_predictions = test_predictions.reshape(np.array(y_test).shape)

    # Renormalize the records and predictions
    y_train = np.multiply(
        y_train + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    train_predictions = np.multiply(train_predictions + 1, series_max["Y"] -
                                    series_min["Y"]) / 2 + series_min["Y"]
    y_dev = np.multiply(
        y_dev + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    dev_predictions = np.multiply(dev_predictions + 1, series_max["Y"] -
                                  series_min["Y"]) / 2 + series_min["Y"]
    y_test = np.multiply(
        y_test + 1, series_max["Y"] - series_min["Y"]) / 2 + series_min["Y"]
    test_predictions = np.multiply(test_predictions + 1, series_max["Y"] -
                                   series_min["Y"]) / 2 + series_min["Y"]

    # compute R square
    r2_train = r2_score(y_train, train_predictions)
    r2_dev = r2_score(y_dev, dev_predictions)
    r2_test = r2_score(y_test, test_predictions)

    # compute MSE
    mse_train = mean_squared_error(y_train, train_predictions)
    mse_dev = mean_squared_error(y_dev, dev_predictions)
    mse_test = mean_squared_error(y_test, test_predictions)

    # compute MAE
    mae_train = mean_absolute_error(y_train, train_predictions)
    mae_dev = mean_absolute_error(y_dev, dev_predictions)
    mae_test = mean_absolute_error(y_test, test_predictions)

    # compute MAPE
    mape_train = np.true_divide(
        np.sum(np.abs(np.true_divide(
            (y_train - train_predictions), y_train))), y_train.size) * 100
    mape_dev = np.true_divide(
        np.sum(np.abs(np.true_divide(
            (y_dev - dev_predictions), y_dev))), y_dev.size) * 100
    mape_test = np.true_divide(
        np.sum(np.abs(np.true_divide(
            (y_test - test_predictions), y_test))), y_test.size) * 100

    # Report the R^2 scores on the training and development sets.
    print('r2_score_train = {:.10f}'.format(r2_train))
    print('r2_score_dev = {:.10f}'.format(r2_dev))

    dump_train_dev_test_to_excel(path=model_path + current_model + data_file +
                                 '.xlsx',
                                 y_train=y_train,
                                 train_pred=train_predictions,
                                 r2_train=r2_train,
                                 mse_train=mse_train,
                                 mae_train=mae_train,
                                 mape_train=mape_train,
                                 y_dev=y_dev,
                                 dev_pred=dev_predictions,
                                 r2_dev=r2_dev,
                                 mse_dev=mse_dev,
                                 mae_dev=mae_dev,
                                 mape_dev=mape_dev,
                                 y_test=y_test,
                                 test_pred=test_predictions,
                                 r2_test=r2_test,
                                 mse_test=mse_test,
                                 mae_test=mae_test,
                                 mape_test=mape_test)

    plot_rela_pred(y_train,
                   train_predictions,
                   series_max,
                   series_min,
                   fig_savepath=model_path + current_model + data_file +
                   '_train_pred.tif')

    plot_rela_pred(y_dev,
                   dev_predictions,
                   series_max,
                   series_min,
                   fig_savepath=model_path + current_model + data_file +
                   "_dev_pred.tif")

    plot_rela_pred(y_test,
                   test_predictions,
                   series_max,
                   series_min,
                   fig_savepath=model_path + current_model + data_file +
                   "_test_pred.tif")
Example #5
                                 mse_dev=mse_dev,
                                 mae_dev=mae_dev,
                                 mape_dev=mape_dev,
                                 y_test=y_test,
                                 test_pred=test_predictions,
                                 r2_test=r2_test,
                                 mse_test=mse_test,
                                 mae_test=mae_test,
                                 mape_test=mape_test)

    # print(test_predictions)

    # plot the predicted line
    plot_rela_pred(y_train,
                   train_predictions,
                   series_max,
                   series_min,
                   fig_savepath=model_path + 'SVR_train_pred.png')

    # plot_normreconvert_relation(
    #     y_train,
    #     train_predictions,
    #     series_max,
    #     series_min,
    #     fig_savepath=model_path + "SVR_train_rela.png")

    plot_rela_pred(y_dev,
                   dev_predictions,
                   series_max,
                   series_min,
                   fig_savepath=model_path + "SVR_dev_pred.png")
def ensemble_optimization(root_path,
                          station,
                          variables,
                          orig_df,
                          pattern,
                          decomposer=None,
                          lev=None,
                          wavelet=None,
                          criterion='RMSE'):
    # load variables
    lags_dict = variables['lags_dict']
    full_len = variables['full_len']
    train_len = variables['train_len']
    dev_len = variables['dev_len']
    test_len = variables['test_len']

    if pattern.find('one') < 0 and pattern.find('multi') < 0:
        print('Monoscale pattern')
        leading_time = int(pattern.split('_')[0])
    else:
        print('Decomposition ensemble pattern')
        leading_time = int(pattern.split('_')[2])

    def dict_to_list(dictionary):
        # Return the dictionary's values as a list.
        return list(dictionary.values())

    if decomposer is None:
        subsignals_num = None
        model_path = root_path + '/' + station + '_orig/projects/lstm-models-history/' + pattern + '/'
        lags = lags_dict['orig']
        leading_time = int(pattern.split('_')[0])
        train_samples_len = train_len - lags - leading_time + 1

    else:
        leading_time = int(pattern.split('_')[2])
        if wavelet is None:
            model_path = root_path + '/' + station + '_' + decomposer + '/projects/lstm-models-history/' + pattern + '/'
            lags = dict_to_list(lags_dict[decomposer])
            train_samples_len = train_len - max(lags) - leading_time + 1
            subsignals_num = lev
            assert lev == len(lags)
        else:
            model_path = root_path + '/' + station + '_' + decomposer + '/projects/lstm-models-history/' + wavelet + '-' + str(
                lev) + '/' + pattern + '/'
            lags = dict_to_list(lags_dict[wavelet + '-' + str(lev)])
            train_samples_len = train_len - max(lags) - leading_time + 1
            subsignals_num = lev + 1
            assert lev + 1 == len(lags)

    print('Station:{}'.format(station))
    print('Decomposer:{}'.format(decomposer))
    print('Decomposition level:{}'.format(lev))
    print('Prediction pattern:{}'.format(pattern))
    print('Wavelet:{}'.format(wavelet))
    print('full_len:{}'.format(full_len))
    print('train_len:{}'.format(train_len))
    print('dev_len:{}'.format(dev_len))
    print('test_len:{}'.format(test_len))
    print('train_samples_len:{}'.format(train_samples_len))
    print('lags:{}'.format(lags))
    print('Subsignals num:{}'.format(subsignals_num))
    print('Models are developed on {}'.format(criterion))

    if pattern.find('one_model') >= 0 or decomposer is None:
        print("################")
        signal_model = model_path + 'history/'
        criterion_dict = {}
        for files in os.listdir(signal_model):
            if files.find('.csv') >= 0 and (files.find('HISTORY') < 0
                                            and files.find('metrics') < 0):
                # print(files)
                data = pd.read_csv(signal_model + files)
                dev_y = data['dev_y'][0:dev_len]
                dev_pred = data['dev_pred'][0:dev_len]
                if criterion == 'RMSE':
                    criterion_dict[files] = data['rmse_dev'][0]
                elif criterion == 'NMSE':
                    NMSE = normalized_mean_square_error(y_true=dev_y,
                                                        y_pred=dev_pred)
                    criterion_dict[files] = NMSE

        key_min = min(criterion_dict.keys(), key=(lambda k: criterion_dict[k]))
        data = pd.read_csv(signal_model + key_min)
        train_y = data['train_y'][data.shape[0] - train_samples_len:]
        train_pred = data['train_pred'][data.shape[0] - train_samples_len:]
        train_pred[train_pred < 0.0] = 0.0
        train_y = train_y.reset_index(drop=True)
        train_pred = train_pred.reset_index(drop=True)
        train_results = pd.concat([train_y, train_pred], axis=1, sort=False)
        dev_y = data['dev_y'][0:dev_len]
        dev_pred = data['dev_pred'][0:dev_len]
        dev_pred[dev_pred < 0.0] = 0.0
        dev_results = pd.concat([dev_y, dev_pred], axis=1, sort=False)
        test_y = data['test_y'][0:test_len]
        test_pred = data['test_pred'][0:test_len]
        test_pred[test_pred < 0.0] = 0.0
        test_results = pd.concat([test_y, test_pred], axis=1, sort=False)

        max_streamflow = max(orig_df)
        ratio_train = train_pred / max_streamflow
        ratio_dev = dev_pred / max_streamflow
        ratio_test = test_pred / max_streamflow
        rto_train = pd.DataFrame(ratio_train, columns=['train'])['train']
        rto_dev = pd.DataFrame(ratio_dev, columns=['dev'])['dev']
        rto_test = pd.DataFrame(ratio_test, columns=['test'])['test']
        ratio_df = pd.concat([rto_train, rto_dev, rto_test], axis=1)
        ratio1_5 = ratio_df[ratio_df > 1.5]
        ratio2 = ratio_df[ratio_df > 2]
        count_1_5 = pd.concat([
            ratio1_5['train'].value_counts(), ratio1_5['dev'].value_counts(),
            ratio1_5['test'].value_counts()
        ], axis=1)
        count_2 = pd.concat([
            ratio2['train'].value_counts(), ratio2['dev'].value_counts(),
            ratio2['test'].value_counts()
        ], axis=1)
        count_1_5.to_csv(model_path + 'pred_div_maxtrue_ratio1_5_count.csv')
        count_2.to_csv(model_path + 'pred_div_maxtrue_ratio2_count.csv')
        ratio_df.to_csv(model_path + 'pred_div_maxtrue_ratio.csv')

        print('test_y=\n{}'.format(test_y))
        print('test_pred=\n{}'.format(test_pred))

        train_results.to_csv(model_path + 'model_train_results.csv',
                             index=None)
        dev_results.to_csv(model_path + 'model_dev_results.csv', index=None)
        test_results.to_csv(model_path + 'model_test_results.csv', index=None)

        plot_rela_pred(train_y.values, train_pred.values,
                       model_path + 'train_pred.png')
        plot_rela_pred(dev_y.values, dev_pred.values,
                       model_path + 'dev_pred.png')
        plot_rela_pred(test_y.values, test_pred.values,
                       model_path + 'test_pred.png')

        train_nse = r2_score(y_true=train_y.values, y_pred=train_pred.values)
        dev_nse = r2_score(y_true=dev_y.values, y_pred=dev_pred.values)
        test_nse = r2_score(y_true=test_y.values, y_pred=test_pred.values)
        train_nmse = normalized_mean_square_error(y_true=train_y,
                                                  y_pred=train_pred)
        dev_nmse = normalized_mean_square_error(y_true=dev_y, y_pred=dev_pred)
        test_nmse = normalized_mean_square_error(y_true=test_y,
                                                 y_pred=test_pred)
        train_rmse = math.sqrt(
            mean_squared_error(train_y.values, train_pred.values))
        dev_rmse = math.sqrt(mean_squared_error(dev_y.values, dev_pred.values))
        test_rmse = math.sqrt(
            mean_squared_error(test_y.values, test_pred.values))
        train_nrmse = math.sqrt(
            mean_squared_error(train_y.values, train_pred.values)) / (
                sum(train_y.values) / len(train_y.values))
        dev_nrmse = math.sqrt(mean_squared_error(
            dev_y.values,
            dev_pred.values)) / (sum(dev_y.values) / len(dev_y.values))
        test_nrmse = math.sqrt(
            mean_squared_error(test_y.values, test_pred.values)) / (
                sum(test_y.values) / len(test_y.values))
        train_mae = mean_absolute_error(y_true=train_y.values,
                                        y_pred=train_pred.values)
        dev_mae = mean_absolute_error(y_true=dev_y.values,
                                      y_pred=dev_pred.values)
        test_mae = mean_absolute_error(y_true=test_y.values,
                                       y_pred=test_pred.values)
        train_mape = np.mean(
            np.abs(
                (train_y.values - train_pred.values) / train_y.values)) * 100
        dev_mape = np.mean(
            np.abs((dev_y.values - dev_pred.values) / dev_y.values)) * 100
        test_mape = np.mean(
            np.abs((test_y.values - test_pred.values) / test_y.values)) * 100
        train_ppts = PPTS(train_y.values, train_pred.values, 5)
        dev_ppts = PPTS(dev_y.values, dev_pred.values, 5)
        test_ppts = PPTS(test_y.values, test_pred.values, 5)
        print('#' * 25 + 'train_ppts:\n{}'.format(train_ppts))
        print('#' * 25 + 'dev_ppts:\n{}'.format(dev_ppts))
        print('#' * 25 + 'test_ppts:\n{}'.format(test_ppts))
        metrics = {
            'optimal': key_min,
            'train_nse': train_nse,
            'train_nmse': train_nmse,
            'train_rmse': train_rmse,
            'train_nrmse': train_nrmse,
            'train_mae': train_mae,
            'train_mape': train_mape,
            'train_ppts': train_ppts,
            'dev_nse': dev_nse,
            'dev_nmse': dev_nmse,
            'dev_rmse': dev_rmse,
            'dev_nrmse': dev_nrmse,
            'dev_mae': dev_mae,
            'dev_mape': dev_mape,
            'dev_ppts': dev_ppts,
            'test_nse': test_nse,
            'test_nmse': test_nmse,
            'test_rmse': test_rmse,
            'test_nrmse': test_nrmse,
            'test_mae': test_mae,
            'test_mape': test_mape,
            'test_ppts': test_ppts,
        }

        metrics = pd.DataFrame(metrics, index=[0])
        metrics.to_csv(model_path + 'model_metrics.csv')

    else:
        train_ens_pred = pd.DataFrame()
        dev_ens_pred = pd.DataFrame()
        test_ens_pred = pd.DataFrame()
        train_ens_y = pd.DataFrame()
        dev_ens_y = pd.DataFrame()
        test_ens_y = pd.DataFrame()
        subsignal_metrics = pd.DataFrame()

        for i in range(1, subsignals_num + 1):
            sub_signal = 's' + str(i)
            signal_model = model_path + 'history/' + sub_signal + '/'
            criterion_dict = {}
            for files in os.listdir(signal_model):
                if files.find('.csv') >= 0 and (files.find('HISTORY') < 0
                                                and files.find('metrics') < 0):
                    # print(files)
                    data = pd.read_csv(signal_model + files)
                    dev_y = data['dev_y'][0:dev_len]
                    dev_pred = data['dev_pred'][0:dev_len]
                    if criterion == 'RMSE':
                        criterion_dict[files] = data['rmse_dev'][0]
                    elif criterion == 'NMSE':
                        NMSE = normalized_mean_square_error(y_true=dev_y,
                                                            y_pred=dev_pred)
                        criterion_dict[files] = NMSE

            key_min = min(criterion_dict.keys(),
                          key=(lambda k: criterion_dict[k]))
            data = pd.read_csv(signal_model + key_min)
            train_y = data['train_y'][data.shape[0] - train_samples_len:]
            train_pred = data['train_pred'][data.shape[0] - train_samples_len:]
            train_y = train_y.reset_index(drop=True)
            train_pred = train_pred.reset_index(drop=True)
            dev_y = data['dev_y'][0:dev_len]
            dev_pred = data['dev_pred'][0:dev_len]
            test_y = data['test_y'][0:test_len]
            test_pred = data['test_pred'][0:test_len]

            train_nse = r2_score(y_true=train_y.values,
                                 y_pred=train_pred.values)
            dev_nse = r2_score(y_true=dev_y.values, y_pred=dev_pred.values)
            test_nse = r2_score(y_true=test_y.values, y_pred=test_pred.values)
            train_nmse = normalized_mean_square_error(y_true=train_y,
                                                      y_pred=train_pred)
            dev_nmse = normalized_mean_square_error(y_true=dev_y,
                                                    y_pred=dev_pred)
            test_nmse = normalized_mean_square_error(y_true=test_y,
                                                     y_pred=test_pred)
            train_rmse = math.sqrt(
                mean_squared_error(train_y.values, train_pred.values))
            dev_rmse = math.sqrt(
                mean_squared_error(dev_y.values, dev_pred.values))
            test_rmse = math.sqrt(
                mean_squared_error(test_y.values, test_pred.values))
            train_nrmse = math.sqrt(
                mean_squared_error(train_y.values, train_pred.values)) / (
                    sum(train_y.values) / len(train_y.values))
            dev_nrmse = math.sqrt(
                mean_squared_error(dev_y.values, dev_pred.values)) / (
                    sum(dev_y.values) / len(dev_y.values))
            test_nrmse = math.sqrt(
                mean_squared_error(test_y.values, test_pred.values)) / (
                    sum(test_y.values) / len(test_y.values))
            train_mae = mean_absolute_error(y_true=train_y.values,
                                            y_pred=train_pred.values)
            dev_mae = mean_absolute_error(y_true=dev_y.values,
                                          y_pred=dev_pred.values)
            test_mae = mean_absolute_error(y_true=test_y.values,
                                           y_pred=test_pred.values)
            train_mape = np.mean(
                np.abs((train_y.values - train_pred.values) /
                       train_y.values)) * 100
            dev_mape = np.mean(
                np.abs((dev_y.values - dev_pred.values) / dev_y.values)) * 100
            test_mape = np.mean(
                np.abs(
                    (test_y.values - test_pred.values) / test_y.values)) * 100
            train_ppts = PPTS(train_y.values, train_pred.values, 5)
            dev_ppts = PPTS(dev_y.values, dev_pred.values, 5)
            test_ppts = PPTS(test_y.values, test_pred.values, 5)

            print('#' * 25 + 'train_ppts:\n{}'.format(train_ppts))
            print('#' * 25 + 'dev_ppts:\n{}'.format(dev_ppts))
            print('#' * 25 + 'test_ppts:\n{}'.format(test_ppts))

            metrics = {
                'optimal': key_min,
                'train_nse': train_nse,
                'train_nmse': train_nmse,
                'train_rmse': train_rmse,
                'train_nrmse': train_nrmse,
                'train_mae': train_mae,
                'train_mape': train_mape,
                'train_ppts': train_ppts,
                'dev_nse': dev_nse,
                'dev_nmse': dev_nmse,
                'dev_rmse': dev_rmse,
                'dev_nrmse': dev_nrmse,
                'dev_mae': dev_mae,
                'dev_mape': dev_mape,
                'dev_ppts': dev_ppts,
                'test_nse': test_nse,
                'test_nmse': test_nmse,
                'test_rmse': test_rmse,
                'test_nrmse': test_nrmse,
                'test_mae': test_mae,
                'test_mape': test_mape,
                'test_ppts': test_ppts,
            }

            metrics = pd.DataFrame(metrics, index=['s' + str(i)])
            subsignal_metrics = pd.concat([subsignal_metrics, metrics],
                                          sort=False)

            train_ens_pred = pd.concat([train_ens_pred, train_pred], axis=1)
            dev_ens_pred = pd.concat([dev_ens_pred, dev_pred], axis=1)
            test_ens_pred = pd.concat([test_ens_pred, test_pred], axis=1)
            train_ens_y = pd.concat([train_ens_y, train_y], axis=1)
            dev_ens_y = pd.concat([dev_ens_y, dev_y], axis=1)
            test_ens_y = pd.concat([test_ens_y, test_y], axis=1)

        subsignal_metrics.to_csv(model_path + 'subsignals_metrics.csv')
        plot_subsignals_preds(subsignals_y=test_ens_y,
                              subsignals_pred=test_ens_pred,
                              fig_savepath=model_path + 'subsignals_pred.png')
        train_pred = train_ens_pred.sum(axis=1)
        dev_pred = dev_ens_pred.sum(axis=1)
        test_pred = test_ens_pred.sum(axis=1)
        train_pred[train_pred < 0.0] = 0.0
        dev_pred[dev_pred < 0.0] = 0.0
        test_pred[test_pred < 0.0] = 0.0

        train_pred = train_pred.values
        dev_pred = dev_pred.values
        test_pred = test_pred.values

        print('train_pred len:{}'.format(len(train_pred)))

        train_y = orig_df[(train_len - train_samples_len):train_len]
        print('train_y len:{}'.format(train_y.shape[0]))
        dev_y = orig_df[train_len:train_len + dev_len]
        test_y = orig_df[train_len + dev_len:]
        train_y = train_y.reset_index(drop=True)
        dev_y = dev_y.reset_index(drop=True)
        test_y = test_y.reset_index(drop=True)
        train_y = train_y.values
        dev_y = dev_y.values
        test_y = test_y.values

        max_streamflow = max(orig_df)
        ratio_train = train_pred / max_streamflow
        ratio_dev = dev_pred / max_streamflow
        ratio_test = test_pred / max_streamflow
        rto_train = pd.DataFrame(ratio_train, columns=['train'])['train']
        rto_dev = pd.DataFrame(ratio_dev, columns=['dev'])['dev']
        rto_test = pd.DataFrame(ratio_test, columns=['test'])['test']
        ratio_df = pd.concat([rto_train, rto_dev, rto_test], axis=1)
        ratio1_5 = ratio_df[ratio_df > 1.5]
        ratio2 = ratio_df[ratio_df > 2]
        count_1_5 = pd.concat([
            ratio1_5['train'].value_counts(), ratio1_5['dev'].value_counts(),
            ratio1_5['test'].value_counts()
        ], axis=1)
        count_2 = pd.concat([
            ratio2['train'].value_counts(), ratio2['dev'].value_counts(),
            ratio2['test'].value_counts()
        ], axis=1)
        count_1_5.to_csv(model_path + 'pred_div_maxtrue_ratio1_5_count.csv')
        count_2.to_csv(model_path + 'pred_div_maxtrue_ratio2_count.csv')
        ratio_df.to_csv(model_path + 'pred_div_maxtrue_ratio.csv')

        train_nse = r2_score(y_true=train_y, y_pred=train_pred)
        train_nmse = normalized_mean_square_error(y_true=train_y,
                                                  y_pred=train_pred)
        train_rmse = math.sqrt(mean_squared_error(train_y, train_pred))
        train_nrmse = math.sqrt(mean_squared_error(
            train_y, train_pred)) / (sum(train_y) / len(train_y))
        train_mae = mean_absolute_error(train_y, train_pred)
        train_mape = np.mean(np.abs((train_y - train_pred) / train_y)) * 100
        train_ppts = PPTS(train_y, train_pred, 5)

        dev_nse = r2_score(y_true=dev_y, y_pred=dev_pred)
        dev_nmse = normalized_mean_square_error(y_true=dev_y, y_pred=dev_pred)
        dev_rmse = math.sqrt(mean_squared_error(dev_y, dev_pred))
        dev_nrmse = math.sqrt(mean_squared_error(
            dev_y, dev_pred)) / (sum(dev_y) / len(dev_y))
        dev_mae = mean_absolute_error(dev_y, dev_pred)
        dev_mape = np.mean(np.abs((dev_y - dev_pred) / dev_y)) * 100
        dev_ppts = PPTS(dev_y, dev_pred, 5)

        test_nse = r2_score(y_true=test_y, y_pred=test_pred)
        test_nmse = normalized_mean_square_error(y_true=test_y,
                                                 y_pred=test_pred)
        test_rmse = math.sqrt(mean_squared_error(test_y, test_pred))
        test_nrmse = math.sqrt(mean_squared_error(
            test_y, test_pred)) / (sum(test_y) / len(test_y))
        test_mae = mean_absolute_error(test_y, test_pred)
        test_mape = np.mean(np.abs((test_y - test_pred) / test_y)) * 100
        test_ppts = PPTS(test_y, test_pred, 5)
        model_metrics = {
            'train_nse': train_nse,
            'train_nmse': train_nmse,
            'train_rmse': train_rmse,
            'train_nrmse': train_nrmse,
            'train_mae': train_mae,
            'train_mape': train_mape,
            'train_ppts': train_ppts,
            'dev_nse': dev_nse,
            'dev_nmse': dev_nmse,
            'dev_rmse': dev_rmse,
            'dev_nrmse': dev_nrmse,
            'dev_mae': dev_mae,
            'dev_mape': dev_mape,
            'dev_ppts': dev_ppts,
            'test_nse': test_nse,
            'test_nmse': test_nmse,
            'test_rmse': test_rmse,
            'test_nrmse': test_nrmse,
            'test_mae': test_mae,
            'test_mape': test_mape,
            'test_ppts': test_ppts,
        }
        model_train_results = {
            'train_y': train_y,
            'train_pred': train_pred,
        }
        model_dev_results = {
            'dev_y': dev_y,
            'dev_pred': dev_pred,
        }
        model_test_results = {
            'test_y': test_y,
            'test_pred': test_pred,
        }
        MODEL_METRICS = pd.DataFrame(model_metrics,
                                     index=np.arange(start=0, stop=1, step=1))
        MODEL_TRAIN_RESULTS = pd.DataFrame(model_train_results,
                                           index=np.arange(
                                               start=0,
                                               stop=train_samples_len,
                                               step=1))
        MODEL_DEV_RESULTS = pd.DataFrame(model_dev_results,
                                         index=np.arange(start=0,
                                                         stop=dev_len,
                                                         step=1))
        MODEL_TEST_RESULTS = pd.DataFrame(model_test_results,
                                          index=np.arange(start=0,
                                                          stop=test_len,
                                                          step=1))
        MODEL_METRICS.to_csv(model_path + 'model_metrics.csv')
        MODEL_TRAIN_RESULTS.to_csv(model_path + 'model_train_results.csv')
        MODEL_DEV_RESULTS.to_csv(model_path + 'model_dev_results.csv')
        MODEL_TEST_RESULTS.to_csv(model_path + 'model_test_results.csv')
        plot_rela_pred(train_y, train_pred, model_path + 'train_pred.png')
        plot_rela_pred(dev_y, dev_pred, model_path + 'dev_pred.png')
        plot_rela_pred(test_y, test_pred, model_path + 'test_pred.png')
    plt.close('all')
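Example #5 recomputes the same metric suite (NSE via r2_score, NMSE, RMSE, NRMSE, MAE, MAPE, PPTS) at least three times, once per branch. A consolidated sketch of the generic part of that computation; `normalized_mean_square_error` and `PPTS` are project-specific helpers and are deliberately left out, and the function name `regression_metrics` is illustrative:

import math
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

def regression_metrics(y_true, y_pred, prefix=''):
    # NSE is computed with r2_score, as in the original; NRMSE is the RMSE
    # divided by the mean observation, matching the sqrt(MSE)/mean(y) pattern above.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    rmse = math.sqrt(mean_squared_error(y_true, y_pred))
    return {
        prefix + 'nse': r2_score(y_true, y_pred),
        prefix + 'rmse': rmse,
        prefix + 'nrmse': rmse / y_true.mean(),
        prefix + 'mae': mean_absolute_error(y_true, y_pred),
        prefix + 'mape': np.mean(np.abs((y_true - y_pred) / y_true)) * 100,
    }

# Illustrative usage on toy data.
print(regression_metrics([3.0, 5.0, 8.0], [2.8, 5.4, 7.9], prefix='test_'))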