def train_pred_eval_model(x_train_scaled, y_train_scaled, x_test_scaled, y_test,
                          mu_teste_list, std_test_list, scale, lstm_units=10,
                          dropout_prob=0.5, optimizer='rmsprop', epochs=50,
                          batch_size=64, model=None):
    """Train an LSTM, forecast recursively, un-scale and evaluate.

    The model is fitted on the scaled training data.  Forecasting is
    recursive: the first input window is feature 0 of the first test
    sample, and every prediction is appended to the history so that it
    becomes part of the next input window.

    Args:
        x_train_scaled: training inputs; indexed as
            ``(samples, time_step, features)`` when building the network.
        y_train_scaled: training targets; ``shape[1]`` is used as the
            number of dense output units.
        x_test_scaled: test inputs; only ``x_test_scaled[0, :, 0]`` is read
            (as the seed window), plus ``shape[1]`` for the window length.
        y_test: ground-truth values in the original range; its length sets
            the number of prediction steps.
        mu_teste_list: unused; kept for backward compatibility.
        std_test_list: unused; kept for backward compatibility.
        scale: fitted scaler exposing ``inverse_transform``.
        lstm_units: forwarded to ``build_network`` when a model is built.
        dropout_prob: forwarded to ``build_network`` when a model is built.
        optimizer: forwarded to ``build_network`` when a model is built.
        epochs: number of training epochs passed to ``model.fit``.
        batch_size: batch size passed to ``model.fit``.
        model: an existing model to (continue to) train; when ``None`` a
            new network is built with ``lstm_network``.

    Returns:
        tuple: ``(rmse, mape, est, model)`` where ``rmse`` is the root mean
        square error, ``mape`` the value returned by ``loss.get_mape``,
        ``est`` the un-scaled predictions as a column vector and ``model``
        the fitted model.
    """
    if model is None:
        model = lstm_network.lstm_network().build_network(
            time_step=x_train_scaled.shape[1],
            features_num=x_train_scaled.shape[2],
            dropout_prob=dropout_prob,
            dense_units=y_train_scaled.shape[1],
            lstm_units=lstm_units,
            optimizer=optimizer)

    # Fit the LSTM network.
    model.fit(x_train_scaled,
              y_train_scaled,
              epochs=epochs,
              batch_size=batch_size,
              verbose=1)

    # Recursive prediction: `history` starts as the first test window and
    # grows with every predicted value, so later windows consist (partly or
    # wholly) of the model's own earlier outputs.
    timesteps = x_test_scaled.shape[1]
    history = x_test_scaled[0, :, 0].tolist()
    est_scaled = []
    for _ in range(len(y_test)):
        # The model input is always reshaped to a single-feature window.
        window = np.reshape(np.array(history[-timesteps:]), (1, timesteps, 1))
        pre_y = model.predict(window)
        history.extend(pre_y[0])
        est_scaled.extend(pre_y)

    # Back to the original value range, as column vectors for the metrics.
    est_scaled = np.array(est_scaled).reshape(-1, 1)
    est = scale.inverse_transform(est_scaled)
    y_test = np.reshape(y_test, (-1, 1))
    rmse = math.sqrt(mean_squared_error(y_test, est))
    mape = loss.get_mape(y_test, est)

    return rmse, mape, est, model
def predict(model, x_test_scaled, y_test, scaler):
    """Predict with `model`, un-scale the result and report the errors.

    The scaled predictions are flattened into one column and repeated
    `feature_num` times so that they match the width passed to
    `scaler.inverse_transform`; column `cap_col_index - 1` of the
    un-scaled matrix is then taken as the prediction.

    Returns:
        tuple: ``(pre_y, mape, mse)``; note that ``mse`` holds the value
        returned by ``loss.get_rmse``.
    """
    scaled_column = np.reshape(model.predict(x_test_scaled), (-1, 1))
    # Replicate the single predicted column across every feature column.
    widened = np.tile(scaled_column, (1, feature_num))
    unscaled = scaler.inverse_transform(widened)
    pre_y = unscaled[:, cap_col_index - 1]

    truth = np.reshape(y_test, (-1, 1))
    mape = loss.get_mape(truth, pre_y)
    mse = loss.get_rmse(truth, pre_y)
    print("mape:{0:.4},mse:{1:.4}".format(mape, mse))
    return pre_y, mape, mse
# Example no. 3
# 0
def main():
    """Command-line entry point: train an LSTM, predict, log and plot.

    Parses the command-line options, loads and reshapes the data through
    ``load_data.load_data``, obtains a model via ``get_model``, trains it,
    predicts on the test set, maps targets and predictions back through the
    y-scaler, writes one CSV-style line with the error metrics to a log
    file under ``--output_path`` and finally calls ``plot_and_save``.

    The ``mse`` column/variable holds the value returned by
    ``loss.get_rmse``.
    """
    # TODO: debug multi-feature input

    parser = argparse.ArgumentParser(description='LSTM RUL Prediction')
    parser.add_argument('--filename',
                        type=str,
                        default="data/2017_06_30_cell8_data.csv")
    parser.add_argument('--output_path',
                        type=str,
                        default="snapshot/single_variable")
    parser.add_argument('--predict_measure',
                        type=int,
                        default=0,
                        choices=[0, 1])
    parser.add_argument('--sequence_length',
                        type=int,
                        default=20,
                        help='time_step in lstm')
    # type=float is required: without it a value given on the command line
    # would reach the data loader as a string.
    parser.add_argument('--split',
                        type=float,
                        default=0.6,
                        help='split of train and test set')
    parser.add_argument('--batch_size',
                        type=int,
                        default=8,
                        help='input batch size for training (default: 8)')
    parser.add_argument('--epochs',
                        type=int,
                        default=50,
                        metavar='N',
                        help='number of epochs to train (default: 50)')
    # Same reason as --split: keep the dropout probability numeric.
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--saved_figure_path',
                        default='result/single_variable/')
    parser.add_argument('--feature_num',
                        type=int,
                        default=1,
                        help='single feature use 1,multi use 7')
    parser.add_argument(
        '--usecols',
        default=[9, 10],
        type=int,
        nargs='+',
        help=
        'single feature imput use [9,10], multi use [3, 4, 5, 6, 7, 8, 9, 10]')
    # type=int is required here; otherwise, when run as a script, get_model
    # ends up returning None.
    parser.add_argument(
        '--get_model_measure',
        default=1,
        type=int,
        help='0 for define model from begin, 1 for load exist model')
    # Fewer LSTM cells gave more accurate results -- too little data?
    parser.add_argument('--lstm_units', default=50, type=int)

    args = parser.parse_args()
    split = args.split
    dropout_prob = args.dropout
    sequence_length = args.sequence_length
    batch_size = args.batch_size
    epochs = args.epochs
    # 0: predict one cycle; 1: predict len(test_y) cycles continuously,
    # using each predicted value as the next input.
    predict_measure = args.predict_measure
    filename = args.filename
    save_filepath = args.saved_figure_path
    feature_num = args.feature_num  # features per training sample
    usecols = args.usecols  # columns to read from the data file
    get_model_measure = args.get_model_measure
    lstm_units = args.lstm_units

    loss_file_path = args.output_path
    loss_file_name = 'seqlen:{0}_mse_mape_{1}.txt'.format(
        sequence_length, str(get_time()))

    # The context manager guarantees the log file is closed even when
    # training, prediction or plotting raises.
    with open(osp.join(loss_file_path, loss_file_name), 'w') as fo:
        # Flush the header immediately so it is visible while training runs.
        fo.write('N,batch_size,epochs,mse,mape\n')
        fo.flush()

        dataloader = load_data.load_data(filename,
                                         sequence_length,
                                         split,
                                         usecols=usecols)
        sca_x, sca_y = dataloader.load_scaler()
        train_x, train_y, test_x, test_y = dataloader.get_x_y()
        all_y = dataloader.get_all_y()

        # Make the feature axis explicit: (samples, time_step, feature_num).
        train_x = np.reshape(
            train_x, (train_x.shape[0], train_x.shape[1], feature_num))
        test_x = np.reshape(
            test_x, (test_x.shape[0], test_x.shape[1], feature_num))
        print(train_y.shape)

        lstm = lstm_model.lstm()
        model = get_model(lstm,
                          get_model_measure,
                          sequence_length,
                          feature_num,
                          dropout_prob,
                          lstm_units=lstm_units)
        lstm.train_model(model, train_x, train_y, batch_size, epochs)
        predict_y = lstm.predict(model, test_x, pre_way=predict_measure)

        # Map targets and predictions back through the y-scaler.
        train_y = sca_y.inverse_transform(train_y)
        test_y = sca_y.inverse_transform(test_y)
        predict_y = sca_y.inverse_transform(predict_y)

        mse = loss.get_rmse(test_y, predict_y)
        mape = loss.get_mape(test_y, predict_y)

        fo.write('{0},{1},{2},{3},{4}\n'.format(sequence_length, batch_size,
                                                epochs, mse, mape))
        fo.flush()

        plotfilename = 'seqLen:{0}_batchsize:{1}_epochs:{2}_preMeasure:{3}_dropout:{4}'.format(
            sequence_length, batch_size, epochs, predict_measure, dropout_prob)
        title = plotfilename + '\nmse:{0}_mape:{1}'.format(mse, mape)
        plot_and_save(title, sequence_length, plotfilename, save_filepath,
                      all_y, test_y, train_y, predict_y)
def multi_cell_file(bound, split, timestep, pre_step, batchsize, epochs,
                    dropout_prob, failure):
    """Train one LSTM on several cell files and evaluate the last two cells.

    Every cell listed in the module-level ``cell_nums`` is loaded and
    split; all training parts are scaled together and used to fit a single
    network.  For the last two cells the function then predicts, un-scales,
    prints MAPE / RMSE and the RUL error, appends a row through
    ``error_2_csv`` and saves a plot through ``utils.plot_and_save``.

    Args:
        bound: forwarded to ``get_origin_train_test``.
        split: forwarded to ``get_origin_train_test`` and ``get_rul``.
        timestep: window length used for slicing and as the network's
            time-step argument.
        pre_step: forwarded to the x/y slicing helpers; also the number of
            dense output units and the width of the training targets.
        batchsize: third positional argument of ``model.fit``.
        epochs: fourth positional argument of ``model.fit``.
        dropout_prob: forwarded to ``build_network``.
        failure: forwarded to ``error_2_csv``.
    """
    # TODO: use the first 600 cycles of cells 43 and 46 to train one model,
    # then use it to predict the last 0.3 of the data of both cells.
    cell_name_header = 'Statistics_1-'

    # Load the raw train / test / complete data of every cell file.
    train, test, full_data = [], [], []
    cell_filenames = []
    for cell_num in cell_nums:
        cell_filename = cell_name_header + cell_num
        cell_filenames.append(cell_filename)
        cell_train, cell_test, cell_all = get_origin_train_test(
            data_path,
            cell_filename,
            split,
            timestep,
            bound=bound,
            usecols=multi_usecols)
        train.append(cell_train)
        test.append(cell_test)
        full_data.append(cell_all)

    # Actual RUL information for each test cell (the last two cells).
    # get_rul's multiple return values arrive as one tuple, which is zipped
    # with the key names below.
    rul_keys = ['start_pre_cycle', 'eol_cycle', 'rul', 'eol_cap',
                'eol_cap_norm']
    n_cells = len(full_data)
    rul_list = [
        dict(zip(rul_keys,
                 get_rul(full_data[idx], split, col_index=cap_col_index)))
        for idx in range(n_cells - 2, n_cells)
    ]

    # Merge all training data into one array, scale it, then restore the
    # per-cell layout.
    train_all = np.array(train).reshape(-1, feature_num)
    train_all_scaled, scaler = get_scaled_train(train_all)
    train_all_scaled = np.reshape(train_all_scaled,
                                  (len(train), -1, feature_num))

    # Split the scaled data of each cell into x and y; the training parts
    # are later merged into one big training set fed to the network.
    target_col = cap_col_index - 1
    x_train_scaled, y_train_scaled = [], []
    x_test_scaled, y_test = [], []
    for scaled_cell_train, raw_cell_test in zip(train_all_scaled, test):
        cell_x_train, cell_y_train = get_x_y(scaled_cell_train, timestep,
                                             target_col, pre_step)
        x_train_scaled.append(cell_x_train)
        y_train_scaled.append(cell_y_train)

        cell_x_test, cell_y_test = get_x_scaled_y(raw_cell_test, timestep,
                                                  scaler, target_col,
                                                  pre_step)
        x_test_scaled.append(cell_x_test)
        y_test.append(cell_y_test)
    x_train_scaled = np.reshape(x_train_scaled, (-1, timestep, feature_num))
    y_train_scaled = np.array(y_train_scaled).reshape(-1, pre_step)

    # Training.
    model = lstm_network.lstm_network().build_network(
        timestep, feature_num, dense_units=pre_step, dropout_prob=dropout_prob)
    model.fit(x_train_scaled, y_train_scaled, batchsize, epochs)

    # Prediction for the last two cells; `rul_pos` indexes rul_list, which
    # holds exactly those cells in the same order.
    n_test = len(x_test_scaled)
    for rul_pos, i in enumerate(range(n_test - 2, n_test)):
        rul = rul_list[rul_pos]

        pre_scaled_y = np.reshape(model.predict(x_test_scaled[i]), (-1, 1))
        # Repeat the predicted column so it matches the scaler's width, then
        # keep only the target column after un-scaling.
        construct_pre = np.tile(pre_scaled_y, (1, feature_num))
        pre_y = scaler.inverse_transform(construct_pre)[:, cap_col_index - 1]

        y_test_cell = np.reshape(y_test[i], (-1, 1))
        mape = loss.get_mape(y_test_cell, pre_y)
        mse = loss.get_rmse(y_test_cell, pre_y)
        mape_mse_info = "mape:{0:.4},mse:{1:.4}".format(mape, mse)
        print(mape_mse_info)

        pre_rul = get_pre_rul(pre_y, rul['eol_cap_norm'])
        rul_error = rul['rul'] - pre_rul
        info = str(i) + '_RUL_actual:{0},LSTM:{1},error:{2},'.format(
            rul['rul'], pre_rul, rul_error) + mape_mse_info
        print(info)

        error_2_csv(cell_filenames[i], rul['eol_cap'],
                    rul['start_pre_cycle'], rul['eol_cycle'], rul['rul'],
                    pre_rul, rul_error, mape, mse, failure)

        all_y = np.array(full_data[i][:, cap_col_index]).reshape(-1, 1)
        utils.plot_and_save(rul['eol_cap_norm'], cell_filenames[i], timestep,
                            info, 'result/sks_figure/', all_y, test[i],
                            train[i], pre_y)