def predict(test_split, lstm_layers, lstm_layer_nodes, frozen_layer_names, add_layer_num, mode,
            draw_fitting_curve=False):
    """
    Load a previously saved LSTM model and score it on the test part of the data set.

    :param test_split: fraction of rows held out for testing; ``int(len(df) * (1 - test_split))``
        is passed to ``process_data`` as the number of training rows
    :param lstm_layers: used only to build the model / weight file names
    :param lstm_layer_nodes: used only to build the model / weight file names
    :param frozen_layer_names: sized collection; only ``len()`` of it is used, and only when ``mode == 0``
    :param add_layer_num: used only to build the file names when ``mode == 0``
    :param mode: ``0`` selects the ``.transfer_frozen<N>_add<M>`` files, any other value the ``.direct`` files
    :param draw_fitting_curve: when truthy, ``evaluate.draw_fitting_curve`` is called with the results
    :return: the value returned by ``evaluate.all_metrics(y_true, y_pred)``
    """
    model_root_path = '../Models/LSTM1/TransferLearning'
    cols = ['PM25', 'Press', 'RH', 'Temp', 'Wind Speed', 'Wind Direction', 'Month', 'Day', 'Hour']
    data_path = '../DataSet/Processed/Train/060376012_2016_2017_v1.csv'
    df_raw = pd.read_csv(data_path, usecols=cols, dtype=str)
    time_steps = {
        'PM25': 24,
        'Press': 24,
        'RH': 8,
        'Temp': 7,
        'Wind Speed': 2,
        'Wind Direction': 4
    }

    # The model and weight file names share everything except prefix and extension.
    if mode == 0:
        variant = '.transfer_frozen' + str(len(frozen_layer_names)) + '_add' + str(add_layer_num)
    else:
        variant = '.direct'
    size_tag = str(lstm_layers) + '_nodes' + str(lstm_layer_nodes) + variant
    model_path = model_root_path + '/lstm_model_layers' + size_tag + '.json'
    weight_path = model_root_path + '/weights_layer' + size_tag + '.best.hdf5'

    train_num = int(len(df_raw) * (1 - test_split))
    # Only the test split and the target scaler are needed for prediction.
    _, X_test, _, y_test, y_scaler = process_data(df_raw, time_steps, train_num)

    # Load model and weights; the context manager closes the JSON file deterministically.
    with open(model_path) as model_file:
        json_string = model_file.read()
    model = model_from_json(json_string)
    model.load_weights(weight_path)
    model.summary()

    # Predict, then map both series back through the target scaler.
    y_pred = model.predict(X_test)
    y_true = y_scaler.inverse_transform(y_test.reshape(-1, 1))
    y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1))

    if draw_fitting_curve:
        evaluate.draw_fitting_curve(y_true, y_pred)

    # Drop the model and reset the Keras / TensorFlow state before returning.
    del model
    gc.collect()
    K.clear_session()
    tf.reset_default_graph()

    return evaluate.all_metrics(y_true, y_pred)
def rnn(X_train, X_test, y_train, y_test, y_scaler, train_num):
    """
    Recurrent neural network baseline: load the saved model if present, otherwise train one,
    then evaluate on the test split.

    :param X_train: training features; reshaped below to (samples, 1, features)
    :param X_test: test features; reshaped the same way
    :param y_train: training targets; reshaped below to (samples, 1, 1)
    :param y_test: test targets; reshaped the same way, also used as validation data during training
    :param y_scaler: scaler forwarded to ``data.inverse_to_original_data``
    :param train_num: forwarded to ``data.inverse_to_original_data``
    :return: None; metrics and the fitting curve are produced through ``evaluate``
    """
    model_path = '../Models/RNN/rnn_model_060376012.json'
    weight_path = '../Models/RNN/rnn_weights_060376012.best.hdf5'

    # Insert a length-1 time axis: (samples, features) -> (samples, 1, features).
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
    # Targets: flat -> (samples, 1) -> (samples, 1, 1), matching the sequence output of the network.
    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)
    y_train = y_train.reshape((y_train.shape[0], 1, y_train.shape[1]))
    y_test = y_test.reshape((y_test.shape[0], 1, y_test.shape[1]))

    if os.path.exists(model_path) and os.path.exists(weight_path):
        print('load model...')
        model = lstm.load_model_and_weights(model_path, weight_path)
    else:
        print('训练模型...')
        model = Sequential()
        model.add(SimpleRNN(input_shape=(X_train.shape[1], X_train.shape[2]), output_dim=300, dropout=0.3,
                            return_sequences=True))
        model.add(SimpleRNN(output_dim=300, dropout=0.3, return_sequences=True))
        model.add(SimpleRNN(output_dim=300, dropout=0.3, return_sequences=True))
        model.add(Dense(1024))
        model.add(Dropout(0.5))
        model.add(Dense(1))

        # Persist the architecture; the context manager flushes and closes the file.
        with open(model_path, 'w') as model_file:
            model_file.write(model.to_json())

        model.compile(loss='mse', optimizer='RMSprop')
        # Only the weights with the lowest validation loss are kept on disk.
        checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        callbacks_list = [checkpoint]
        model.fit(X_train, y_train, epochs=500, batch_size=512, validation_data=(X_test, y_test), verbose=1,
                  callbacks=callbacks_list, shuffle=False)

    y_pred = model.predict(X_test)

    # Map the scaled targets and predictions back through the project helper.
    y_test = data.inverse_to_original_data(y_train.reshape(1, -1), y_test.reshape(1, -1), scaler=y_scaler,
                                           train_num=train_num)
    y_pred = data.inverse_to_original_data(y_train.reshape(1, -1), y_pred.reshape(1, -1), scaler=y_scaler,
                                           train_num=train_num)
    evaluate.all_metrics(y_test, y_pred)
    evaluate.draw_fitting_curve(y_test, y_pred, 0)
def lasso(X_train, X_test, y_train, y_test, y_scaler, train_num):
    """
    Fit a cross-validated LASSO regressor and evaluate it on the test split.

    :param X_train: training features passed to ``LassoCV.fit``
    :param X_test: test features passed to ``LassoCV.predict``
    :param y_train: training targets; must support ``reshape``
    :param y_test: test targets; must support ``reshape``
    :param y_scaler: scaler forwarded to ``data.inverse_to_original_data``
    :param train_num: forwarded to ``data.inverse_to_original_data``
    :return: None; metrics and the fitting curve are produced through ``evaluate``
    """
    def _to_original(values):
        # Project helper that undoes the target scaling; it also receives the training targets.
        return data.inverse_to_original_data(
            y_train.reshape(1, -1),
            values.reshape(1, -1),
            scaler=y_scaler,
            train_num=train_num,
        )

    # Alternative alpha grid (disabled): [0.0001, 0.0005, 0.00005, 0.00002, 0.00001, 0.000001]
    regressor = LassoCV(alphas=[0.0003, 0.0002])
    regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)

    actual_original = _to_original(y_test)
    predicted_original = _to_original(predicted)

    evaluate.all_metrics(actual_original, predicted_original)
    evaluate.draw_fitting_curve(actual_original, predicted_original, 0)
def ridge(X_train, X_test, y_train, y_test, y_scaler, train_num):
    """
    Fit a cross-validated ridge regressor and evaluate it on the test split.

    :param X_train: training features passed to ``RidgeCV.fit``
    :param X_test: test features passed to ``RidgeCV.predict``
    :param y_train: training targets; must support ``reshape``
    :param y_test: test targets; must support ``reshape``
    :param y_scaler: scaler forwarded to ``data.inverse_to_original_data``
    :param train_num: forwarded to ``data.inverse_to_original_data``
    :return: None; metrics and the fitting curve are produced through ``evaluate``
    """
    def _to_original(values):
        # Project helper that undoes the target scaling; it also receives the training targets.
        return data.inverse_to_original_data(
            y_train.reshape(1, -1),
            values.reshape(1, -1),
            scaler=y_scaler,
            train_num=train_num,
        )

    # Alternative alpha grid (disabled): [1, 0.5, 0.1, 0.01, 0.05, 0.001, 0.005]
    regressor = RidgeCV(alphas=[5.0, 10.0])
    regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)

    actual_original = _to_original(y_test)
    predicted_original = _to_original(predicted)

    evaluate.all_metrics(actual_original, predicted_original)
    evaluate.draw_fitting_curve(actual_original, predicted_original, 0)
def svr(X_train, X_test, y_train, y_test, y_scaler, train_num):
    """
    Fit an RBF-kernel support vector regressor (C=1) and evaluate it on the test split.

    :param X_train: training features passed to ``SVR.fit``
    :param X_test: test features passed to ``SVR.predict``
    :param y_train: training targets; must support ``reshape``
    :param y_test: test targets; must support ``reshape``
    :param y_scaler: scaler forwarded to ``data.inverse_to_original_data``
    :param train_num: forwarded to ``data.inverse_to_original_data``
    :return: None; metrics and the fitting curve are produced through ``evaluate``
    """
    def _to_original(values):
        # Project helper that undoes the target scaling; it also receives the training targets.
        return data.inverse_to_original_data(
            y_train.reshape(1, -1),
            values.reshape(1, -1),
            scaler=y_scaler,
            train_num=train_num,
        )

    regressor = SVR(kernel='rbf', C=1)
    regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)

    actual_original = _to_original(y_test)
    predicted_original = _to_original(predicted)

    evaluate.all_metrics(actual_original, predicted_original)
    evaluate.draw_fitting_curve(actual_original, predicted_original, 0)
def ann(X_train, X_test, y_train, y_test, y_scaler, train_num):
    """
    Fully connected neural network baseline: load the saved model if present, otherwise train one,
    then evaluate on the test split.

    :param X_train: training features; ``len(X_train[0])`` is used as the input dimension
    :param X_test: test features, also used as validation data during training
    :param y_train: training targets; must support ``reshape``
    :param y_test: test targets; must support ``reshape``
    :param y_scaler: scaler forwarded to ``data.inverse_to_original_data``
    :param train_num: forwarded to ``data.inverse_to_original_data``
    :return: None; metrics and the fitting curve are produced through ``evaluate``
    """
    model_path = '../Models/ANN/ann_model_060376012.json'
    weight_path = '../Models/ANN/ann_weights_060376012.best.hdf5'

    # Both files are required before loading: the JSON is written before training starts,
    # so it can exist without a weight file. This mirrors the check in rnn().
    if os.path.exists(model_path) and os.path.exists(weight_path):
        print('load model...')
        model = lstm.load_model_and_weights(model_path, weight_path)
    else:
        print('训练模型...')
        model = Sequential()
        model.add(Dense(500, input_dim=(len(X_train[0])), activation='relu'))
        model.add(Dropout(0.3))
        model.add(Dense(1000, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(500, activation='relu'))
        model.add(Dropout(0.3))
        model.add(Dense(1))

        # Persist the architecture; the context manager flushes and closes the file.
        with open(model_path, 'w') as model_file:
            model_file.write(model.to_json())

        model.compile(loss='mse', optimizer='RMSprop')
        # Only the weights with the lowest validation loss are kept on disk.
        checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        callbacks_list = [checkpoint]
        model.fit(X_train, y_train, epochs=500, batch_size=512, validation_data=(X_test, y_test), verbose=1,
                  callbacks=callbacks_list, shuffle=False)

    y_pred = model.predict(X_test)

    # Map the scaled targets and predictions back through the project helper.
    y_test = data.inverse_to_original_data(y_train.reshape(1, -1), y_test.reshape(1, -1), scaler=y_scaler,
                                           train_num=train_num)
    y_pred = data.inverse_to_original_data(y_train.reshape(1, -1), y_pred.reshape(1, -1), scaler=y_scaler,
                                           train_num=train_num)
    evaluate.all_metrics(y_test, y_pred)
    evaluate.draw_fitting_curve(y_test, y_pred, 0)
def main(conf, is_train=True):
    """
    Train (or just load) a two-layer LSTM described by ``conf`` and evaluate it on the test split.

    :param conf: nested dict; this function reads ``conf['data_conf']['data_path']``,
        ``conf['data_conf']['usecols']`` and ``conf['model_conf']`` (keys ``epochs``, ``batch_size``,
        ``target_col``; the whole ``model_conf`` is also passed to ``data.process_data_for_lstm``)
    :param is_train: when truthy, build or reload the model and fit it; otherwise load the saved
        model and weights and only predict
    :return: None; metrics and curves are produced through ``evaluate``
    """
    # NOTE(review): model_path and weight_path are not defined in this function;
    # presumably they are module-level names - confirm.
    df_raw = data.get_raw_data(conf['data_conf']['data_path'], usecols=conf['data_conf']['usecols'])
    X_train, X_test, y_train, y_test, y_scaler = data.process_data_for_lstm(df_raw, conf['model_conf'])

    if is_train:
        if os.path.exists(model_path):
            with open(model_path) as model_file:
                json_string = model_file.read()
            model = model_from_json(json_string)
            # Load existing weights when available so training resumes from them.
            if os.path.exists(weight_path):
                print('load weights ' + weight_path)
                model.load_weights(weight_path)
        else:
            model = Sequential()
            model.add(LSTM(32, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
            model.add(LSTM(32, return_sequences=False))
            model.add(Dense(units=64, activation='linear'))
            model.add(Dense(units=1))
            # Persist the architecture; the context manager flushes and closes the file.
            with open(model_path, 'w') as model_file:
                model_file.write(model.to_json())

        model.compile(loss='mse', optimizer='RMSprop')
        # Only the weights with the lowest validation loss are kept on disk.
        checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        callbacks_list = [checkpoint]
        history = model.fit(X_train, y_train, epochs=conf['model_conf']['epochs'],
                            batch_size=conf['model_conf']['batch_size'], validation_data=(X_test, y_test),
                            verbose=1, callbacks=callbacks_list, shuffle=False)
        evaluate.draw_loss_curve(figure_num=conf['model_conf']['target_col'], train_loss=history.history['loss'],
                                 val_loss=history.history['val_loss'])
    else:
        with open(model_path) as model_file:
            json_string = model_file.read()
        model = model_from_json(json_string)
        model.load_weights(weight_path)

    y_pred = model.predict(X_test)
    y_true = y_scaler.inverse_transform(y_test.reshape(-1, 1))
    y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1))

    # NOTE(review): prediction i + 1 is scored against truth i (one-step shift) - confirm this is intended.
    evaluate.all_metrics(y_true[: len(y_true) - 1], y_pred[1:])
    evaluate.draw_fitting_curve(y_true[: len(y_true) - 1], y_pred[1:])
def main(is_train=True):
    """
    Train (or just load) a two-layer LSTM on sliding windows of the PM25 column and evaluate it.

    Each sample is ``time_steps`` consecutive values; the value that follows the window is the target.
    The last 40% of the windows (``test_split = 0.4``) form the test split.

    :param is_train: when truthy, build or reload the model and fit it; otherwise load the saved
        model and weights and only predict
    :return: None; metrics and curves are produced through ``evaluate``
    """
    data_path = '../DataSet/Processed/Train/261630033_2016_2017_v1.csv'
    model_path = '../Models/Test/model_epochs10_batch24.best.json'
    weight_path = '../Models/Test/weights_epochs10_batch24.best.hdf5'

    df_raw = data.get_raw_data(data_path, ['PM25'], dtype=float)
    seq_data = np.array(df_raw).reshape(1, -1)[0]
    test_split = 0.4
    time_steps = 4

    # Sliding windows of time_steps inputs plus one trailing target value.
    new_data = np.array([
        list(seq_data[i:i + time_steps + 1])
        for i in range(len(df_raw) - time_steps)
    ])

    train_num = int(len(new_data) * (1 - test_split))
    # The last column is the target, the first time_steps columns are the inputs.
    y_scaled, y_scaler = data.min_max_scale(new_data[:, -1].reshape(-1, 1))
    X_scaled, X_scaler = data.min_max_scale(new_data[:, 0:time_steps])

    y_train = y_scaled[:train_num, :].reshape(1, -1)[0]
    y_test = y_scaled[train_num:, :].reshape(1, -1)[0]
    X_train = X_scaled[:train_num, :]
    X_test = X_scaled[train_num:, :]
    # Shape expected by the LSTM input below: (samples, time_steps, 1).
    X_train = X_train.reshape(X_train.shape[0], time_steps, 1)
    X_test = X_test.reshape(X_test.shape[0], time_steps, 1)

    if is_train:
        if os.path.exists(model_path):
            with open(model_path) as model_file:
                json_string = model_file.read()
            model = model_from_json(json_string)
            # Load existing weights when available so training resumes from them.
            if os.path.exists(weight_path):
                print('load weights ' + weight_path)
                model.load_weights(weight_path)
        else:
            model = Sequential()
            model.add(LSTM(32, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
            model.add(LSTM(32, return_sequences=False))
            model.add(Dense(units=64, activation='linear'))
            model.add(Dense(units=1))
            # Persist the architecture; the context manager flushes and closes the file.
            with open(model_path, 'w') as model_file:
                model_file.write(model.to_json())

        model.compile(loss='mse', optimizer='RMSprop')
        # Only the weights with the lowest validation loss are kept on disk.
        checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        callbacks_list = [checkpoint]
        history = model.fit(X_train, y_train, epochs=20, batch_size=24, validation_data=(X_test, y_test),
                            verbose=1, callbacks=callbacks_list, shuffle=False)
        evaluate.draw_loss_curve(figure_num='PM2.5', train_loss=history.history['loss'],
                                 val_loss=history.history['val_loss'])
    else:
        with open(model_path) as model_file:
            json_string = model_file.read()
        model = model_from_json(json_string)
        model.load_weights(weight_path)

    y_pred = model.predict(X_test)
    y_true = y_scaler.inverse_transform(y_test.reshape(-1, 1))
    y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1))

    # NOTE(review): prediction i + 1 is scored against truth i (one-step shift) - confirm this is intended.
    evaluate.all_metrics(y_true[:len(y_true) - 1], y_pred[1:])
    evaluate.draw_fitting_curve(y_true[:len(y_true) - 1], y_pred[1:])