Exemple #1
0
def rnn(X_train, X_test, y_train, y_test, y_scaler, train_num):
    """
    Train (or load) a three-layer SimpleRNN regressor, predict on the test set and report metrics.

    When both the model JSON and the weight file already exist they are loaded through
    ``lstm.load_model_and_weights``. Otherwise a new network is built, its architecture is
    written to ``model_path`` and it is trained while the weights with the lowest ``val_loss``
    are checkpointed to ``weight_path``.

    :param X_train: training features, expected as (samples, features); reshaped to (samples, 1, features)
    :param X_test: test features, expected as (samples, features); reshaped to (samples, 1, features)
    :param y_train: training targets; reshaped to (samples, 1, 1)
    :param y_test: test targets; reshaped to (samples, 1, 1); also used as validation targets
    :param y_scaler: scaler forwarded to ``data.inverse_to_original_data``
    :param train_num: forwarded to ``data.inverse_to_original_data``
        (presumably the number of training rows -- confirm against that helper)
    :return: None; results are reported through ``evaluate.all_metrics`` and
        ``evaluate.draw_fitting_curve``
    """
    model_path = '../Models/RNN/rnn_model_060376012.json'
    weight_path = '../Models/RNN/rnn_weights_060376012.best.hdf5'

    # Insert a length-1 middle axis so the recurrent layers receive 3-D input.
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
    # Targets are brought to (samples, 1, 1): every recurrent layer uses
    # return_sequences=True, so the network output keeps the middle axis.
    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)
    y_train = y_train.reshape((y_train.shape[0], 1, y_train.shape[1]))
    y_test = y_test.reshape((y_test.shape[0], 1, y_test.shape[1]))
    if os.path.exists(model_path) and os.path.exists(weight_path):
        print('load model...')
        model = lstm.load_model_and_weights(model_path, weight_path)
    else:
        print('训练模型...')
        model = Sequential()
        model.add(SimpleRNN(input_shape=(X_train.shape[1], X_train.shape[2]), output_dim=300, dropout=0.3,
                            return_sequences=True))
        model.add(SimpleRNN(output_dim=300, dropout=0.3, return_sequences=True))
        model.add(SimpleRNN(output_dim=300, dropout=0.3, return_sequences=True))
        model.add(Dense(1024))
        model.add(Dropout(0.5))
        model.add(Dense(1))
        # Persist the architecture; the context manager guarantees the file is flushed and closed.
        with open(model_path, 'w') as model_file:
            model_file.write(model.to_json())
        model.compile(loss='mse', optimizer='RMSprop')
        checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        callbacks_list = [checkpoint]
        model.fit(X_train, y_train, epochs=500, batch_size=512, validation_data=(X_test, y_test),
                  verbose=1, callbacks=callbacks_list, shuffle=False)
    y_pred = model.predict(X_test)
    # Flatten everything to a single row before handing it to the inverse-scaling helper.
    y_test = data.inverse_to_original_data(y_train.reshape(1, -1), y_test.reshape(1, -1), scaler=y_scaler,
                                           train_num=train_num)
    y_pred = data.inverse_to_original_data(y_train.reshape(1, -1), y_pred.reshape(1, -1), scaler=y_scaler,
                                           train_num=train_num)
    evaluate.all_metrics(y_test, y_pred)
    evaluate.draw_fitting_curve(y_test, y_pred, 0)
Exemple #2
0
def lasso(X_train, X_test, y_train, y_test, y_scaler, train_num):
    """
    Fit a cross-validated LASSO regressor and evaluate its test-set predictions.

    :param X_train: training features passed to ``LassoCV.fit``
    :param X_test: test features passed to ``LassoCV.predict``
    :param y_train: training targets; must support ``reshape``
    :param y_test: test targets; must support ``reshape``
    :param y_scaler: scaler forwarded to ``data.inverse_to_original_data``
    :param train_num: forwarded to ``data.inverse_to_original_data``
        (presumably the number of training rows -- confirm against that helper)
    :return: None; results go to ``evaluate.all_metrics`` and ``evaluate.draw_fitting_curve``
    """

    def to_original(scaled_values):
        # Both the train targets and the values to restore are passed as single-row arrays.
        return data.inverse_to_original_data(y_train.reshape(1, -1), scaled_values.reshape(1, -1),
                                             scaler=y_scaler, train_num=train_num)

    # Earlier, wider alpha grid kept for reference:
    # [0.0001, 0.0005, 0.00005, 0.00002, 0.00001, 0.000001]
    regressor = LassoCV(alphas=[0.0003, 0.0002])
    regressor.fit(X_train, y_train)
    scaled_prediction = regressor.predict(X_test)

    actual = to_original(y_test)
    predicted = to_original(scaled_prediction)
    evaluate.all_metrics(actual, predicted)
    evaluate.draw_fitting_curve(actual, predicted, 0)
Exemple #3
0
def ridge(X_train, X_test, y_train, y_test, y_scaler, train_num):
    """
    Fit a cross-validated ridge regressor and evaluate its test-set predictions.

    :param X_train: training features passed to ``RidgeCV.fit``
    :param X_test: test features passed to ``RidgeCV.predict``
    :param y_train: training targets; must support ``reshape``
    :param y_test: test targets; must support ``reshape``
    :param y_scaler: scaler forwarded to ``data.inverse_to_original_data``
    :param train_num: forwarded to ``data.inverse_to_original_data``
        (presumably the number of training rows -- confirm against that helper)
    :return: None; results go to ``evaluate.all_metrics`` and ``evaluate.draw_fitting_curve``
    """

    def to_original(scaled_values):
        # Both the train targets and the values to restore are passed as single-row arrays.
        return data.inverse_to_original_data(y_train.reshape(1, -1), scaled_values.reshape(1, -1),
                                             scaler=y_scaler, train_num=train_num)

    # Earlier alpha grid kept for reference: [1, 0.5, 0.1, 0.01, 0.05, 0.001, 0.005]
    regressor = RidgeCV(alphas=[5.0, 10.0])
    regressor.fit(X_train, y_train)
    scaled_prediction = regressor.predict(X_test)

    actual = to_original(y_test)
    predicted = to_original(scaled_prediction)
    evaluate.all_metrics(actual, predicted)
    evaluate.draw_fitting_curve(actual, predicted, 0)
Exemple #4
0
def svr(X_train, X_test, y_train, y_test, y_scaler, train_num):
    """
    Fit an RBF-kernel support vector regressor (C=1) and evaluate its test-set predictions.

    :param X_train: training features passed to ``SVR.fit``
    :param X_test: test features passed to ``SVR.predict``
    :param y_train: training targets; must support ``reshape``
    :param y_test: test targets; must support ``reshape``
    :param y_scaler: scaler forwarded to ``data.inverse_to_original_data``
    :param train_num: forwarded to ``data.inverse_to_original_data``
        (presumably the number of training rows -- confirm against that helper)
    :return: None; results go to ``evaluate.all_metrics`` and ``evaluate.draw_fitting_curve``
    """

    def to_original(scaled_values):
        # Both the train targets and the values to restore are passed as single-row arrays.
        return data.inverse_to_original_data(y_train.reshape(1, -1), scaled_values.reshape(1, -1),
                                             scaler=y_scaler, train_num=train_num)

    regressor = SVR(kernel='rbf', C=1)
    regressor.fit(X_train, y_train)
    scaled_prediction = regressor.predict(X_test)

    actual = to_original(y_test)
    predicted = to_original(scaled_prediction)
    evaluate.all_metrics(actual, predicted)
    evaluate.draw_fitting_curve(actual, predicted, 0)
Exemple #5
0
def ann(X_train, X_test, y_train, y_test, y_scaler, train_num):
    """
    Train (or load) a fully connected regressor, predict on the test set and report metrics.

    The network is Dense(500) -> Dropout(0.3) -> Dense(1000) -> Dropout(0.5) -> Dense(500)
    -> Dropout(0.3) -> Dense(1), trained with MSE loss and RMSprop. A cached model is only
    reused when BOTH the architecture JSON and the weight file exist; otherwise the model is
    rebuilt and retrained.

    :param X_train: training features; ``len(X_train[0])`` is used as the input dimension
    :param X_test: test features; also used as validation inputs during training
    :param y_train: training targets; must support ``reshape``
    :param y_test: test targets; also used as validation targets during training
    :param y_scaler: scaler forwarded to ``data.inverse_to_original_data``
    :param train_num: forwarded to ``data.inverse_to_original_data``
        (presumably the number of training rows -- confirm against that helper)
    :return: None; results go to ``evaluate.all_metrics`` and ``evaluate.draw_fitting_curve``
    """
    model_path = '../Models/ANN/ann_model_060376012.json'
    weight_path = '../Models/ANN/ann_weights_060376012.best.hdf5'

    # The JSON is written before training starts, so it can exist without weights (e.g. after
    # an interrupted run). Require both files before trying to load, as rnn() does.
    if os.path.exists(model_path) and os.path.exists(weight_path):
        print('load model...')
        model = lstm.load_model_and_weights(model_path, weight_path)
    else:
        print('训练模型...')
        model = Sequential()
        model.add(Dense(500, input_dim=(len(X_train[0])), activation='relu'))
        model.add(Dropout(0.3))
        model.add(Dense(1000, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(500, activation='relu'))
        model.add(Dropout(0.3))
        model.add(Dense(1))
        # Persist the architecture; the context manager guarantees the file is flushed and closed.
        with open(model_path, 'w') as model_file:
            model_file.write(model.to_json())
        model.compile(loss='mse', optimizer='RMSprop')
        checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        callbacks_list = [checkpoint]
        model.fit(X_train, y_train, epochs=500, batch_size=512, validation_data=(X_test, y_test),
                  verbose=1, callbacks=callbacks_list, shuffle=False)
    y_pred = model.predict(X_test)
    # Flatten everything to a single row before handing it to the inverse-scaling helper.
    y_test = data.inverse_to_original_data(y_train.reshape(1, -1), y_test.reshape(1, -1), scaler=y_scaler,
                                           train_num=train_num)
    y_pred = data.inverse_to_original_data(y_train.reshape(1, -1), y_pred.reshape(1, -1), scaler=y_scaler,
                                           train_num=train_num)
    evaluate.all_metrics(y_test, y_pred)
    evaluate.draw_fitting_curve(y_test, y_pred, 0)
def predict(test_split, lstm_layers, lstm_layer_nodes, frozen_layer_names, add_layer_num, mode, draw_fitting_curve=False):
    """
    Load a saved LSTM model from the transfer-learning directory and score it on the test split.

    The model / weight file names are derived from the arguments and follow the same naming
    scheme for both modes: ``mode == 0`` selects the ``.transfer_frozen<k>_add<m>`` files,
    any other value selects the ``.direct`` files.

    :param test_split: fraction of rows held out; ``int(len(df) * (1 - test_split))`` rows are used for training
    :param lstm_layers: number of LSTM layers, used only to build the file names
    :param lstm_layer_nodes: nodes per LSTM layer, used only to build the file names
    :param frozen_layer_names: sized collection; only its length is used, and only when ``mode == 0``
    :param add_layer_num: number of added layers, used only to build the file names when ``mode == 0``
    :param mode: 0 for the transfer-learned model, anything else for the directly trained model
    :param draw_fitting_curve: when true, plot true vs. predicted values via ``evaluate.draw_fitting_curve``
    :return: whatever ``evaluate.all_metrics(y_true, y_pred)`` returns
    """
    model_root_path = '../Models/LSTM1/TransferLearning'
    cols = ['PM25', 'Press', 'RH', 'Temp', 'Wind Speed', 'Wind Direction', 'Month', 'Day', 'Hour']
    data_path = '../DataSet/Processed/Train/060376012_2016_2017_v1.csv'
    df_raw = pd.read_csv(data_path, usecols=cols, dtype=str)
    time_steps = {
        'PM25': 24,
        'Press': 24,
        'RH': 8,
        'Temp': 7,
        'Wind Speed': 2,
        'Wind Direction': 4
    }

    # Only the middle part of the file name differs between the two modes.
    if mode == 0:
        variant = '.transfer_frozen' + str(len(frozen_layer_names)) + '_add' + str(add_layer_num)
    else:
        variant = '.direct'
    size_tag = str(lstm_layers) + '_nodes' + str(lstm_layer_nodes) + variant
    model_path = model_root_path + '/lstm_model_layers' + size_tag + '.json'
    weight_path = model_root_path + '/weights_layer' + size_tag + '.best.hdf5'

    train_num = int(len(df_raw) * (1 - test_split))
    X_train, X_test, y_train, y_test, y_scaler = process_data(df_raw, time_steps, train_num)

    # load model and weights; the context manager closes the JSON file deterministically
    with open(model_path) as model_file:
        json_string = model_file.read()
    model = model_from_json(json_string)
    model.load_weights(weight_path)
    model.summary()

    # predict, then undo the target scaling on both the truth and the prediction
    y_pred = model.predict(X_test)
    y_true = y_scaler.inverse_transform(y_test.reshape(-1, 1))
    y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1))

    if draw_fitting_curve:
        evaluate.draw_fitting_curve(y_true, y_pred)

    # Release the model and reset backend state before returning.
    del model
    gc.collect()
    K.clear_session()
    tf.reset_default_graph()
    return evaluate.all_metrics(y_true, y_pred)
Exemple #7
0
def main(conf, is_train=True):
    """
    Train a two-layer LSTM on the configured data set, or evaluate a previously saved one.

    NOTE(review): ``model_path`` and ``weight_path`` are not defined in this function; they
    are expected to exist as module-level names -- confirm where they are set.

    :param conf: nested dict; reads ``conf['data_conf']['data_path']``,
        ``conf['data_conf']['usecols']`` and ``conf['model_conf']`` (keys ``epochs``,
        ``batch_size`` and ``target_col`` are used here, the whole dict is passed to
        ``data.process_data_for_lstm``)
    :param is_train: True to (continue to) train and plot the loss curves; False to load the
        saved model, predict on the test split and report metrics
    :return: None
    """
    df_raw = data.get_raw_data(conf['data_conf']['data_path'], usecols=conf['data_conf']['usecols'])
    X_train, X_test, y_train, y_test, y_scaler = data.process_data_for_lstm(df_raw, conf['model_conf'])

    if is_train:
        if os.path.exists(model_path):
            with open(model_path) as model_file:
                json_string = model_file.read()
            model = model_from_json(json_string)
            # Resume from saved weights when they are available.
            if os.path.exists(weight_path):
                print('load weights ' + weight_path)
                model.load_weights(weight_path)
        else:
            model = Sequential()
            model.add(LSTM(32, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
            model.add(LSTM(32, return_sequences=False))
            model.add(Dense(units=64, activation='linear'))
            model.add(Dense(units=1))
            # Persist the architecture; the context manager guarantees the file is closed.
            with open(model_path, 'w') as model_file:
                model_file.write(model.to_json())
        model.compile(loss='mse', optimizer='RMSprop')
        checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        callbacks_list = [checkpoint]
        history = model.fit(X_train, y_train, epochs=conf['model_conf']['epochs'],
                            batch_size=conf['model_conf']['batch_size'], validation_data=(X_test, y_test), verbose=1,
                            callbacks=callbacks_list, shuffle=False)

        evaluate.draw_loss_curve(figure_num=conf['model_conf']['target_col'], train_loss=history.history['loss'],
                                 val_loss=history.history['val_loss'])
    else:
        with open(model_path) as model_file:
            json_string = model_file.read()
        model = model_from_json(json_string)
        model.load_weights(weight_path)
        y_pred = model.predict(X_test)
        y_true = y_scaler.inverse_transform(y_test.reshape(-1, 1))
        y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1))
        # NOTE(review): predictions are shifted by one step relative to the truth
        # (y_pred[i + 1] is compared with y_true[i]) -- confirm this alignment is intended.
        evaluate.all_metrics(y_true[: len(y_true) - 1], y_pred[1:])
        evaluate.draw_fitting_curve(y_true[: len(y_true) - 1], y_pred[1:])
def main(data_path, cols, time_steps, test_split, model_root_path, lstm_layers, lstm_layer_nodes, is_train=True):
    """
    Grid-train or grid-evaluate LSTM models over every (layer count, node count) combination.

    In training mode each combination is trained via ``train`` and its metrics are reported.
    In prediction mode each saved model is evaluated via ``predict`` and the accumulated
    metrics table is rewritten to ``<model_root_path>/all_metrics_values.csv`` after every
    combination.

    NOTE(review): the ``test_split`` argument is currently ignored -- it is overwritten with
    0.4 for training and 1 for prediction. Kept as is to preserve existing results.

    :param data_path: CSV file read with ``pd.read_csv`` for every combination
    :param cols: columns to load from the CSV
    :param time_steps: dict whose values are per-feature step counts; forwarded to ``train`` / ``predict``
    :param test_split: accepted for interface compatibility; overridden inside (see note above)
    :param model_root_path: directory that holds the model JSON, weight and metrics files
    :param lstm_layers: iterable of LSTM layer counts to try
    :param lstm_layer_nodes: iterable of per-layer node counts to try
    :param is_train: True to train, False to evaluate saved models
    :return: None
    """
    # Hyper-parameters
    epoch = 1000
    batch = 512
    test_split = 0.4
    lstm_activation = 'softsign'
    lstm_recurrent_activation = 'hard_sigmoid'
    lstm_input_shape = (1, 24 * 6)
    lstm_dropout = 0.3
    dense_layers = [1024, 1024]
    dense_activation = 'relu'
    date_features_shape = (1, 12 + 31 + 24)
    dense_dropout = 0.5

    if is_train:
        # Training
        print('训练模型...')
        for layer_count in lstm_layers:
            for node_count in lstm_layer_nodes:
                # The CSV is re-read for every combination so each run starts from a fresh frame.
                df_raw_data = pd.read_csv(data_path, usecols=cols, dtype=str)
                model_path = model_root_path + '/lstm_model_layers' + str(layer_count) + '_nodes' + str(
                    node_count) + '.json'
                weight_path = model_root_path + '/weights_layer' + str(layer_count) + '_nodes' + str(
                    node_count) + '.best.hdf5'
                lstm_conf = {
                    'max_features': max(time_steps.values()) * len(time_steps),
                    'input_shape': lstm_input_shape,
                    'layers': [node_count] * layer_count,
                    'activation': lstm_activation,
                    'recurrent_activation': lstm_recurrent_activation,
                    'dropout': lstm_dropout,
                    'is_transfer': False,
                    'frozen_num': 0,
                    'add_layer_num': 0
                }
                dense_conf = {
                    # Date features: 12 months + 31 days + 24 hours
                    'date_features_shape': date_features_shape,
                    'layers': dense_layers,
                    'activation': dense_activation,
                    'dropout': dense_dropout
                }
                y_true, y_pred = train(df_raw_data, model_path, weight_path, epochs=epoch, batch_size=batch,
                                       lstm_config=lstm_conf,
                                       dense_config=dense_conf, time_steps=time_steps, test_split=test_split)
                evaluate.all_metrics(y_true, y_pred)
    else:
        # Prediction
        print('预测...')
        test_split = 1
        # DataFrame.append was removed in pandas 2.0; collect the per-run frames and
        # concatenate them instead. Assumes evaluate.all_metrics returns a DataFrame.
        metric_frames = []
        for layer_count in lstm_layers:
            for node_count in lstm_layer_nodes:
                df_raw_data = pd.read_csv(data_path, usecols=cols, dtype=str)
                model_path = model_root_path + '/lstm_model_layers' + str(layer_count) + '_nodes' + str(
                    node_count) + '.json'
                weight_path = model_root_path + '/weights_layer' + str(layer_count) + '_nodes' + str(
                    node_count) + '.best.hdf5'
                y_true, y_pred = predict(df_raw_data, time_steps, test_split, model_path, weight_path)
                df_temp = evaluate.all_metrics(y_true, y_pred)
                df_temp['layers'] = layer_count
                df_temp['nodes'] = node_count
                metric_frames.append(df_temp)
                # Rewrite the file after every run so partial results survive an interruption.
                print('Write all metrics to file...')
                df_metrics = pd.concat(metric_frames)
                df_metrics.to_csv(model_root_path + '/all_metrics_values.csv', index=False)
def transfer_learning(use_model_path, use_weights_path, test_split, lstm_layers, lstm_layer_nodes, frozen_layer_names, add_layer_num,
                      mode=0):
    """
    Train an LSTM either by transfer from an existing model or directly from scratch.

    :param use_model_path: path of the source model JSON, appended to the transfer-learning root directory
    :param use_weights_path: path of the source weights, appended to the transfer-learning root directory
    :param test_split: forwarded to ``train``
    :param lstm_layers: number of LSTM layers
    :param lstm_layer_nodes: number of nodes in each LSTM layer
    :param frozen_layer_names: an array of layer names; passed on in the LSTM config, and its
        length is part of the saved file names when ``mode == 0``
    :param add_layer_num: passed on in the LSTM config and part of the saved file names when ``mode == 0``
    :param mode: mode=0 transfers from the existing model; any other value (e.g. mode=1)
        trains directly without loading an existing model
    :return: None; metrics are reported through ``evaluate.all_metrics``
    """
    cols = ['PM25', 'Press', 'RH', 'Temp', 'Wind Speed', 'Wind Direction', 'Month', 'Day', 'Hour']
    data_path = '../DataSet/Processed/Train/060376012_2016_2017_v1.csv'
    model_root_path = '../Models/LSTM1/TransferLearning'
    transfer_use_model_path = model_root_path + use_model_path
    transfer_use_weights_path = model_root_path + use_weights_path

    # Shared "<layers>_nodes<nodes>" part of every output file name.
    size_tag = str(lstm_layers) + '_nodes' + str(lstm_layer_nodes)
    if mode == 0:
        source_model = transfer_use_model_path
        source_weights = transfer_use_weights_path
        variant = '.transfer_frozen' + str(len(frozen_layer_names)) + '_add' + str(add_layer_num)
    else:
        # Direct training: nothing to load.
        source_model = ''
        source_weights = ''
        variant = '.direct'

    model_path = {
        'load_model_path': source_model,
        'save_model_path': model_root_path + '/lstm_model_layers' + size_tag + variant + '.json',
    }
    weight_path = {
        'load_weights_path': source_weights,
        'save_weights_path': model_root_path + '/weights_layer' + size_tag + variant + '.best.hdf5',
    }

    time_steps = {
        'PM25': 24,
        'Press': 24,
        'RH': 8,
        'Temp': 7,
        'Wind Speed': 2,
        'Wind Direction': 4
    }
    df_raw_data = pd.read_csv(data_path, usecols=cols, dtype=str)

    lstm_conf = dict([
        ('max_features', max(time_steps.values()) * len(time_steps)),
        ('input_shape', (1, 24 * 6)),
        ('layers', [lstm_layer_nodes] * lstm_layers),
        ('activation', 'softsign'),
        ('recurrent_activation', 'hard_sigmoid'),
        ('dropout', 0.3),
        ('frozen_layer_names', frozen_layer_names),
        ('add_layer_num', add_layer_num),
    ])
    dense_conf = dict([
        # Date features: 12 months + 31 days + 24 hours
        ('date_features_shape', (1, 12 + 31 + 24)),
        ('layers', [1024, 1024]),
        ('activation', 'relu'),
        ('dropout', 0.5),
    ])

    # Run the (transfer) training and report the resulting metrics.
    y_true, y_pred = train(df_raw_data, model_path, weight_path, epochs=1000, batch_size=512, lstm_config=lstm_conf,
                           dense_config=dense_conf, time_steps=time_steps, test_split=test_split)
    evaluate.all_metrics(y_true, y_pred)
Exemple #10
0
def main(is_train=True):
    """
    Train or evaluate a univariate LSTM that predicts PM25 from the previous ``time_steps`` values.

    The PM25 column is turned into sliding windows of ``time_steps + 1`` consecutive values:
    the first ``time_steps`` values are the inputs and the last one is the target. Inputs and
    targets are scaled with ``data.min_max_scale`` and split chronologically (first 60% for
    training, remaining 40% for testing / validation).

    :param is_train: True to (continue to) train and plot the loss curves; False to load the
        saved model, predict on the test split and report metrics
    :return: None
    :raises ValueError: if the series has no more than ``time_steps`` rows, so no window can be built
    """
    data_path = '../DataSet/Processed/Train/261630033_2016_2017_v1.csv'
    # NOTE(review): the file names say "epochs10" but training below runs 20 epochs.
    model_path = '../Models/Test/model_epochs10_batch24.best.json'
    weight_path = '../Models/Test/weights_epochs10_batch24.best.hdf5'
    df_raw = data.get_raw_data(data_path, ['PM25'], dtype=float)
    seq_data = np.array(df_raw).reshape(1, -1)[0]
    test_split = 0.4
    time_steps = 4

    window_count = len(df_raw) - time_steps
    if window_count <= 0:
        # Without this guard the 2-D slicing below fails with an opaque IndexError.
        raise ValueError('need more than %d rows to build a window, got %d' % (time_steps, len(df_raw)))
    # Each row holds time_steps inputs followed by the value to predict.
    new_data = np.array([list(seq_data[i:i + time_steps + 1]) for i in range(window_count)])
    train_num = int(len(new_data) * (1 - test_split))

    y_scaled, y_scaler = data.min_max_scale(new_data[:, -1].reshape(-1, 1))
    X_scaled, X_scaler = data.min_max_scale(new_data[:, 0:time_steps])

    y_train = y_scaled[:train_num, :].reshape(1, -1)[0]
    y_test = y_scaled[train_num:, :].reshape(1, -1)[0]
    X_train = X_scaled[:train_num, :]
    X_test = X_scaled[train_num:, :]
    # LSTM input layout: (samples, time steps, 1 feature per step).
    X_train = X_train.reshape(X_train.shape[0], time_steps, 1)
    X_test = X_test.reshape(X_test.shape[0], time_steps, 1)

    if is_train:
        if os.path.exists(model_path):
            with open(model_path) as model_file:
                json_string = model_file.read()
            model = model_from_json(json_string)
            # Resume from saved weights when they are available.
            if os.path.exists(weight_path):
                print('load weights ' + weight_path)
                model.load_weights(weight_path)
        else:
            model = Sequential()
            model.add(
                LSTM(32,
                     input_shape=(X_train.shape[1], X_train.shape[2]),
                     return_sequences=True))
            model.add(LSTM(32, return_sequences=False))
            model.add(Dense(units=64, activation='linear'))
            model.add(Dense(units=1))
            # Persist the architecture; the context manager guarantees the file is closed.
            with open(model_path, 'w') as model_file:
                model_file.write(model.to_json())
        model.compile(loss='mse', optimizer='RMSprop')
        checkpoint = ModelCheckpoint(weight_path,
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')
        callbacks_list = [checkpoint]
        history = model.fit(X_train,
                            y_train,
                            epochs=20,
                            batch_size=24,
                            validation_data=(X_test, y_test),
                            verbose=1,
                            callbacks=callbacks_list,
                            shuffle=False)

        evaluate.draw_loss_curve(figure_num='PM2.5',
                                 train_loss=history.history['loss'],
                                 val_loss=history.history['val_loss'])
    else:
        with open(model_path) as model_file:
            json_string = model_file.read()
        model = model_from_json(json_string)
        model.load_weights(weight_path)
        y_pred = model.predict(X_test)
        y_true = y_scaler.inverse_transform(y_test.reshape(-1, 1))
        y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1))
        # NOTE(review): predictions are shifted by one step relative to the truth
        # (y_pred[i + 1] is compared with y_true[i]) -- confirm this alignment is intended.
        evaluate.all_metrics(y_true[:len(y_true) - 1], y_pred[1:])
        evaluate.draw_fitting_curve(y_true[:len(y_true) - 1], y_pred[1:])