Esempio n. 1
0
def cross_validataion_cnn(t_span,
                          height_span,
                          image_size,
                          downsample_size,
                          cnn_model,
                          initial_weights,
                          augment,
                          test_ratio=0.2,
                          limit=10000):
    """
    Run K-fold cross validation of a cnn-like model, one pass per time step.

    For every ``t`` in ``t_span`` the data is loaded with
    ``load_training_data``, divided by ``NORM_X`` / ``NORM_Y`` and split into
    ``K = int(1 / test_ratio)`` folds.  The model weights are reset before
    each fold, the model is fit on the (optionally augmented) training part
    and the predictions for the held-out part are stored.  The out-of-fold
    predictions of each time step form one column; the columns seen so far
    are averaged row-wise after every time step.

    All RMSE values printed by this function are expressed in de-normalised
    units (multiplied by ``NORM_Y``).

    Args:
        t_span: iterable of time steps, each forwarded as ``t`` to
            ``load_training_data``.
        height_span: forwarded to ``load_training_data``.
        image_size: forwarded to ``load_training_data`` and
            ``augment_training_data``.
        downsample_size: forwarded to ``load_training_data``.
        cnn_model: model object providing ``fit`` and ``predict``
            (presumably a Keras model -- confirm against the caller).
        initial_weights: forwarded to ``reset_weights`` before every fold.
        augment: when truthy, the training part of each fold is passed
            through ``augment_training_data(..., mode='image')``.
        test_ratio: fraction used to derive the number of folds.
        limit: forwarded to ``load_training_data``.

    Returns:
        Tuple ``(avg_Y_pred, Y)``: the row-wise mean of the per-time-step
        out-of-fold predictions, and the targets loaded for the last time
        step.  Both are in normalised units (divided by ``NORM_Y``).

    Raises:
        ValueError: if ``t_span`` yields no time step.
    """
    K = int(1 / test_ratio)
    per_t_predictions = []
    avg_Y_pred = None
    Y = None
    for t in t_span:
        X, Y = load_training_data(t=t,
                                  height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=limit)
        X = X / NORM_X
        Y = Y / NORM_Y

        k_fold = KFold(K)
        Y_pred = np.zeros((len(Y), 1))
        for k, (train, test) in enumerate(k_fold.split(X, Y)):
            # Start every fold from the same weights so folds stay independent.
            reset_weights(cnn_model, initial_weights)
            train_X, train_Y = preprocessing_data(X[train], Y[train])
            test_X, test_Y = X[test], Y[test]
            if augment:
                train_X, train_Y = augment_training_data(train_X,
                                                         train_Y,
                                                         image_size,
                                                         mode='image')
            # Stops as soon as the training loss fails to improve once.
            early_stop = EarlyStopping(monitor='loss', patience=0)
            cnn_model.fit(train_X,
                          train_Y,
                          batch_size=32,
                          epochs=200,
                          verbose=1,
                          validation_data=(test_X, test_Y),
                          callbacks=[
                              early_stop,
                          ])
            Y_pred[test] = cnn_model.predict(test_X).reshape(-1, 1)
            print("cv {} rmse: {}".format(
                k, rmse(Y_pred[test] * NORM_Y, Y[test] * NORM_Y)))

        # One column of out-of-fold predictions per time step.
        per_t_predictions.append(Y_pred)
        Y_pred_collection = np.concatenate(per_t_predictions, axis=1)

        avg_Y_pred = np.mean(Y_pred_collection, axis=1)
        # Scale back by NORM_Y so these figures are comparable with the
        # per-fold RMSE printed above.
        print("t:{} h:{} rmse:{}".format(
            t, height_span, rmse(Y * NORM_Y, Y_pred * NORM_Y)))
        print("avg rmse:{}".format(rmse(Y * NORM_Y, avg_Y_pred * NORM_Y)))

    if avg_Y_pred is None:
        raise ValueError("t_span must contain at least one time step")

    return avg_Y_pred, Y
Esempio n. 2
0
def train_full_cnn_model(t_span, height_span, image_size, downsample_size,
                         cnn_model, initial_weights, learner_storage_path):
    """
    Fit the model on the full data set of every time step and save it.

    For each ``t`` in ``t_span``: load the data (at most 10000 records are
    requested from ``load_training_data``), reset the model weights, fit for
    10 epochs with a batch size of 256, then save the model under
    ``<learner_storage_path>/t<t>h<height_span>size<image_size>.krs``.

    Args:
        t_span: iterable of time steps.
        height_span: forwarded to ``load_training_data`` and embedded in the
            saved file name.
        image_size: forwarded to ``load_training_data`` and embedded in the
            saved file name.
        downsample_size: forwarded to ``load_training_data``.
        cnn_model: model object providing ``fit`` and ``save``.
        initial_weights: forwarded to ``reset_weights`` before each fit.
        learner_storage_path: directory prefix of the saved model files.

    Returns:
        None.
    """
    progress_template = "training t{} h{}..."
    checkpoint_template = "{}/t{}h{}size{}.krs"

    for time_step in t_span:
        print(progress_template.format(time_step, height_span))

        features, targets = load_training_data(
            t=time_step,
            height_span=height_span,
            image_size=image_size,
            downsample_size=downsample_size,
            limit=10000,
        )

        # The same model object is reused, so restore its starting weights.
        reset_weights(cnn_model, initial_weights)
        cnn_model.fit(features, targets, batch_size=256, epochs=10, verbose=1)

        checkpoint_path = checkpoint_template.format(
            learner_storage_path, time_step, height_span, image_size)
        cnn_model.save(checkpoint_path)
Esempio n. 3
0
def cross_validataion_rnn(t_span,
                          height_span,
                          image_size,
                          downsample_size,
                          rnn_model,
                          initial_weights,
                          test_ratio=0.2,
                          limit=10000):
    """
    Run K-fold cross validation of an rnn model on stacked time steps.

    The data of every time step (visited in ascending order) is flattened to
    one feature vector per record and stacked into a tensor of shape
    ``(limit, len(t_span), len(height_span) * image_size * image_size)``.
    That stacked tensor is passed through ``preprocessing_data`` and then
    split into ``K = int(1 / test_ratio)`` folds; the model weights are reset
    before each fold.

    Args:
        t_span: sized collection of time steps.  It is not modified.
        height_span: forwarded to ``load_training_data``; its length is the
            channel count.
        image_size: forwarded to ``load_training_data``.
        downsample_size: forwarded to ``load_training_data``.
        rnn_model: model object providing ``fit`` and ``predict``.
        initial_weights: forwarded to ``reset_weights`` before every fold.
        test_ratio: fraction used to derive the number of folds.
        limit: forwarded to ``load_training_data``; the code assumes exactly
            this many records are returned (the reshape fails otherwise).

    Returns:
        Tuple ``(Y_pred, Y)``: the out-of-fold predictions with shape
        ``(limit, 1)`` and the targets returned by ``preprocessing_data``.

    Raises:
        ValueError: if ``t_span`` is empty.
    """
    K = int(1 / test_ratio)
    time_length = len(t_span)
    if time_length == 0:
        raise ValueError("t_span must contain at least one time step")
    channels = len(height_span)
    Xs = np.zeros((limit, time_length, channels * image_size * image_size))
    # sorted() keeps the caller's t_span untouched.
    for idx, t in enumerate(sorted(t_span)):
        X, Y = load_training_data(t=t,
                                  height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=limit)
        # Flatten each record and write the whole time-step slice at once.
        Xs[:, idx] = X.reshape((limit, -1))

    # Preprocess the stacked sequence tensor, not the last time step alone.
    # NOTE(review): assumes preprocessing_data accepts the 3-D tensor -- confirm.
    Xs, Y = preprocessing_data(Xs, Y)

    k_fold = KFold(K)
    Y_pred = np.zeros((limit, 1))
    for k, (train, test) in enumerate(k_fold.split(Xs, Y)):
        # Start every fold from the same weights so folds stay independent.
        reset_weights(rnn_model, initial_weights)
        rnn_model.fit(Xs[train],
                      Y[train],
                      batch_size=32,
                      epochs=50,
                      verbose=1,
                      validation_data=(Xs[test], Y[test]))
        Y_pred[test] = rnn_model.predict(Xs[test]).reshape(-1, 1)
        print("cv {} rmse: {}".format(k, rmse(Y_pred[test], Y[test])))

    print("overall rmse: {}".format(rmse(Y, Y_pred)))

    return Y_pred, Y
Esempio n. 4
0
def cross_validataion_convlstm(t_span,
                               height_span,
                               image_size,
                               downsample_size,
                               convlstm_model,
                               test_ratio=0.2,
                               limit=10000,
                               initial_weights=None):
    """
    Run K-fold cross validation of an rnn+cnn (ConvLSTM-like) model.

    The data of every time step (visited in ascending order) is reshaped to
    ``(limit, image_size, image_size, len(height_span))`` and stacked into a
    tensor of shape
    ``(limit, len(t_span), image_size, image_size, len(height_span))``.
    Inputs and targets are divided by ``NORM_X`` / ``NORM_Y``; the printed
    RMSE values are multiplied by ``NORM_Y`` again.

    Args:
        t_span: sized collection of time steps.  It is not modified.
        height_span: forwarded to ``load_training_data``; its length is the
            channel count.
        image_size: forwarded to ``load_training_data``.
        downsample_size: forwarded to ``load_training_data``.
        convlstm_model: model object providing ``fit`` and ``predict``.
        test_ratio: fraction used to derive the number of folds,
            ``K = int(1 / test_ratio)``.
        limit: forwarded to ``load_training_data``; the code assumes exactly
            this many records are returned (the reshape fails otherwise).
        initial_weights: optional; when given, it is forwarded to
            ``reset_weights`` before every fold.  When left as ``None`` the
            model keeps training across folds, so from the second fold on it
            has already been fit on samples of the current test part.

    Returns:
        Tuple ``(Y_pred, Y)``: the out-of-fold predictions with shape
        ``(limit, 1)`` and the targets, both divided by ``NORM_Y``.

    Raises:
        ValueError: if ``t_span`` is empty.
    """
    K = int(1 / test_ratio)
    k_fold = KFold(K)
    time_length = len(t_span)
    if time_length == 0:
        raise ValueError("t_span must contain at least one time step")
    channels = len(height_span)
    Xs = np.zeros((limit, time_length, image_size, image_size, channels))
    # sorted() keeps the caller's t_span untouched.
    for idx, t in enumerate(sorted(t_span)):
        X, Y = load_training_data(t=t,
                                  height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=limit)
        # Write the whole time-step slice at once.
        Xs[:, idx] = X.reshape((limit, image_size, image_size, channels))

    Xs = Xs / NORM_X
    Y = Y / NORM_Y

    Y_pred = np.zeros((limit, 1))
    for k, (train, test) in enumerate(k_fold.split(Xs, Y)):
        if initial_weights is not None:
            # Start every fold from the same weights so folds stay independent.
            reset_weights(convlstm_model, initial_weights)
        convlstm_model.fit(Xs[train],
                           Y[train],
                           batch_size=64,
                           epochs=200,
                           verbose=1,
                           validation_data=(Xs[test], Y[test]))
        Y_pred[test] = convlstm_model.predict(Xs[test]).reshape(-1, 1)
        print("cv {} rmse: {}".format(k, NORM_Y * rmse(Y_pred[test], Y[test])))

    print("overall rmse: {}".format(NORM_Y * rmse(Y, Y_pred)))

    return Y_pred, Y
Esempio n. 5
0
def time_sensitive_validataion_cnn(t_span,
                                   height_span,
                                   image_size,
                                   downsample_size,
                                   cnn_models,
                                   initial_weights,
                                   augment,
                                   holdout=0.1):
    """
    Validate an ensemble of cnn models on the trailing part of the data.

    For every ``t`` in ``t_span`` the first ``1 - holdout`` share of the
    loaded records is used for training and the remaining records for
    validation.  Every model in ``cnn_models`` is reset, fit on the training
    part and asked to predict the validation part; the model predictions are
    averaged into one column per time step, and the columns seen so far are
    averaged row-wise after every time step.

    Args:
        t_span: iterable of time steps.
        height_span: forwarded to ``load_training_data``.
        image_size: forwarded to ``load_training_data`` and
            ``augment_training_data``.
        downsample_size: forwarded to ``load_training_data``.
        cnn_models: sequence of model objects providing ``fit`` and
            ``predict``.
        initial_weights: forwarded to ``reset_weights`` for every model.
            NOTE(review): the same weights are used for all models, which
            presumably requires identical architectures -- confirm.
        augment: when truthy, the training part is passed through
            ``augment_training_data(..., mode='image')``.
        holdout: share of the records kept for validation.

    Returns:
        Tuple ``(avg_Y_pred, Y[test])``: the row-wise mean of the per-time-step
        ensemble predictions, and the validation targets of the last time
        step.

    Raises:
        ValueError: if ``t_span`` yields no time step.
    """
    per_t_predictions = []
    avg_Y_pred = None
    Y = None
    test = None
    for t in t_span:
        X, Y = load_training_data(t=t,
                                  height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=10000)
        num_of_recs = len(Y)
        # Index of the first held-out record.
        split_at = int(num_of_recs * (1 - holdout))
        train = range(split_at)
        test = range(split_at, num_of_recs)

        # Truthiness test, so that 0 / None / np.False_ also disable
        # augmentation instead of silently enabling it.
        if augment:
            train_X, train_Y = augment_training_data(X[train],
                                                     Y[train],
                                                     image_size,
                                                     mode='image')
        else:
            train_X, train_Y = X[train], Y[train]

        per_model_predictions = []
        for idx, cnn_model in enumerate(cnn_models):
            reset_weights(cnn_model, initial_weights)
            cnn_model.fit(train_X,
                          train_Y,
                          batch_size=256,
                          epochs=5,
                          verbose=1,
                          validation_data=(X[test], Y[test]))
            Y_pred_each = cnn_model.predict(X[test]).reshape(-1, 1)
            print("model {} rmse: {}".format(idx, rmse(Y[test], Y_pred_each)))
            per_model_predictions.append(Y_pred_each)
        # Ensemble prediction: mean over the models, kept as a column vector.
        Y_pred = np.mean(np.concatenate(per_model_predictions, axis=1),
                         axis=1).reshape(-1, 1)

        # One column of ensemble predictions per time step.
        per_t_predictions.append(Y_pred)
        Y_pred_collection = np.concatenate(per_t_predictions, axis=1)

        print(Y_pred_collection.shape)

        avg_Y_pred = np.mean(Y_pred_collection, axis=1)
        print("t:{} h:{} rmse:{}".format(t, height_span, rmse(Y[test],
                                                              Y_pred)))
        print("avg rmse:{}".format(rmse(Y[test], avg_Y_pred)))

    if avg_Y_pred is None:
        raise ValueError("t_span must contain at least one time step")

    return avg_Y_pred, Y[test]