Example #1
def cross_validataion_cnn(t_span,
                          height_span,
                          image_size,
                          downsample_size,
                          cnn_model,
                          initial_weights,
                          augment,
                          test_ratio=0.2,
                          limit=10000):
    """
    K-fold cross-validation for CNN-like models. K = int(1 / test_ratio); the model is
    reset to initial_weights before each fold, and out-of-fold predictions are averaged
    across t_span.
    """
    K = int(1 / test_ratio)
    Y_pred_collection = None
    for t in t_span:
        X, Y = load_training_data(t=t,
                                  height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=limit)
        X = X / NORM_X
        Y = Y / NORM_Y
        # X, Y = preprocessing_data(X, Y)

        k_fold = KFold(K)
        Y_pred = np.zeros((len(Y), 1))
        for k, (train, test) in enumerate(k_fold.split(X, Y)):
            reset_weights(cnn_model, initial_weights)
            train_X, train_Y = preprocessing_data(X[train], Y[train])
            test_X, test_Y = X[test], Y[test]
            if augment:
                train_X, train_Y = augment_training_data(train_X,
                                                         train_Y,
                                                         image_size,
                                                         mode='image')
            early_stop = EarlyStopping(monitor='loss', patience=0)
            cnn_model.fit(train_X,
                          train_Y,
                          batch_size=32,
                          epochs=200,
                          verbose=1,
                          validation_data=(test_X, test_Y),
                          callbacks=[
                              early_stop,
                          ])
            Y_pred[test] = cnn_model.predict(test_X).reshape(-1, 1)
            print("cv {} rmse: {}".format(
                k, rmse(Y_pred[test] * NORM_Y, Y[test] * NORM_Y)))

        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate((Y_pred_collection, Y_pred),
                                               axis=1)

        avg_Y_pred = np.mean(Y_pred_collection, axis=1)
        print("t:{} h:{} rmse:{}".format(t, height_span, rmse(Y, Y_pred)))
        print("avg rmse:{}".format(rmse(Y, avg_Y_pred)))

    return avg_Y_pred, Y
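
The call below is a usage sketch, not part of the original listing. It assumes a tf.keras backend and that the module-level helpers the function relies on (load_training_data, preprocessing_data, augment_training_data, reset_weights, rmse, NORM_X, NORM_Y) are importable; the model architecture, spans, and sizes are hypothetical placeholders. The point it illustrates is that initial_weights is the weight list taken from the compiled model, which the function hands to reset_weights at the start of every fold, presumably to return the model to its untrained state.

from tensorflow.keras import layers, models

# hypothetical sizes; the real values depend on the dataset
image_size, downsample_size = 32, 8

cnn_model = models.Sequential([
    layers.Conv2D(16, 3, activation='relu',
                  input_shape=(image_size, image_size, 1)),
    layers.Flatten(),
    layers.Dense(1),
])
cnn_model.compile(optimizer='adam', loss='mse')

# capture the freshly initialised weights; reset_weights presumably restores them each fold
initial_weights = cnn_model.get_weights()

avg_Y_pred, Y = cross_validataion_cnn(t_span=[0],       # hypothetical time steps
                                      height_span=[0],  # hypothetical heights
                                      image_size=image_size,
                                      downsample_size=downsample_size,
                                      cnn_model=cnn_model,
                                      initial_weights=initial_weights,
                                      augment=False)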
Example #2
def time_sensitive_validataion_avg_aggregate_4_viewpoints(
        t_span,
        height_span,
        image_size,
        learner,
        augment,
        downsample_size,
        test_ratio=0.2,
        data_preprocess=None,
        limit=10000):
    """
    use the first part of the training data for training and the last part for validation;
    test_ratio is the fraction of records held out. Predictions from the four viewpoints
    are averaged, and those averages are then aggregated across t_span.
    """
    Y_pred_collection = None
    for t in t_span:
        Xs, Y = load_training_data_sklearn_4_viewpoints(
            t=t,
            height_span=height_span,
            image_size=image_size,
            downsample_size=downsample_size,
            limit=limit)
        Y_1D = Y.reshape(-1)
        num_of_recs = len(Y_1D)
        train = range(int(num_of_recs * (1 - test_ratio)))
        test = range(int(num_of_recs * (1 - test_ratio)), num_of_recs)
        Y_pred_4_viewpoints = np.zeros((len(test), 4))
        for idx, X in enumerate(Xs):
            X, _ = preprocessing_data(X, Y, methods=data_preprocess)
            if not augment:
                learner.fit(X[train], Y_1D[train])
            else:
                augment_X, augment_Y = augment_training_data(X[train],
                                                             Y_1D[train],
                                                             image_size,
                                                             mode='flatten')
                learner.fit(augment_X, augment_Y)
            Y_pred_4_viewpoints[:, idx] = learner.predict(X[test]).reshape(-1)
            print("view point {} rmse: {}".format(
                idx + 1, rmse(Y_pred_4_viewpoints[:, idx], Y[test])))

        Y_pred = np.mean(Y_pred_4_viewpoints, axis=1).reshape(-1, 1)
        print("t:{} h:{} rmse:{}".format(t, height_span, rmse(Y[test],
                                                              Y_pred)))
        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate((Y_pred_collection, Y_pred),
                                               axis=1)
        avg_Y_pred = np.mean(Y_pred_collection, axis=1)
        print("avg rmse:{}".format(rmse(Y[test], avg_Y_pred)))

    return avg_Y_pred, Y[test]
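
A similar usage sketch for the four-viewpoint variant, again with hypothetical spans and sizes: any scikit-learn-style regressor can serve as learner, since the function only calls fit and predict on it.

from sklearn.ensemble import RandomForestRegressor

learner = RandomForestRegressor(n_estimators=100, n_jobs=-1)

# per-viewpoint predictions are averaged inside the function, then aggregated across t_span
avg_Y_pred, Y_test = time_sensitive_validataion_avg_aggregate_4_viewpoints(
    t_span=[0, 1],    # hypothetical time steps
    height_span=[0],  # hypothetical heights
    image_size=32,
    learner=learner,
    augment=False,
    downsample_size=8,
    test_ratio=0.2)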
Example #3
def time_sensitive_validataion_avg_aggregate(t_span,
                                             height_span,
                                             image_size,
                                             learner,
                                             augment,
                                             downsample_size,
                                             test_ratio=0.2,
                                             data_preprocess=None,
                                             limit=10000):
    """
    use the first part of the training data for training and the last part for validation;
    test_ratio is the fraction of records held out. Predictions are aggregated (averaged)
    across t_span.
    """
    Y_pred_collection = None
    for t in t_span:
        X, Y = load_training_data_sklearn(t=t,
                                          height_span=height_span,
                                          image_size=image_size,
                                          downsample_size=downsample_size,
                                          limit=limit)
        X, Y = preprocessing_data(X, Y, methods=data_preprocess)
        Y_1D = Y.reshape(-1)
        num_of_recs = len(Y_1D)
        train = range(int(num_of_recs * (1 - test_ratio)))
        test = range(int(num_of_recs * (1 - test_ratio)), num_of_recs)
        if not augment:
            X_train, Y_train = X[train], Y_1D[train]
        else:
            X_train, Y_train = augment_training_data(X[train],
                                                     Y_1D[train],
                                                     image_size,
                                                     mode='flatten')
        learner.fit(X_train, Y_train)
        Y_pred = learner.predict(X[test]).reshape(-1, 1)

        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate((Y_pred_collection, Y_pred),
                                               axis=1)

        avg_Y_pred = np.mean(Y_pred_collection, axis=1).reshape(-1, 1)
        print("t:{} h:{} rmse:{}".format(t, height_span, rmse(Y[test],
                                                              Y_pred)))
        print("avg rmse:{}".format(rmse(Y[test], avg_Y_pred)))

    np.savetxt("result_cache.txt",
               np.concatenate((Y[test], avg_Y_pred, Y_pred_collection),
                              axis=1),
               fmt="%.5f")

    return avg_Y_pred, Y[test]
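
rmse is called throughout the listing but never defined in it. A plausible minimal implementation is sketched below; it flattens both arguments because the examples mix (n, 1)- and (n,)-shaped arrays when calling it.

import numpy as np

def rmse(y_true, y_pred):
    # root-mean-square error; flatten so (n, 1) and (n,) inputs line up element-wise
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    return np.sqrt(np.mean((y_true - y_pred) ** 2))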
Example #4
def cross_validataion_avg_aggregate(t_span,
                                    height_span,
                                    image_size,
                                    learner,
                                    augment,
                                    downsample_size,
                                    test_ratio=0.2,
                                    data_preprocess=None,
                                    limit=10000):
    """
    K-fold cross-validation for sklearn-style learners; K = int(1 / test_ratio).
    Out-of-fold predictions for each t are collected as columns of Y_pred_collection.
    """
    K = int(1 / test_ratio)
    Y_pred_collection = None
    for t in t_span:
        X, Y = load_training_data_sklearn(t=t,
                                          height_span=height_span,
                                          image_size=image_size,
                                          downsample_size=downsample_size,
                                          limit=limit)
        X, Y = preprocessing_data(X, Y, methods=data_preprocess)
        Y_1D = Y.reshape(-1)

        k_fold = KFold(K)
        Y_pred = np.zeros((len(Y), 1))
        for k, (train, test) in enumerate(k_fold.split(X, Y_1D)):
            if not augment:
                learner.fit(X[train], Y_1D[train])
            else:
                augment_X, augment_Y = augment_training_data(X[train],
                                                             Y_1D[train],
                                                             image_size,
                                                             mode='flatten')
                learner.fit(augment_X, augment_Y)
            Y_pred[test] = learner.predict(X[test]).reshape(-1, 1)
            print("cv {} rmse: {}".format(k, rmse(Y_pred[test], Y[test])))

        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate((Y_pred_collection, Y_pred),
                                               axis=1)

        avg_Y_pred = np.mean(Y_pred_collection, axis=1)
        print("t:{} h:{} rmse:{}".format(t, height_span, rmse(Y, Y_pred)))
        print("avg rmse:{}".format(rmse(Y, avg_Y_pred)))

    return Y_pred_collection, Y
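
Unlike the other helpers, this one returns the full per-t prediction matrix rather than the ensemble average, so the caller can compare each time step against the averaged ensemble. A usage sketch follows (hypothetical spans; Ridge chosen arbitrarily; rmse as sketched above).

import numpy as np
from sklearn.linear_model import Ridge

# one column of out-of-fold predictions per t in t_span
Y_pred_collection, Y = cross_validataion_avg_aggregate(t_span=[0, 1, 2],
                                                       height_span=[0],
                                                       image_size=32,
                                                       learner=Ridge(alpha=1.0),
                                                       augment=False,
                                                       downsample_size=8)

y_true = Y.reshape(-1)
per_t_rmse = [rmse(y_true, Y_pred_collection[:, i])
              for i in range(Y_pred_collection.shape[1])]
ensemble_rmse = rmse(y_true, Y_pred_collection.mean(axis=1))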
Example #5
def time_sensitive_validataion_cnn(t_span,
                                   height_span,
                                   image_size,
                                   downsample_size,
                                   cnn_models,
                                   initial_weights,
                                   augment,
                                   holdout=0.1):
    """
    use the first part of the training data for training and the last part for validation;
    holdout is the fraction of records held out. Each model in cnn_models is trained and
    their predictions are averaged, then aggregated across t_span.
    """
    Y_pred_collection = None
    for t in t_span:
        X, Y = load_training_data(t=t,
                                  height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=10000)
        num_of_recs = len(Y)
        train = range(int(num_of_recs * (1 - holdout)))
        test = range(int(num_of_recs * (1 - holdout)), num_of_recs)

        if augment is False:
            train_X, train_Y = X[train], Y[train]
        else:
            train_X, train_Y = augment_training_data(X[train],
                                                     Y[train],
                                                     image_size,
                                                     mode='image')

        Y_pred = None
        for idx, cnn_model in enumerate(cnn_models):
            reset_weights(cnn_model, initial_weights)
            cnn_model.fit(train_X,
                          train_Y,
                          batch_size=256,
                          epochs=5,
                          verbose=1,
                          validation_data=(X[test], Y[test]))
            Y_pred_each = cnn_model.predict(X[test]).reshape(-1, 1)
            print("model {} rmse: {}".format(idx, rmse(Y[test], Y_pred_each)))
            if Y_pred is None:
                Y_pred = Y_pred_each
            else:
                Y_pred = np.concatenate((Y_pred, Y_pred_each), axis=1)
        Y_pred = np.mean(Y_pred, axis=1).reshape(-1, 1)

        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate((Y_pred_collection, Y_pred),
                                               axis=1)

        print(Y_pred_collection.shape)

        avg_Y_pred = np.mean(Y_pred_collection, axis=1)
        print("t:{} h:{} rmse:{}".format(t, height_span, rmse(Y[test],
                                                              Y_pred)))
        print("avg rmse:{}".format(rmse(Y[test], avg_Y_pred)))

    return avg_Y_pred, Y[test]
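
reset_weights is called in Examples #1 and #5 but not shown in the listing. Assuming initial_weights is whatever cnn_model.get_weights() returned, a minimal implementation could look like this:

def reset_weights(cnn_model, initial_weights):
    # restore the captured weights so every fold / ensemble member starts from the same state
    cnn_model.set_weights(initial_weights)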