Example 1

import numpy as np

# GCForest and load_json ship with the gcForest library; parse_args is an
# argparse helper defined earlier in the demo script (elided from this excerpt).
from gcforest.gcforest import GCForest
from gcforest.utils.config_utils import load_json
if __name__ == "__main__":

    # config
    args = parse_args()
    if args.model == 'ca':
        config = load_json('./mnist-ca.json')
    else:
        # 'gc' or unspecified: fall back to the gcForest config
        config = load_json('./mnist-gc.json')

    gc = GCForest(config)

    # keep the fitted estimators in memory so the model can be reused
    # for prediction after training
    gc.set_keep_model_in_mem(True)

    # data
    data_num_train = 60000  # number of training images
    data_num_test = 10000   # number of test images
    fig_w = 45              # width (and height) of each image

    X_train = np.fromfile("./data/mnist_train/mnist_train_data",
                          dtype=np.uint8)
    y_train = np.fromfile("./data/mnist_train/mnist_train_label",
                          dtype=np.uint8)
    X_test = np.fromfile("./data/mnist_test/mnist_test_data", dtype=np.uint8)
    y_test = np.fromfile("./data/mnist_test/mnist_test_label", dtype=np.uint8)

    # reshape the flat byte buffers into (N, 1, H, W) image arrays
    X_train = X_train.reshape(data_num_train, 1, fig_w, fig_w)
    X_test = X_test.reshape(data_num_test, 1, fig_w, fig_w)
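
For reference, here is a minimal sketch of the kind of cascade configuration a file such as mnist-gc.json would hold, written as the equivalent Python dict; the estimator list and hyperparameters below are illustrative assumptions, not the actual contents of that file:

# Hypothetical cascade config in the dict format gcForest consumes;
# estimator types and hyperparameters are illustrative only.
def sketch_gc_config():
    ca_config = {
        "random_state": 0,
        "max_layers": 100,            # upper bound on cascade depth
        "early_stopping_rounds": 3,   # stop adding layers when accuracy stalls
        "n_classes": 10,              # ten digit classes in MNIST
        "estimators": [
            {"n_folds": 5, "type": "RandomForestClassifier",
             "n_estimators": 10, "max_depth": None, "n_jobs": -1},
            {"n_folds": 5, "type": "ExtraTreesClassifier",
             "n_estimators": 10, "max_depth": None, "n_jobs": -1},
        ],
    }
    return {"cascade": ca_config}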
Example 2

# (imports as in Example 1, plus: from keras.datasets import mnist)

def get_toy_config():
    # ... (estimator definitions elided in this excerpt) ...
    config["cascade"] = ca_config
    return config


if __name__ == "__main__":
    args = parse_args()
    if args.model is None:
        config = get_toy_config()
    else:
        config = load_json(args.model)

    gc = GCForest(config)
    # If the model costs too much memory, this call stops gcForest from
    # keeping the fitted estimators in memory (the default is True).
    gc.set_keep_model_in_mem(False)

    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # X_train, y_train = X_train[:2000], y_train[:2000]
    X_train = X_train[:, np.newaxis, :, :]
    X_test = X_test[:, np.newaxis, :, :]

    X_train_enc = gc.fit_transform(X_train, y_train)
    # X_train_enc is the concatenated predict_proba output of each estimator
    # in the last layer of the GCForest model.
    # X_train_enc.shape =
    #   (n_samples, n_estimators * n_classes)             if a cascade is configured
    #   (n_samples, n_estimators * n_classes, dimX, dimY) if only the fine-grained part is configured
    # You can also pass X_test and y_test to fit_transform; the accuracy on the
    # test data is then logged during training:
    # X_train_enc, X_test_enc = gc.fit_transform(X_train, y_train, X_test=X_test, y_test=y_test)
    # WARNING: if you set gc.set_keep_model_in_mem(False), you have to use
    # gc.fit_transform(X_train, y_train, X_test=X_test, y_test=y_test) to
    # evaluate the model, since gc.predict needs the model kept in memory.
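
Because Example 2 does not keep the fitted model in memory, the encoded features are what you carry forward; a minimal sketch of one common follow-up, training a plain scikit-learn classifier on the cascade's output (this usage is an assumption, not part of the original demo):

# Sketch: fit a downstream classifier on the cascade's encoded features.
# Assumes X_train_enc and X_test_enc came from
# gc.fit_transform(X_train, y_train, X_test=X_test, y_test=y_test) above.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_enc, y_train)  # encoded features are 2-D here
y_pred = clf.predict(X_test_enc)
print("downstream accuracy:", accuracy_score(y_test, y_pred))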
    config["cascade"] = ca_config
    return config


if __name__ == "__main__":
    args = parse_args()
    if args.model is None:
        config = get_toy_config()
    else:
        config = load_json(args.model)

    gc = GCForest(config)
    # If the model costs too much memory, this call stops gcForest from
    # keeping the fitted estimators in memory (the default is True).
    gc.set_keep_model_in_mem(False)

    (X_train, y_train), (X_test, y_test) = load_defect_data()
    print('X_train.shape', X_train.shape, 'X_test.shape', X_test.shape,
          'y_train.shape', y_train.shape, 'y_test.shape', y_test.shape)
    # X_train, y_train = X_train[:2000], y_train[:2000]
    X_train = X_train[:, np.newaxis, :, :]
    X_test = X_test[:, np.newaxis, :, :]


    X_train_enc = gc.fit_transform(X_train, y_train, X_test=X_test, y_test=y_test)
    # X_train_enc is the concatenated predict_proba output of each estimator
    # in the last layer of the GCForest model.
    # X_train_enc.shape =
    #   (n_samples, n_estimators * n_classes)             if a cascade is configured
    #   (n_samples, n_estimators * n_classes, dimX, dimY) if only the fine-grained part is configured
    # Because X_test and y_test are passed to fit_transform above, the accuracy
    # on the test data is logged during training.
    # X_train_enc, X_test_enc = gc.fit_transform(X_train, y_train, X_test=X_test, y_test=y_test)
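
Alternatively, if the model is kept in memory, evaluation can go through gc.predict after training; a minimal sketch of that variant, using the predict method the gcForest demos rely on:

# Sketch: predict-based evaluation; requires keep_model_in_mem(True) so the
# fitted estimators survive training.
from sklearn.metrics import accuracy_score

gc.set_keep_model_in_mem(True)
X_train_enc = gc.fit_transform(X_train, y_train)
y_pred = gc.predict(X_test)  # X_test shaped (n, 1, H, W) as above
print("test accuracy:", accuracy_score(y_test, y_pred))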