Ejemplo n.º 1
0
def run_naive_rerf(dataset_name,
                   data,
                   choosen_classes,
                   sub_train_indices,
                   rf_type="shared"):
    """Fit one RerF forest on flattened images and score it on the test split.

    The train/test split is obtained from ``get_subset_data``; every image is
    flattened to a single row before being handed to ``fastRerF``.

    Args:
        dataset_name: Forwarded unchanged to ``get_subset_data``.
        data: Forwarded unchanged to ``get_subset_data``.
        choosen_classes: Forwarded unchanged to ``get_subset_data``.
        sub_train_indices: Forwarded unchanged to ``get_subset_data``.
        rf_type: Accepted for signature parity with the other runners; this
            function does not read it.

    Returns:
        A ``(accuracy, time_taken)`` tuple where ``accuracy`` is
        ``accuracy_score(test_labels, test_preds)`` and ``time_taken`` is a
        dict with wall-clock durations in seconds under ``"train"`` and
        ``"test"``.
    """
    (train_images, train_labels), (test_images, test_labels) = get_subset_data(
        dataset_name, data, choosen_classes, sub_train_indices)
    time_taken = {}

    # Leave one core free for the rest of the system, but never ask for zero
    # workers: on a single-CPU machine ``cpu_count() - 1`` would be 0.
    num_cores = max(1, cpu_count() - 1)

    # Train (the flattening is deliberately inside the timed region, as before)
    train_start = time.time()
    forest = fastRerF(X=train_images.reshape(len(train_images), -1),
                      Y=train_labels,
                      forestType=TREE_TYPE,
                      trees=100,
                      numCores=num_cores)
    train_end = time.time()
    time_taken["train"] = train_end - train_start
    # forest.printParameters()

    # Test
    test_start = time.time()
    test_preds = fastPredict(test_images.reshape(len(test_images), -1), forest)
    test_end = time.time()
    time_taken["test"] = test_end - test_start

    return accuracy_score(test_labels, test_preds), time_taken
Ejemplo n.º 2
0
def run_one_layer_deep_conv_rf(dataset_name,
                               data,
                               choosen_classes,
                               sub_train_indices,
                               type="shared"):
    """Run one ``DeepConvRF`` layer followed by a full forest and score it.

    A single ``DeepConvRF`` layer (``kernel_size=10``, ``stride=2``) is fitted
    on the training images and applied to the test images. The resulting maps
    are flattened per image and fed to a final classifier: ``fastRerF`` when
    ``type == "rerf_shared"``, otherwise a scikit-learn
    ``RandomForestClassifier``.

    Args:
        dataset_name: Forwarded unchanged to ``get_subset_data``.
        data: Forwarded unchanged to ``get_subset_data``.
        choosen_classes: Forwarded unchanged to ``get_subset_data``.
        sub_train_indices: Forwarded unchanged to ``get_subset_data``.
        type: Layer variant passed to ``DeepConvRF``. The value
            ``"rerf_shared"`` additionally selects the RerF code path for the
            final forest. (The name shadows the builtin but is kept because
            callers may pass it by keyword.)

    Returns:
        A ``(accuracy, time_taken)`` tuple. ``time_taken`` starts as a deep
        copy of ``conv1.time_taken`` (which must already provide ``"train"``
        and ``"test"`` entries, since they are incremented here) and gains
        ``"final_fit"`` and ``"final_predict"`` with the final forest's
        wall-clock durations in seconds.
    """
    (train_images, train_labels), (test_images, test_labels) = get_subset_data(
        dataset_name, data, choosen_classes, sub_train_indices)

    use_rerf = type == "rerf_shared"

    # ConvRF (layer 1)
    if use_rerf:
        conv1 = DeepConvRF(type="rerf_shared",
                           kernel_size=10,
                           stride=2,
                           rerf_params={
                               "num_trees": RERF_NUM_TREES,
                               "tree_type": RERF_TREE_TYPE
                           })
    else:
        conv1 = DeepConvRF(type=type, kernel_size=10, stride=2)

    conv1_map = conv1.convolve_fit(train_images, train_labels)
    conv1_map_test = conv1.convolve_predict(test_images)
    # Copy so that the layer's own bookkeeping is not mutated below.
    time_taken = copy.deepcopy(conv1.time_taken)

    # Full RF
    train_start = time.time()
    train_features = conv1_map.reshape(len(train_images), -1)
    if use_rerf:
        conv1_full_RF = fastRerF(
            X=train_features,
            Y=train_labels,
            forestType=RERF_TREE_TYPE,
            trees=100,
            # Leave one core free, but never request zero workers on a
            # single-CPU machine.
            numCores=max(1, cpu_count() - 1))
    else:
        conv1_full_RF = RandomForestClassifier(n_estimators=100, n_jobs=-1)
        conv1_full_RF.fit(train_features, train_labels)
    train_end = time.time()
    final_fit_time = train_end - train_start
    time_taken["train"] += final_fit_time
    time_taken["final_fit"] = final_fit_time

    test_start = time.time()
    test_features = conv1_map_test.reshape(len(test_images), -1)
    if use_rerf:
        test_preds = fastPredict(test_features, conv1_full_RF)
    else:
        test_preds = conv1_full_RF.predict(test_features)
    test_end = time.time()
    final_predict_time = test_end - test_start
    time_taken["test"] += final_predict_time
    time_taken["final_predict"] = final_predict_time

    return accuracy_score(test_labels, test_preds), time_taken
Ejemplo n.º 3
0
#     CSVFile=datafile,
#     Ycolumn=label_col,
#     forestType="binnedBaseRerF",
#     trees=500,
#     numCores=cpu_count(),
# )
# Fit a 500-tree "binnedBaseRerF" forest on the in-memory training data.
forest = fastRerF(X=feat_data,
                  Y=labels,
                  forestType="binnedBaseRerF",
                  trees=500,
                  numCores=cpu_count())

forest.printParameters()

# Predictions on the same data the forest was trained on.
predictions = fastPredict(feat_data, forest)
# print(predictions)

# Posterior outputs of fastPredictPost on the same training data.
post_pred = fastPredictPost(feat_data, forest)
# print(post_pred)

# Fraction of training samples whose prediction differs from its label.
print("Error rate", np.mean(predictions != labels))

print("loading test data...")

if datatype == "iris":
    data_fname = "./iris.csv"  # iris
elif datatype == "mnist":
    data_fname = "../packedForest/res/mnist_test.csv"  # mnist
else:
    # Without this branch an unknown datatype would surface later as a
    # confusing NameError on ``data_fname``.
    raise ValueError(
        "unsupported datatype {!r}; expected 'iris' or 'mnist'".format(
            datatype))
test_X = np.genfromtxt(data_fname, delimiter=",")