    # Load the MNIST test labels and images
    c = get_label_data("t10k-labels-idx1-ubyte")
    y = get_feature_data("t10k-images-idx3-ubyte")
    # cTrain = get_label_data("train-labels-idx1-ubyte")[:,:10000]
    # yTrain = get_feature_data("train-images-idx3-ubyte")[:10000, :]
    # Load the full MNIST training labels and images
    cTrain = get_label_data("train-labels-idx1-ubyte")
    yTrain = get_feature_data("train-images-idx3-ubyte")
    # Scale each image (row) to unit norm
    y = np.matrix(normalize(y, axis=1))
    yTrain = np.matrix(normalize(yTrain, axis=1))


    # print(yTrain.shape)
    # print(cTrain.shape)
    w_init = get_weight_matrix(yTrain, cTrain)

    score1 = calculate_accuracy(y, w_init, c)
    print("initial accuracy", score1)


    # Testing CGLS
    # hessian_handle = lambda x: hessian_sub(yTrain.T, w_init.T, x)
    # D = cgls_wiki(
    #     hessian_handle,
    #     -1 * softmax_gradient(cTrain, yTrain.T, w_init.T),
    #     k=200,
    #     x_0=w_init.T
    # )

    # diff_matrix = -1 * softmax_gradient(cTrain, yTrain.T, w_init.T) - hessian_sub(yTrain.T,w_init.T, D)
    # print("norm of diff_matrix", np.linalg.norm(diff_matrix))
Example #2
def logistic_function(data, coefficients):
    # Sigmoid of the linear model; the signature and the dot product are
    # reconstructed from the call site below, only the return line is original.
    values = np.dot(data, coefficients)
    return 1 / (1 + np.exp(-values))
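
Note that np.exp(-values) overflows for large negative inputs; a numerically stable variant (a sketch, not part of the original snippet, assuming numpy is imported as np) is:

def stable_logistic(values):
    # Evaluate the sigmoid piecewise so np.exp never receives a large positive argument
    values = np.asarray(values, dtype=float)
    out = np.empty_like(values)
    pos = values >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-values[pos]))
    exp_v = np.exp(values[~pos])
    out[~pos] = exp_v / (1.0 + exp_v)
    return out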


if __name__ == '__main__':
    data_formatted, data_without_class, data_class_only, _ = func.read_csv_and_prep()

    # Initialise coefficients (weights) to 0
    coefficients = np.zeros(data_formatted.shape[1])

    predictions = None

    for count in range(EPOCH_COUNT):
        if count > 0 and count % 10000 == 0:
            print("Count: ", count)

        predictions = logistic_function(data_formatted, coefficients)

        # Calculating deviation between real and predicted
        error = data_class_only.T - predictions
        # Gradient of the log-likelihood with respect to the coefficients;
        # as it approaches zero the fit stops improving and the coefficients converge.
        gradient = np.dot(data_formatted.T, error)
        # Adjusting coefficients for more accurate result
        coefficients += gradient * LEARNING_RATE

    print("Coefficient values {}".format(coefficients))
    print("Accuracy: {}".format(
        func.calculate_accuracy(predictions, data_class_only)))
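
The func helpers are not shown here; a minimal sketch of what func.calculate_accuracy might do, assuming predictions are sigmoid outputs and data_class_only holds 0/1 class labels (both assumptions, not taken from the original):

def calculate_accuracy(predictions, labels):
    # Threshold the sigmoid outputs at 0.5 and report the fraction of matches
    predicted_classes = (np.asarray(predictions).ravel() >= 0.5).astype(int)
    true_classes = np.asarray(labels).ravel().astype(int)
    return np.mean(predicted_classes == true_classes)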
Example #3
# Train, evaluate, and time a forest for each tree count in N_trees_list
for item in N_trees_list:

    start_train_time = time.time()
    forest = random_forest_algorithm(train_df,
                                     n_trees=item,
                                     n_bootstrap=100,
                                     n_features=100,
                                     dt_max_depth=10)
    train_time = time.time() - start_train_time

    start_test_time = time.time()
    predictions = random_forest_predictions(test_df, forest)
    test_time = time.time() - start_test_time

    accuracy = calculate_accuracy(predictions, test_df.Label)

    accuracy_list.append(accuracy)
    train_time_list.append(train_time)
    test_time_list.append(test_time)

title_name = 'accuracy_k120_axis-aligned_varytrees2_bootstrap100_features100_depth10.png'

fig = plt.figure()
ax = fig.add_subplot(111)

lns1 = ax.plot(N_trees_list, train_time_list, '-r', label='train_time')
lns2 = ax.plot(N_trees_list, test_time_list, '-g', label='test_time')
ax2 = ax.twinx()
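
The snippet stops right after creating the twin axis; one way the figure could be finished, as a sketch (the accuracy line, axis labels, and save call below are assumptions, not taken from the original):

lns3 = ax2.plot(N_trees_list, accuracy_list, '-b', label='accuracy')
ax.set_xlabel('number of trees')
ax.set_ylabel('time (s)')
ax2.set_ylabel('accuracy')
# Merge the line handles from both axes into a single legend
lns = lns1 + lns2 + lns3
ax.legend(lns, [line.get_label() for line in lns], loc='best')
fig.savefig(title_name)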