# Data used for the accuracy check below, read from the "t10k-*" files.
c = get_label_data("t10k-labels-idx1-ubyte")
y = get_feature_data("t10k-images-idx3-ubyte")

# Training data, read from the "train-*" files.  The commented-out variant
# slices both arrays down to 10000 entries (presumably the first 10000
# training samples -- confirm the axis layout against the loaders).
# cTrain = get_label_data("train-labels-idx1-ubyte")[:,:10000]
# yTrain = get_feature_data("train-images-idx3-ubyte")[:10000, :]
cTrain = get_label_data("train-labels-idx1-ubyte")
yTrain = get_feature_data("train-images-idx3-ubyte")

# Normalise along axis 1 and wrap the result as np.matrix.
# NOTE(review): `normalize` is presumably sklearn.preprocessing.normalize
# (per-row scaling) -- confirm against this file's imports.
y = np.matrix(normalize(y, axis=1))
yTrain = np.matrix(normalize(yTrain, axis=1))
# print(yTrain.shape)
# print(cTrain.shape)

# Build the starting weight matrix from the training data and report how it
# scores on the "t10k" data before any further optimisation.
w_init = get_weight_matrix(yTrain, cTrain)
score1 = calculate_accuracy(y, w_init, c)
print("initial accuracy", score1)

# Testing CGLS
# hessian_handle = lambda x: hessian_sub(yTrain.T, w_init.T, x)
# D = cgls_wiki(
#     hessian_handle,
#     -1 * softmax_gradient(cTrain, yTrain.T, w_init.T),
#     k=200,
#     x_0=w_init.T
# )
# diff_matrix = -1 * softmax_gradient(cTrain, yTrain.T, w_init.T) - hessian_sub(yTrain.T,w_init.T, D)
# print("norm of diff_matrix", np.linalg.norm(diff_matrix))
return 1 / (1 + np.exp(-values)) if __name__ == '__main__': data_formatted, data_without_class, data_class_only, _ = func.read_csv_and_prep( ) # Initialise coefficients (weights) to 0 coefficients = np.zeros(data_formatted.shape[1]) predictions = None for count in range(EPOCH_COUNT): if count > 0 and count % 10000 == 0: print("Count: ", count) predictions = logistic_function(data_formatted, coefficients) # Calculating deviation between real and predicted error = data_class_only.T - predictions # The gradient function is to be reduced, as we approach 0 we approach the least error (as the gradient tends # towards 0, our function shows that our results are becoming more accurate) gradient = np.dot(data_formatted.T, error) # Adjusting coefficients for more accurate result coefficients += gradient * LEARNING_RATE print("Coefficient values {}".format(coefficients)) print("Accuracy: {}".format( func.calculate_accuracy(predictions, data_class_only)))
# Benchmark the random forest for every tree count in N_trees_list, recording
# training time, prediction time and accuracy on test_df for each setting.
for n_trees in N_trees_list:
    # perf_counter() is a monotonic high-resolution clock, so the measured
    # durations cannot be distorted by system clock adjustments.
    start_train_time = time.perf_counter()
    forest = random_forest_algorithm(train_df,
                                     n_trees=n_trees,
                                     n_bootstrap=100,
                                     n_features=100,
                                     dt_max_depth=10)
    train_time = time.perf_counter() - start_train_time

    start_test_time = time.perf_counter()
    predictions = random_forest_predictions(test_df, forest)
    test_time = time.perf_counter() - start_test_time

    accuracy = calculate_accuracy(predictions, test_df.Label)

    accuracy_list.append(accuracy)
    train_time_list.append(train_time)
    test_time_list.append(test_time)

# Name encoding the experiment settings.
title_name = 'accuracy_k120_axis-aligned_varytrees2_bootstrap100_features100_depth10.png'

# Plot both timing curves against the number of trees on the primary axis;
# a twin y-axis is created over the same x-axis for a further series.
fig = plt.figure()
ax = fig.add_subplot(111)
lns1 = ax.plot(N_trees_list, train_time_list, '-r', label='train_time')
lns2 = ax.plot(N_trees_list, test_time_list, '-g', label='test_time')
ax2 = ax.twinx()