def _read_chen_edges(knn_output, N):
    """Read the (row, col) edge lists from a Chen-format kNN output file.

    Each non-blank line is split on whitespace; the first field is taken as
    the 1-based column id and the second as the 1-based row id. Any further
    fields on the line are ignored.

    Args:
        knn_output: Path of the text file to read.
        N: Number of nodes; valid ids are 1..N.

    Returns:
        A ``(rows, cols)`` pair of 0-based integer numpy arrays.

    Raises:
        IndexError: If a line has fewer than two fields, or an id falls
            outside 1..N.
        ValueError: If a field is not an integer.
    """
    rows = []
    cols = []
    with open(knn_output, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            cols.append(int(fields[0]) - 1)
            rows.append(int(fields[1]) - 1)

    rows = np.asarray(rows, dtype=np.intp)
    cols = np.asarray(cols, dtype=np.intp)
    # Reject out-of-range ids explicitly: with plain numpy indexing an id of
    # 0 would become index -1 and silently mark the *last* node instead.
    if rows.size and (
        min(rows.min(), cols.min()) < 0 or max(rows.max(), cols.max()) >= N
    ):
        raise IndexError(
            "%s: node ids must be in the range 1..%d" % (knn_output, N)
        )
    return rows, cols


def get_accuracy(data_file, data_format, knn_output, alg, k, d, N):
    """Score a kNN graph against a brute-force ground truth.

    The ground truth is obtained from
    ``run_sklearn.get_knn_graph(data_file, data_format, k, d, N, "brute")``
    (presumably an N x N scipy sparse adjacency matrix -- confirm against
    run_sklearn). The score is ``1 - nnz(ground_truth - knn) / (N * N)``,
    i.e. one minus the share of the N*N matrix entries in which the two
    graphs differ.

    Args:
        data_file: Forwarded to ``run_sklearn.get_knn_graph``.
        data_format: Forwarded to ``run_sklearn.get_knn_graph``.
        knn_output: For ``alg == "chen"``, path to a text file whose lines
            start with two 1-based node ids (column id, then row id). For
            ``alg == "sklearn"``, the graph matrix itself.
        alg: Either ``"chen"`` or ``"sklearn"``.
        k: Forwarded to ``run_sklearn.get_knn_graph``.
        d: Forwarded to ``run_sklearn.get_knn_graph``.
        N: Number of nodes; the graphs are treated as N x N.

    Returns:
        The accuracy as a float.

    Raises:
        ValueError: If ``alg`` is not a supported value.
        IndexError: If the Chen file contains a node id outside 1..N.
    """
    # Fail fast, before the expensive brute-force ground-truth computation.
    if alg not in ("chen", "sklearn"):
        raise ValueError(
            "unknown alg %r; expected 'chen' or 'sklearn'" % (alg,)
        )

    # Progress messages below are kept verbatim from the original script.
    print("getting ground truth")
    ground_truth = run_sklearn.get_knn_graph(
        data_file, data_format, k, d, N, "brute"
    )
    print("finished getting ground truth")

    if alg == "chen":
        print("creating numpy knn graph from chen's output")
        print("created graph; opening file to fill graph")
        rows, cols = _read_chen_edges(knn_output, N)
        print("finished reading file, converting to sparse matrix")
        # Build the sparse matrix directly from the edge lists; a dense
        # N x N intermediate needs O(N^2) memory.
        knn_sparse = sparse.csr_matrix(
            (np.ones(rows.size, dtype=int), (rows, cols)), shape=(N, N)
        )
        # The constructor sums duplicate (row, col) pairs; clamp back to 1
        # so a repeated edge still counts as a single adjacency entry.
        knn_sparse.data[:] = 1
        print("finished converting to sparse matrix")
    else:
        knn_sparse = knn_output

    print("getting n_correct")
    n_incorrect = (ground_truth - knn_sparse).nnz
    return 1 - n_incorrect / float(N * N)
def _read_chen_edges(knn_output, N):
    """Read the (row, col) edge lists from a Chen-format kNN output file.

    Each non-blank line is split on whitespace; the first field is taken as
    the 1-based column id and the second as the 1-based row id. Any further
    fields on the line are ignored.

    Args:
        knn_output: Path of the text file to read.
        N: Number of nodes; valid ids are 1..N.

    Returns:
        A ``(rows, cols)`` pair of 0-based integer numpy arrays.

    Raises:
        IndexError: If a line has fewer than two fields, or an id falls
            outside 1..N.
        ValueError: If a field is not an integer.
    """
    rows = []
    cols = []
    with open(knn_output, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            cols.append(int(fields[0]) - 1)
            rows.append(int(fields[1]) - 1)

    rows = np.asarray(rows, dtype=np.intp)
    cols = np.asarray(cols, dtype=np.intp)
    # Reject out-of-range ids explicitly: with plain numpy indexing an id of
    # 0 would become index -1 and silently mark the *last* node instead.
    if rows.size and (
        min(rows.min(), cols.min()) < 0 or max(rows.max(), cols.max()) >= N
    ):
        raise IndexError(
            "%s: node ids must be in the range 1..%d" % (knn_output, N)
        )
    return rows, cols


def get_accuracy(data_file, data_format, knn_output, alg, k, d, N):
    """Score a kNN graph against a brute-force ground truth.

    The ground truth is obtained from
    ``run_sklearn.get_knn_graph(data_file, data_format, k, d, N, "brute")``
    (presumably an N x N scipy sparse adjacency matrix -- confirm against
    run_sklearn). The score is ``1 - nnz(ground_truth - knn) / (N * N)``,
    i.e. one minus the share of the N*N matrix entries in which the two
    graphs differ.

    Args:
        data_file: Forwarded to ``run_sklearn.get_knn_graph``.
        data_format: Forwarded to ``run_sklearn.get_knn_graph``.
        knn_output: For ``alg == "chen"``, path to a text file whose lines
            start with two 1-based node ids (column id, then row id). For
            ``alg == "sklearn"``, the graph matrix itself.
        alg: Either ``"chen"`` or ``"sklearn"``.
        k: Forwarded to ``run_sklearn.get_knn_graph``.
        d: Forwarded to ``run_sklearn.get_knn_graph``.
        N: Number of nodes; the graphs are treated as N x N.

    Returns:
        The accuracy as a float.

    Raises:
        ValueError: If ``alg`` is not a supported value.
        IndexError: If the Chen file contains a node id outside 1..N.
    """
    # Fail fast, before the expensive brute-force ground-truth computation.
    if alg not in ("chen", "sklearn"):
        raise ValueError(
            "unknown alg %r; expected 'chen' or 'sklearn'" % (alg,)
        )

    # Progress messages below are kept verbatim from the original script.
    print("getting ground truth")
    ground_truth = run_sklearn.get_knn_graph(
        data_file, data_format, k, d, N, "brute"
    )
    print("finished getting ground truth")

    if alg == "chen":
        print("creating numpy knn graph from chen's output")
        print("created graph; opening file to fill graph")
        rows, cols = _read_chen_edges(knn_output, N)
        print("finished reading file, converting to sparse matrix")
        # Build the sparse matrix directly from the edge lists; a dense
        # N x N intermediate needs O(N^2) memory.
        knn_sparse = sparse.csr_matrix(
            (np.ones(rows.size, dtype=int), (rows, cols)), shape=(N, N)
        )
        # The constructor sums duplicate (row, col) pairs; clamp back to 1
        # so a repeated edge still counts as a single adjacency entry.
        knn_sparse.data[:] = 1
        print("finished converting to sparse matrix")
    else:
        knn_sparse = knn_output

    print("getting n_correct")
    n_incorrect = (ground_truth - knn_sparse).nnz
    return 1 - n_incorrect / float(N * N)
import sys
import run_sklearn

# Benchmark driver: runs run_sklearn.get_knn_graph on every dataset below
# with each of the listed algorithms.
#
# Each dataset record is laid out as [data_file, data_format, d, N, k],
# matching how the fields are forwarded to
# get_knn_graph(data_file, data_format, k, d, N, alg).
poker = ["/scratch/02234/kmcardle/data/poker.t", "libsvm", 10, 1000000, 8]
rna = ["/scratch/02234/kmcardle/data/cod-rna.t", "libsvm", 8, 271617, 8]
cadata = ["/scratch/02234/kmcardle/data/cadata", "libsvm", 8, 20640, 8]
covtype = [
    "/scratch/02234/kmcardle/data/covtype.libsvm.binary",
    "libsvm",
    54,
    581012,
    8,
]
year = [
    "/scratch/02234/kmcardle/data/YearPredictionMSD",
    "libsvm",
    90,
    463715,
    8,
]
aloi = ["/scratch/02234/kmcardle/data/aloi", "libsvm", 128, 108000, 8]

datasets = [poker, rna, cadata, covtype, year, aloi]
algs = ["kd_tree", "ball_tree", "brute"]

for dataset in datasets:
    data_file, data_format, d, N, k = dataset
    # "%s %s" reproduces the single space the print statement put between
    # its two operands.
    print("%s %s" % (data_file, " -----------------------"))
    for alg in algs:
        print("%s %s" % (alg, ": +++++++++++++"))
        # The return value is not used here.
        run_sklearn.get_knn_graph(data_file, data_format, k, d, N, alg)
import sys
import run_sklearn

# Benchmark driver: runs run_sklearn.get_knn_graph on every dataset below
# with each of the listed algorithms.
#
# Each dataset record is laid out as [data_file, data_format, d, N, k],
# matching how the fields are forwarded to
# get_knn_graph(data_file, data_format, k, d, N, alg).
poker = ["/scratch/02234/kmcardle/data/poker.t", "libsvm", 10, 1000000, 8]
rna = ["/scratch/02234/kmcardle/data/cod-rna.t", "libsvm", 8, 271617, 8]
cadata = ["/scratch/02234/kmcardle/data/cadata", "libsvm", 8, 20640, 8]
covtype = [
    "/scratch/02234/kmcardle/data/covtype.libsvm.binary",
    "libsvm",
    54,
    581012,
    8,
]
year = [
    "/scratch/02234/kmcardle/data/YearPredictionMSD",
    "libsvm",
    90,
    463715,
    8,
]
aloi = ["/scratch/02234/kmcardle/data/aloi", "libsvm", 128, 108000, 8]

datasets = [poker, rna, cadata, covtype, year, aloi]
algs = ["kd_tree", "ball_tree", "brute"]

for dataset in datasets:
    data_file, data_format, d, N, k = dataset
    # "%s %s" reproduces the single space the print statement put between
    # its two operands.
    print("%s %s" % (data_file, " -----------------------"))
    for alg in algs:
        print("%s %s" % (alg, ": +++++++++++++"))
        # The return value is not used here.
        run_sklearn.get_knn_graph(data_file, data_format, k, d, N, alg)