def main(): """Problem 1(e): Gaussian discriminant analysis (GDA) Args: train_path: Path to CSV file containing dataset for training. eval_path: Path to CSV file containing dataset for evaluation. pred_path: Path to save predictions. """ # Load dataset x_24, y_24 = util.load_csv('../Data/ds1_x2_x4.csv', add_intercept=False) Gauss_analysis = GDA() theta = Gauss_analysis.fit(x_24, y_24) print(theta) np.savetxt('../Output/GDA_1.txt', theta) util.plot(x_24, y_24, theta, 'Bridge Age', 'Earthquake Magnitude', '../Output/GDA_visual_1.png') x_25, y_25 = util.load_csv('../Data/ds1_x2_x5.csv', add_intercept=False) Gauss_analysis = GDA() theta = Gauss_analysis.fit(x_25, y_25) print(theta) np.savetxt('../Output/GDA_2.txt', theta) util.plot(x_25, y_25, theta, 'Bridge Age', 'Distance to Epicenter', '../Output/GDA_visual_2.png') x_45, y_45 = util.load_csv('../Data/ds1_x4_x5.csv', add_intercept=False) Gauss_analysis = GDA() theta = Gauss_analysis.fit(x_45, y_45) print(theta) np.savetxt('../Output/GDA_3.txt', theta) util.plot(x_45, y_45, theta, 'Earthquake Magnitude', 'Distance to Epicenter', '../Output/GDA_visual_3.png')
def train_imdb():
    train_texts, train_labels = util.load_csv('data/imdb_train.csv')
    val_texts, val_labels = util.load_csv('data/imdb_valid.csv')
    test_texts, test_labels = util.load_csv('data/imdb_test.csv')

    train_m_path = "saved/imdb_train_matrix.gz"
    val_m_path = "saved/imdb_val_matrix.gz"
    test_m_path = "saved/imdb_test_matrix.gz"

    # Transform text to phoneme-count matrices, caching each matrix on disk
    # so repeated runs skip the expensive transformation.
    if path.exists(train_m_path):
        train_matrix = np.loadtxt(train_m_path)
    else:
        train_matrix = util.transform_text_to_phon_cnts(train_texts)
        np.savetxt(train_m_path, train_matrix)
    if path.exists(val_m_path):
        val_matrix = np.loadtxt(val_m_path)
    else:
        val_matrix = util.transform_text_to_phon_cnts(val_texts)
        np.savetxt(val_m_path, val_matrix)
    if path.exists(test_m_path):
        test_matrix = np.loadtxt(test_m_path)
    else:
        test_matrix = util.transform_text_to_phon_cnts(test_texts)
        np.savetxt(test_m_path, test_matrix)

    model = {
        "train_matrix": train_matrix,
        "val_matrix": val_matrix,
        "test_matrix": test_matrix,
        "train_labels": train_labels,
        "val_labels": val_labels,
        "test_labels": test_labels,
    }
    return model
def train_perceptron(kernel_name, kernel, learning_rate):
    """Train a perceptron with the given kernel.

    This function trains a perceptron with a given kernel and then
    uses that perceptron to make predictions.

    The output predictions are saved to src/output/p05_{kernel_name}_predictions.txt.
    The output plots are saved to src/output_{kernel_name}_output.pdf.

    Args:
        kernel_name: The name of the kernel.
        kernel: The kernel function.
        learning_rate: The learning rate for training.
    """
    train_x, train_y = util.load_csv('../data/ds5_train.csv')

    state = initial_state()
    for x_i, y_i in zip(train_x, train_y):
        update_state(state, kernel, learning_rate, x_i, y_i)

    # The original reloaded '../data/ds5_train.csv' here; evaluating on the
    # held-out split is presumably what was intended.
    test_x, test_y = util.load_csv('../data/ds5_test.csv')

    plt.figure(figsize=(12, 8))
    util.plot_contour(lambda a: predict(state, kernel, a))
    util.plot_points(test_x, test_y)
    plt.savefig('./output/p05_{}_output.pdf'.format(kernel_name))

    predict_y = [
        predict(state, kernel, test_x[i, :]) for i in range(test_y.shape[0])
    ]
    np.savetxt('./output/p05_{}_predictions.txt'.format(kernel_name), predict_y)
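# initial_state, update_state, and predict are referenced above but not shown.
# A minimal sketch of a mistake-driven kernel perceptron consistent with those
# calls (labels assumed in {0, 1}; the exact state layout and update rule in
# the original may differ):
def initial_state():
    # Support vectors and their coefficients, accumulated during training.
    return {'xs': [], 'betas': []}

def predict(state, kernel, x):
    # Sign of the kernel expansion sum_j beta_j * K(x_j, x), mapped to {0, 1}.
    score = sum(b * kernel(x_j, x)
                for x_j, b in zip(state['xs'], state['betas']))
    return 1 if score >= 0 else 0

def update_state(state, kernel, learning_rate, x, y):
    # Only misclassified examples become support vectors; (y - y_hat) is
    # +1 or -1, so learning_rate scales the correction toward the true label.
    y_hat = predict(state, kernel, x)
    if y_hat != y:
        state['xs'].append(x)
        state['betas'].append(learning_rate * (y - y_hat))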
def main():
    # Set DEBUG = True to visualize the two datasets before training.
    DEBUG = False
    if DEBUG:
        from util import plot_points
        from matplotlib import pyplot as plt
        plt.figure()
        Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=False)
        Ya = (Ya == 1).astype(float)  # np.float was removed in NumPy 1.24
        plot_points(Xa, Ya)
        plt.figure()
        Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=False)
        Yb = (Yb == 1).astype(float)
        plot_points(Xb, Yb)
        plt.show()
        import sys
        sys.exit()

    # print('==== Training model on data set A ====')
    # Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    # logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    logistic_regression(Xb, Yb)
def process_eth_staking(th_stake_path, beth_distr_path):
    stake_rows = util.load_csv(th_stake_path)
    distr_rows = util.load_csv(beth_distr_path)
    stake_fe = extract_stake_flow_events(stake_rows)
    distr_fe = extract_distr_flow_events(distr_rows)
    res = stake_fe + distr_fe
    return res
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('ds1_a.csv', add_intercept=True)
    logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('ds1_b.csv', add_intercept=True)
    logistic_regression(Xb, Yb)
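# logistic_regression is called throughout these snippets but never shown. A
# minimal sketch consistent with the {1, -1} labels used in these datasets
# (full-batch gradient descent on the logistic loss; the originals clearly
# vary, e.g. one returns five values, so this is illustrative only):
import numpy as np

def logistic_regression(X, Y, learning_rate=0.1, max_iter=100000, eps=1e-15):
    m, n = X.shape
    theta = np.zeros(n)
    for it in range(1, max_iter + 1):
        # sigmoid(-y * theta^T x): probability mass on the wrong side of the
        # boundary for each example, with labels in {1, -1}.
        probs = 1.0 / (1.0 + np.exp(Y * X.dot(theta)))
        grad = -(1.0 / m) * X.T.dot(probs * Y)
        prev_theta = theta
        theta = theta - learning_rate * grad
        if np.linalg.norm(theta - prev_theta) < eps:
            print('Converged in %d iterations' % it)
            break
    return theta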
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    logistic_regression(Xa, Ya)
    plot(Xa, Ya, 'output/data_a.png')

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    # logistic_regression(Xb, Yb)
    plot(Xb, Yb, 'output/data_b.png')
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('ds1_a.csv', add_intercept=True)
    a1, a2, a3, a4, a5 = logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('ds1_b.csv', add_intercept=True)
    b1, b2, b3, b4, b5 = logistic_regression(Xb, Yb)

    return a1, a2, a3, a4, a5, b1, b2, b3, b4, b5
def main(train_path, eval_path):
    x_train, y_train = util.load_csv(train_path)
    x_eval, y_eval = util.load_csv(eval_path)

    pct = Perceptron('dot', dot_kernel)
    train(pct, x_train, y_train, x_eval, y_eval)

    pct = Perceptron('rbf', rbf_kernel)
    train(pct, x_train, y_train, x_eval, y_eval)
def load_dataset(path):
    train = util.load_csv(os.path.join(path, 'train.txt'))
    test = util.load_csv(os.path.join(path, 'test.txt'))
    # First column is the label; the remaining columns are features.
    X_train = train[:, 1:].astype('float32')
    X_test = test[:, 1:].astype('float32')
    Y_train = train[:, 0].astype('int')
    Y_test = test[:, 0].astype('int')
    return X_train, X_test, Y_train, Y_test
def main():
    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)

    plt.figure()  # separate figures so dataset B isn't drawn over dataset A
    plot_points(Xa, Ya)
    plt.savefig('output/p01_b_a.png')
    plt.figure()
    plot_points(Xb, Yb)
    plt.savefig('output/p01_b_b.png')

    logistic_regression(Xa, Ya)
def test():
    # ==== Plot data set A ====
    x, y = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    plt.figure()
    plt.plot(x[y == 1, -2], x[y == 1, -1], 'bx', linewidth=2)
    plt.plot(x[y == -1, -2], x[y == -1, -1], 'go', linewidth=2)
    plt.show()

    # ==== Plot data set B ====
    x, y = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    plt.figure()
    plt.plot(x[y == 1, -2], x[y == 1, -1], 'bx', linewidth=2)
    plt.plot(x[y == -1, -2], x[y == -1, -1], 'go', linewidth=2)
    plt.show()
def drawPlotInstance():
    attr1, classes1 = util.load_csv(train1)
    attr2, classes2 = util.load_csv(train2)

    # classes1/classes2 hold the last column of each training set;
    # Counter counts the number of instances of each class.
    counts_1 = Counter(classes1)
    counts_2 = Counter(classes2)

    length_1 = len(counts_1)
    length_2 = len(counts_2)

    # ==================== dataset 1
    # build x axis (class indices) and y axis (per-class counts)
    x_array_1 = list(range(length_1))
    y_count_array_1 = [counts_1[i] for i in range(length_1)]

    # ==================== dataset 2
    x_array_2 = list(range(length_2))
    y_count_array_2 = [counts_2[i] for i in range(length_2)]

    # plotting
    figures, axes = plt.subplots(2)
    axes[0].plot(x_array_1, y_count_array_1)
    # axes[0].set_title("Dataset 1")
    axes[0].set(ylabel="Dataset 1")
    axes[0].set(xlabel="Classes")
    axes[1].plot(x_array_2, y_count_array_2)
    # axes[1].set_title("Dataset 2")
    axes[1].set(ylabel="Dataset 2")
    axes[1].set(xlabel="Classes")

# drawPlotInstance(classes1, classes2)
def get_ge(net_name, model_parameters, load_parameters):
    args = util.EmptySpace()
    for key, value in load_parameters.items():
        setattr(args, key, value)
    folder = "/media/rico/Data/TU/thesis/runs{}/{}".format(
        args.experiment, util.generate_folder_name(args))

    ge_x, ge_y = [], []
    lta, lva, ltl, lvl = [], [], [], []
    for run in runs:
        filename = '{}/model_r{}_{}'.format(
            folder, run, get_save_name(net_name, model_parameters))
        ge_path = '{}.exp'.format(filename)

        # np.float was removed in NumPy 1.24; plain float is equivalent here.
        y_r = util.load_csv(ge_path, delimiter=' ', dtype=float)
        x_r = range(len(y_r))
        ge_x.append(x_r)
        ge_y.append(y_r)

        if show_losses or show_acc:
            ta, va, tl, vl = util.load_loss_acc(filename)
            lta.append(ta)
            lva.append(va)
            ltl.append(tl)
            lvl.append(vl)

    return ge_x, ge_y, (lta, lva, ltl, lvl)
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('ds1_a.csv', add_intercept=True)
    plot_cost(Xa, Ya)
    # util.plot_points(Xa[:, 1:], Ya, theta)
    plt.show()
    thetas = logistic_regression(Xa, Ya)
    # util.plot_points(Xa[:, 1:], Ya, thetas)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('ds1_b.csv', add_intercept=True)
    plot_cost(Xb, Yb)
    # Xb += np.random.normal(scale=0.03, size=Xb.shape)
    # util.plot_points(Xb[:, 1:], Yb)
    plt.show()
    thetas = logistic_regression(Xb, Yb)
def process_trade_history(filepath):
    res = {}
    info = {}
    rows = util.load_csv(filepath)
    clean_data(rows)
    flow_events = extract_flow_events(rows, coins_used)

    pairs = util.get_all_instances(rows, tdcols["pair"])
    info["Pairs"] = pairs

    # rows_by_month = util.group_by_month(rows, tdcols["date"])
    #
    # # TODO: Revamp
    # avg_price = {}
    # for month in rows_by_month:
    #     if month not in res:
    #         res[month] = {}
    #     for pair in pairs:
    #         pair_rows = util.filter_by_kv(rows, tdcols["pair"], pair)
    #         sell_pair_rows = util.filter_by_kv(
    #             pair_rows, tdcols["side"], "SELL")
    #         buy_pair_rows = util.filter_by_kv(pair_rows, tdcols["side"], "BUY")
    #         if len(sell_pair_rows) > 0:
    #             avg_price[f"{pair}_SELL"] = util.weighted_average(
    #                 sell_pair_rows, tdcols["executed"], tdcols["price"])
    #         if len(buy_pair_rows) > 0:
    #             avg_price[f"{pair}_BUY"] = util.weighted_average(
    #                 buy_pair_rows, tdcols["executed"], tdcols["price"])
    #     res[month]["Mean Buy Price"] = avg_price

    return flow_events, info
def run():
    # ========= DATASET 1 =========
    filepath = "./output/Base-DT-DS1.csv"
    X_train, Y_train = util.load_csv(util.train_1_filepath)
    X_test, Y_test = util.load_csv(util.test_with_label_1_filepath)

    clf = tree.DecisionTreeClassifier(criterion="entropy")
    # Train
    clf = clf.fit(X_train, Y_train)
    # Test/Predict
    Y_pred = clf.predict(X_test)
    # Confusion Matrix
    confusion_matrix = metrics.confusion_matrix(Y_test, Y_pred)
    metrics.plot_confusion_matrix(clf, X_test, Y_test)
    # Evaluation
    classification_report = metrics.classification_report(Y_test, Y_pred)
    # Debug print
    print_debug(1, clf, Y_pred, confusion_matrix, classification_report)
    # Save
    util.write_csv(filepath, Y_test, Y_pred, confusion_matrix)

    # ========= DATASET 2 =========
    filepath = "./output/Base-DT-DS2.csv"
    X_train, Y_train = util.load_csv(util.train_2_filepath)
    X_test, Y_test = util.load_csv(util.test_with_label_2_filepath)

    clf = tree.DecisionTreeClassifier(criterion="entropy")
    # Train
    clf = clf.fit(X_train, Y_train)
    # Test/Predict
    Y_pred = clf.predict(X_test)
    # Confusion Matrix
    confusion_matrix = metrics.confusion_matrix(Y_test, Y_pred)
    metrics.plot_confusion_matrix(clf, X_test, Y_test)
    # Evaluation
    classification_report = metrics.classification_report(Y_test, Y_pred)
    # Debug print
    print_debug(2, clf, Y_pred, confusion_matrix, classification_report)
    # Save
    util.write_csv(filepath, Y_test, Y_pred, confusion_matrix)

# DEBUG --------------------------------------------------------------------
# run()
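# Note: metrics.plot_confusion_matrix (used above and in the Perceptron and
# GaussianNB runners below) was deprecated in scikit-learn 1.0 and removed in
# 1.2. On recent versions the equivalent call is:
#
#     from sklearn.metrics import ConfusionMatrixDisplay
#     ConfusionMatrixDisplay.from_estimator(clf, X_test, Y_test)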
def main():
    print('==== Training model on data set A ====')
    X_24, Y_24 = util.load_csv('../Data/ds1_x2_x4.csv', add_intercept=True)
    theta = logistic_regression(X_24, Y_24)
    np.savetxt('../Output/lr_1.txt', theta)
    util.plot(X_24, Y_24, theta, 'Bridge Age', 'Earthquake Magnitude',
              '../Output/lr_visual_1.png')

    X_25, Y_25 = util.load_csv('../Data/ds1_x2_x5.csv', add_intercept=True)
    theta = logistic_regression(X_25, Y_25)
    np.savetxt('../Output/lr_2.txt', theta)
    util.plot(X_25, Y_25, theta, 'Bridge Age', 'Distance to Epicenter',
              '../Output/lr_visual_2.png')

    X_45, Y_45 = util.load_csv('../Data/ds1_x4_x5.csv', add_intercept=True)
    theta = logistic_regression(X_45, Y_45)
    np.savetxt('../Output/lr_3.txt', theta)
    util.plot(X_45, Y_45, theta, 'Earthquake Magnitude', 'Distance to Epicenter',
              '../Output/lr_visual_3.png')
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    # logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    # logistic_regression(Xb, Yb)

    # Training on Xb, Yb does not converge the way it does on Xa, Ya.
    # Let's examine the data:
    #   each y is in {1, -1}
    #   each x has x1, x2 and an x0=1 intercept
    print("A:")
    print(Xa.shape, Ya.shape)
    print(Xa[0])
    print(set(Ya))

    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=False)
    x1 = Xa[:, 0]
    x2 = Xa[:, 1]
    plt.figure()
    plt.scatter(x1[Ya == -1], x2[Ya == -1], color='red')
    plt.scatter(x1[Ya == 1], x2[Ya == 1], color='blue')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title("Xa")
    plt.savefig("p01_lr_Xa")

    print("B:")
    print(Xb.shape, Yb.shape)
    print(Xb[0])
    print(set(Yb))

    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=False)
    x1 = Xb[:, 0]
    x2 = Xb[:, 1]
    plt.figure()
    plt.scatter(x1[Yb == -1], x2[Yb == -1], color='red')
    plt.scatter(x1[Yb == 1], x2[Yb == 1], color='blue')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title("Xb")
    plt.savefig("p01_lr_Xb")
def train_perceptron(train_path, test_path, kernel_name, kernel, learning_rate):
    """Train a perceptron with the given kernel.

    This function trains a perceptron with a given kernel and then
    uses that perceptron to make predictions.

    The output predictions are saved to src/output/p05_{kernel_name}_predictions.txt.
    The output plots are saved to src/output_{kernel_name}_output.pdf.

    Args:
        train_path: Path to the training dataset CSV.
        test_path: Path to the test dataset CSV.
        kernel_name: The name of the kernel.
        kernel: The kernel function.
        learning_rate: The learning rate for training.
    """
    train_x, train_y = util.load_csv(train_path)
    test_x, test_y = util.load_csv(test_path)

    state = initial_state()
    for x_i, y_i in zip(train_x, train_y):
        update_state(state, kernel, learning_rate, x_i, y_i)

    plt.figure()
    util.plot_contour(lambda a: predict(state, kernel, a))
    util.plot_points(test_x, test_y)
    plt.title(f"Kernel: {kernel_name} || Test data, color corresponds to real label")
    plt.legend()
    plt.xlabel("x1")
    plt.ylabel("x2")

    y_pred = np.array(
        [predict(state, kernel, test_x[i, :]) for i in range(test_y.shape[0])])
    plt.figure()
    util.plot_points(test_x, y_pred)
    plt.title(f"Kernel: {kernel_name} || Test data, color corresponds to predicted label")
    plt.legend()
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.show()
def load_all_data(use_hw, data_set):
    # Load Data
    loader = util.load_data_set(data_set)
    data_set_name = str(data_set)
    total_x_attack, total_y_attack = loader({
        'use_hw': use_hw,
        'traces_path': '/media/rico/Data/TU/thesis/data'
    })
    # np.int was removed in NumPy 1.24; plain int is equivalent here.
    total_key_guesses = np.transpose(
        util.load_csv(
            '/media/rico/Data/TU/thesis/data/{}/Value/key_guesses_ALL.csv'.format(
                data_set_name),
            delimiter=' ',
            dtype=int))
    real_key = util.load_csv(
        '/media/rico/Data/TU/thesis/data/{}/secret_key.csv'.format(data_set_name),
        dtype=int)
    return total_x_attack, total_y_attack, total_key_guesses, real_key
def _cache(data: TextIO, model_name: Text, output: BinaryIO, **kwargs):
    cpu = require_device(prefer_cuda=False)
    model_type = models.select(model_name)
    model = ModelInterface(model_type, cpu, False)

    csv = util.load_csv(data)
    cache = {}
    for smiles in csv.keys():
        cache_key = (smiles, )  # memcached is indexed on argument list
        processed = model.process(smiles)  # renamed to avoid shadowing the `data` argument
        cache[cache_key] = model.encode_data(processed, **kwargs)

    pickle.dump(cache, output)
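# A hypothetical invocation of _cache (the paths and model name here are
# illustrative assumptions, not from the original repo): the text handle
# feeds util.load_csv, and the binary handle receives the pickled
# {(smiles,): encoding} dict.
with open('data/molecules.csv') as data_file, \
        open('saved/encodings.pkl', 'wb') as out_file:
    _cache(data_file, 'my_model', out_file)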
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    # logistic_regression(Xa, Ya)
    plt.figure(1)
    for i in range(Ya.shape[0]):
        if Ya[i] == 1:
            plt.plot(Xa[i][1], Xa[i][2], 'bx')
        else:
            plt.plot(Xa[i][1], Xa[i][2], 'go')

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    logistic_regression(Xb, Yb)
    plt.figure(2)
    for i in range(Yb.shape[0]):  # the original iterated over Ya.shape[0] here
        if Yb[i] == 1:
            plt.plot(Xb[i][1], Xb[i][2], 'bx')
        else:
            plt.plot(Xb[i][1], Xb[i][2], 'go')
    plt.show()
def main():
    # # Plot dataset A and B
    # from util import plot_points
    # import matplotlib.pyplot as plt
    # Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=False)
    # plt.figure()
    # plot_points(Xa, (Ya == 1).astype(int))
    # plt.savefig('output/ds1_a.png')
    # Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=False)
    # plt.figure()
    # plot_points(Xb, (Yb == 1).astype(int))
    # plt.savefig('output/ds1_b.png')

    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    logistic_regression(Xb, Yb)
def main():
    # print('==== Training model on data set A ====')
    # Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=False)
    # util.plot_points(Xa, Ya)
    # Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=False)
    # util.plot_points(Xb, Yb)
    # Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    # logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    logistic_regression(Xb, Yb)
def run_dataset(filepath_train, filepath_test, filepath_output):
    x_train, y_train = util.load_csv(filepath_train)
    x_test, y_test = util.load_csv(filepath_test)

    clf = Perceptron()
    y_pred = clf.fit(x_train, y_train).predict(x_test)
    train_accuracy = clf.score(x_train, y_train)
    test_accuracy = metrics.accuracy_score(y_test, y_pred)

    # confusion matrix
    cmatrix = metrics.confusion_matrix(y_test, y_pred)
    metrics.plot_confusion_matrix(clf, x_test, y_test)

    # evaluation
    classification_report = metrics.classification_report(y_test, y_pred)

    # print to output file
    util.write_csv(filepath_output, y_test, y_pred, cmatrix)

    # print to console for debug purposes
    print_result(clf, train_accuracy, test_accuracy, y_pred, cmatrix,
                 classification_report, filepath_output)
def main(): """Problem 1(e): Gaussian discriminant analysis (GDA) Args: train_path: Path to CSV file containing dataset for training. eval_path: Path to CSV file containing dataset for evaluation. pred_path: Path to save predictions. """ # Load dataset x_45, y_45 = util.load_csv('../Data/ds1_x4_x5.csv', add_intercept=True) Gauss_analysis = GDA() theta = Gauss_analysis.fit(x_45, y_45) print(theta) np.savetxt('../Output/GDA_3.txt', theta) util.plot(x_45, y_45, theta, '../Output/GDA_visual_3.png')
def load_data(args):
    _x_attack, _y_attack, _real_key, _dk_plain, _key_guesses = None, None, None, None, None

    ###################
    # Load the traces #
    ###################
    loader = util.load_data_set(args.data_set)
    total_x_attack, total_y_attack, plain = loader({
        'use_hw': args.use_hw,
        'traces_path': args.traces_path,
        'raw_traces': args.raw_traces,
        'start': args.train_size + args.validation_size,
        'size': args.attack_size,
        'domain_knowledge': True,
        'use_noise_data': args.use_noise_data,
        'data_set': args.data_set,
        'noise_level': args.noise_level})
    if plain is not None:
        _dk_plain = torch.from_numpy(plain).cuda()
    print('Loading key guesses')

    ####################################
    # Load the key guesses and the key #
    ####################################
    # np.int was removed in NumPy 1.24; plain int is equivalent here.
    data_set_name = str(args.data_set)
    _key_guesses = util.load_csv(
        '{}/{}/Value/key_guesses_ALL_transposed.csv'.format(
            args.traces_path, data_set_name),
        delimiter=' ',
        dtype=int,
        start=args.train_size + args.validation_size,
        size=args.attack_size)
    _real_key = util.load_csv(
        '{}/{}/secret_key.csv'.format(args.traces_path, data_set_name),
        dtype=int)

    _x_attack = total_x_attack
    _y_attack = total_y_attack
    return _x_attack, _y_attack, _key_guesses, _real_key, _dk_plain
def run_dataset(filepath_train, filepath_test, filepath_output):
    x_train, y_train = util.load_csv(filepath_train)
    x_test, y_test = util.load_csv(filepath_test)

    gnb = GaussianNB()
    y_pred = gnb.fit(x_train, y_train).predict(x_test)
    train_accuracy = gnb.score(x_train, y_train)
    test_accuracy = metrics.accuracy_score(y_test, y_pred)

    # confusion matrix
    cmatrix = metrics.confusion_matrix(y_test, y_pred)
    metrics.plot_confusion_matrix(gnb, x_test, y_test)

    # evaluation
    classification_report = metrics.classification_report(y_test, y_pred)

    # output file
    util.write_csv(filepath_output, y_test, y_pred, cmatrix)

    # print result to console
    print_result(gnb, train_accuracy, test_accuracy, y_pred, cmatrix,
                 classification_report, filepath_output)
def _combine_csvs(data_dir):
    # Collect CSVs in data_dir and in its immediate subdirectories.
    data = []
    csvs = [os.path.join(data_dir, x) for x in os.listdir(data_dir)
            if x.split('.')[-1] == 'csv']
    sub_dirs = [x for x in os.listdir(data_dir)
                if os.path.isdir(os.path.join(data_dir, x))]
    for sd in sub_dirs:
        csvs.extend([os.path.join(data_dir, sd, x)
                     for x in os.listdir(os.path.join(data_dir, sd))
                     if x.split('.')[-1] == 'csv'])
    for csv_ in csvs:
        data.extend(util.load_csv(csv_))
    return data
import sys

from util import load_csv


def edit_dist(a, b):
    # Standard Levenshtein distance via dynamic programming, one row at a
    # time. The first row (distance from the empty prefix of a) is
    # 0..len(b); the original snippet was missing the function header and
    # this initialization, and used Python 2 print statements below.
    present = list(range(len(b) + 1))
    for i in range(1, len(a) + 1):
        prev = present
        present = [i]
        for j in range(1, len(b) + 1):
            if a[i - 1] == b[j - 1]:
                present.append(prev[j - 1])
            else:
                present.append(min(prev[j - 1], prev[j], present[j - 1]) + 1)
    return present[-1]


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print("Usage: python validate.py <ground_truth_file> <output_label_file>")
        exit()
    ground_truth = load_csv(sys.argv[1])
    label = load_csv(sys.argv[2])

    # Match each ground-truth entry to the corresponding output label.
    all_dist = 0
    for truth in ground_truth:
        idx = -1
        for i in range(0, len(label)):
            if truth[0] == label[i][0]:
                idx = i
                break
        if idx == -1:
            print("Entry not found: %s" % truth[0])
            break
        all_dist += edit_dist(truth[1], label[idx][1])
    print("Average edit distance = %f" % (float(all_dist) / len(ground_truth)))
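# A quick sanity check of the reconstructed edit_dist (it is the standard
# Levenshtein distance):
#
#     edit_dist("kitten", "sitting")  # == 3: k->s, e->i, insert g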