def compare_sharptni_tnet_cdc(threshold):
    """Write a per-folder SharpTNI vs. TNet score table for the CDC data.

    For every sub-folder of ``CDC/`` (in sorted order) the true transmission
    edges from ``cdc.get_true_transmission_edges`` are compared against

    * the SharpTNI summary file in ``sharptni_output`` whose name starts with
      ``sample_sankoff_summary``, and
    * the TNet result ``tnet_new_mod_rand_bootstrap/25.tnet``.

    One CSV row per folder is written under ``results/sharptni/``; the three
    values returned by ``get_prec_rec_f1`` for each method fill the columns
    named in the header (presumably precision, recall, F1 in that order).

    Args:
        threshold: Percentage threshold. It is passed unchanged to
            ``ge.get_mul_tnet_edges`` for the TNet file. For the SharpTNI
            file it is first converted to an absolute count using the integer
            stored in the second dot-separated field of the file name.

    Raises:
        IndexError: if a folder contains no ``sample_sankoff_summary*`` file.
    """
    data_dir = 'CDC/'
    folders = sorted(next(os.walk(data_dir))[1])
    out_path = ('results/sharptni/cdc.bestTree.sharptni.sankoff_sample.tnet.new.rand.mod.th.'
                + str(threshold) + '.csv')

    # The context manager guarantees the CSV is flushed and closed even when
    # one of the folders raises part-way through the run.
    with open(out_path, 'w+') as out_file:
        out_file.write('dataset,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1\n')

        for folder in folders:
            print('inside folder: ',folder)
            sharptni_dir = data_dir + folder + '/sharptni_output'
            candidates = [name for name in next(os.walk(sharptni_dir))[2]
                          if name.startswith('sample_sankoff_summary')]
            sharptni_file = candidates[0]

            # The file name carries the sample count; scale the percentage
            # threshold to an absolute number of samples.
            sample_count = int(sharptni_file.split('.')[1])
            sharp_th = round(sample_count * (threshold / 100))
            print(sharp_th)

            real = set(cdc.get_true_transmission_edges(folder))
            sharp = set(ge.get_mul_tnet_edges(sharptni_dir + '/' + sharptni_file, sharp_th))
            tnet = set(ge.get_mul_tnet_edges(
                data_dir + folder + '/tnet_new_mod_rand_bootstrap/25.tnet', threshold))

            scores = []
            scores.extend(get_prec_rec_f1(real, sharp))
            scores.extend(get_prec_rec_f1(real, tnet))
            out_file.write('{},{},{},{},{},{},{}\n'.format(folder, *scores))
def compare_favites_best_tree_sharptni_tnet_new_tnet_bias_directed(sample_th):
    """Write a SharpTNI / TNet / TNet-with-bias score table for ``outputs/``.

    For every sub-folder of ``outputs/`` the real edges read from
    ``dataset/<folder>/transmission_network.txt`` are compared against three
    predictions (SharpTNI single-tree summary, ``bestTree.100.tnet_new`` and
    ``bestTree.100.tnet_new_with_bias``). One CSV row per folder is written
    under ``results/single_tree_sharptni/``.

    Args:
        sample_th: Percentage threshold. It is passed unchanged to
            ``ge.get_mul_tnet_edges`` for both TNet files. For SharpTNI it is
            converted to an absolute count (rounded up) using the integer in
            the third dot-separated field of the summary file name.

    Raises:
        IndexError: if a folder has no file starting with
            ``bestTree_sankoff_min_coinfection.100_sample``.
    """
    out_path = ('results/single_tree_sharptni/favites.best_tree.sharptni_min_coinf.tnet_new.tnet_bias.sample_th.'
                + str(sample_th) + '.csv')

    # ``with`` closes the output even if a folder raises mid-loop.
    with open(out_path, 'w+') as out_file:
        out_file.write('dataset,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1,tnet_bias_prec,tnet_bias_rec,tnet_bias_f1\n')

        for folder in next(os.walk('outputs/'))[1]:
            print('inside folder:', folder)
            sharptni_dir = 'outputs/' + folder + '/sharptni_single'
            candidates = [name for name in next(os.walk(sharptni_dir))[2]
                          if name.startswith('bestTree_sankoff_min_coinfection.100_sample')]
            sharptni_file = candidates[0]

            # Convert the percentage into an absolute sample count.
            sample_num = int(sharptni_file.split('.')[2])
            sharp_th = math.ceil(sample_num * (sample_th / 100))
            print(sample_num, sharp_th)

            tnet_dir = 'outputs/' + folder + '/tnet_best_tree/'
            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))
            sharptni = set(ge.get_mul_tnet_edges(sharptni_dir + '/' + sharptni_file, sharp_th))
            tnet = set(ge.get_mul_tnet_edges(tnet_dir + 'bestTree.100.tnet_new', sample_th))
            tnet_bias = set(ge.get_mul_tnet_edges(tnet_dir + 'bestTree.100.tnet_new_with_bias', sample_th))

            scores = []
            for predicted in (sharptni, tnet, tnet_bias):
                scores.extend(get_prec_rec_f1(real, predicted))
            out_file.write('{},{},{},{},{},{},{},{},{},{}\n'.format(folder, *scores))
def create_directed_tnet_bootstrap_summary(tree_folder, threshold):
    """Summarise how often each edge occurs across a folder of TNet outputs.

    Every file directly inside ``covid_19/NCBI/<tree_folder>`` is read with
    ``ge.get_mul_tnet_edges(path, threshold)``; each returned edge increments
    that edge's counter. The counters are written as ``edge,count`` lines,
    highest count first, to
    ``covid_19/NCBI//tnet_output_complete/bootstrap_tnet_bias_100_th_<threshold>_summary.csv``.

    If that summary file already exists the function returns without
    recomputing or overwriting anything.

    Args:
        tree_folder: Folder name (relative to ``covid_19/NCBI/``) holding the
            TNet output files.
        threshold: Value forwarded to ``ge.get_mul_tnet_edges``; also part of
            the summary file name.
    """
    data_dir = 'covid_19/NCBI/'
    bootstrap_folder = data_dir + tree_folder
    output_folder = data_dir + '/tnet_output_complete/'
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    summary_path = output_folder + 'bootstrap_tnet_bias_100_th_' + str(threshold) + '_summary.csv'
    if os.path.exists(summary_path):
        # An existing summary is treated as up to date.
        return

    # Gather all counts BEFORE creating the output file: a failure while
    # reading inputs must not leave an empty summary behind, because the
    # exists-check above would then skip the work on every later call.
    edge_counts = {}
    for file_name in next(os.walk(bootstrap_folder))[2]:
        for edge in ge.get_mul_tnet_edges(bootstrap_folder + '/' + file_name, threshold):
            edge_counts[edge] = edge_counts.get(edge, 0) + 1

    # Stable sort: ties keep first-seen order.
    ranked = sorted(edge_counts.items(), key=operator.itemgetter(1), reverse=True)

    with open(summary_path, 'w+') as result:
        for edge, count in ranked:
            result.write('{},{}\n'.format(edge, count))
def compare_sharptni_best_tree():
    """Write SharpTNI scores at thresholds 10..100 for every ``outputs/`` folder.

    For each sub-folder of ``outputs/`` (sorted) the real edges from
    ``dataset/<folder>/transmission_network.txt`` are compared with the
    SharpTNI summary file (name starting with ``sample_sankoff_summary``) at
    ten percentage thresholds. Each percentage is scaled by the integer in
    the second dot-separated field of the summary file name and rounded
    before being handed to ``ge.get_mul_tnet_edges``.

    The value at index 1 of ``get_prec_rec_f1``'s result is stored per
    threshold (presumably recall, matching the output file name), giving one
    CSV row per folder in ``results/sharptni/best_tree.recall.sample_sankoff.csv``.

    Raises:
        IndexError: if a folder has no ``sample_sankoff_summary*`` file.
    """
    data_dir = 'outputs/'
    folders = sorted(next(os.walk(data_dir))[1])
    thresholds = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

    # The original never closed this file; ``with`` flushes and closes it on
    # both the success and the error path.
    with open('results/sharptni/best_tree.recall.sample_sankoff.csv', 'w+') as out_file:
        out_file.write('dataset,10,20,30,40,50,60,70,80,90,100\n')

        for folder in folders:
            print('inside folder: ',folder)
            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))

            sharptni_dir = data_dir + folder + '/sharptni'
            candidates = [name for name in next(os.walk(sharptni_dir))[2]
                          if name.startswith('sample_sankoff_summary')]
            sharptni_file = candidates[0]
            sample_num = int(sharptni_file.split('.')[1])
            print(sample_num)

            summary_path = sharptni_dir + '/' + sharptni_file
            scores = []
            for th in thresholds:
                # Percentage -> absolute number of samples.
                scaled_th = round(sample_num * (th / 100))
                predicted = set(ge.get_mul_tnet_edges(summary_path, scaled_th))
                scores.append(get_prec_rec_f1(real, predicted)[1])

            out_file.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format(folder, *scores))
def create_cdc_tnet_summary_directed(threshold):
    """Write an edge-frequency summary for every outbreak in ``known_outbreaks``.

    For each outbreak, every file directly inside
    ``CDC/<outbreak>/tnet_new_bootstrap`` is read with
    ``ge.get_mul_tnet_edges(path, threshold)`` and each returned edge
    increments that edge's counter. The counters are written as
    ``edge,count`` lines, highest count first, to
    ``CDC/<outbreak>/tnet_new_bootstrap_summary_directed/tnet_new_bootstrap_th_<threshold>_summary.csv``
    (the folder is created when missing; an existing file is overwritten).

    Args:
        threshold: Value forwarded to ``ge.get_mul_tnet_edges``; also part of
            the summary file name.
    """
    # NOTE(review): ``known_outbreaks`` is resolved from the enclosing scope;
    # a sibling function in this file uses ``cdc.known_outbreaks`` - confirm
    # both refer to the same list.
    for outbreak in known_outbreaks:
        print('Inside', outbreak)
        input_folder = 'CDC/' + outbreak + '/tnet_new_bootstrap'
        output_folder = 'CDC/' + outbreak + '/tnet_new_bootstrap_summary_directed'
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)

        # Count first, write afterwards, so a failure while reading inputs
        # does not truncate a previously written summary.
        edge_counts = {}
        for file_name in next(os.walk(input_folder))[2]:
            for edge in ge.get_mul_tnet_edges(input_folder + '/' + file_name, threshold):
                edge_counts[edge] = edge_counts.get(edge, 0) + 1

        # Stable sort: ties keep first-seen order.
        ranked = sorted(edge_counts.items(), key=operator.itemgetter(1), reverse=True)

        summary_path = output_folder + '/tnet_new_bootstrap' + '_th_' + str(threshold) + '_summary.csv'
        # One handle per outbreak; ``with`` closes each before the next opens.
        with open(summary_path, 'w+') as result:
            for edge, count in ranked:
                result.write('{},{}\n'.format(edge, count))
def compare_tnet_cdc_single_tree():
    """Write single-run and thresholded TNet scores for every CDC outbreak.

    For each entry of ``cdc.known_outbreaks`` the true edges are compared
    with

    * ``tnet_single_tree/single_tree.1.tnet_new_min`` at threshold 0 (the
      ``single`` column), and
    * ``tnet_single_tree/single_tree.100.tnet_new_min`` at thresholds
      10, 20, ..., 100.

    The value at index 2 of ``get_prec_rec_f1``'s result is recorded each
    time (presumably F1, matching the output file name). Rows go to
    ``results/cdc_single_tree_tnet/single_tree.f1.tnet.new.with.min.csv``.
    """
    thresholds = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

    # The original left this file open; ``with`` closes it reliably.
    with open('results/cdc_single_tree_tnet/single_tree.f1.tnet.new.with.min.csv', 'w+') as out_file:
        out_file.write('dataset,single,10,20,30,40,50,60,70,80,90,100\n')

        for outbreak in cdc.known_outbreaks:
            tree_dir = 'CDC/' + outbreak + '/tnet_single_tree/'
            real = set(cdc.get_true_transmission_edges(outbreak))

            tnet_single = set(ge.get_mul_tnet_edges(tree_dir + 'single_tree.1.tnet_new_min', 0))
            single_run = get_prec_rec_f1(real, tnet_single)[2]

            scores = []
            for th in thresholds:
                tnet = set(ge.get_mul_tnet_edges(tree_dir + 'single_tree.100.tnet_new_min', th))
                scores.append(get_prec_rec_f1(real, tnet)[2])

            out_file.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                outbreak, single_run, *scores))
def compare_favites_sharptni_phyloscanner_tnet_new_tnet_bias_single_tree_single_run():
    """Write a four-method single-run score table for every ``outputs/`` folder.

    For each sub-folder of ``outputs/`` the real edges from
    ``dataset/<folder>/transmission_network.txt`` are compared with the
    predictions of Phyloscanner, SharpTNI, TNet and TNet-with-bias (the last
    three read with ``ge.get_mul_tnet_edges`` at threshold 1). The three
    values ``get_prec_rec_f1`` returns per method are written in the column
    order given by the header, one row per folder, under
    ``results/single_tree_sharptni/``.
    """
    # NOTE(review): absolute, user-specific path - this only works on the
    # machine it was written for. Left unchanged because the intended
    # relative location is not visible here.
    phylo_dir = '/home/saurav/research/FAVITES_compare_TNet_v2/outputs/'
    out_path = 'results/single_tree_sharptni/favites.phyloscanner.sharptni.min.coinf.tnet.new.tnet.bias.single_tree.single_run.csv'

    # ``with`` closes the output even if a folder raises mid-loop.
    with open(out_path, 'w+') as out_file:
        out_file.write('dataset,phylo_prec,phylo_rec,phylo_f1,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1,tnet_bias_prec,tnet_bias_rec,tnet_bias_f1\n')

        for folder in next(os.walk('outputs/'))[1]:
            print('inside folder:', folder)
            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))
            phylo = set(ge.get_phyloscanner_single_tree_edges(
                phylo_dir + folder + '/phyloscanner_best_tree/favites_collapsedTree.csv'))
            sharptni = set(ge.get_mul_tnet_edges(
                'outputs/' + folder + '/sharptni_single/bestTree_sankoff_min_coinfection.1', 1))
            tnet = set(ge.get_mul_tnet_edges(
                'outputs/' + folder + '/tnet_best_tree/bestTree.1.tnet_new', 1))
            tnet_bias = set(ge.get_mul_tnet_edges(
                'outputs/' + folder + '/tnet_best_tree/bestTree.1.tnet_new_with_bias', 1))

            scores = []
            for predicted in (phylo, sharptni, tnet, tnet_bias):
                scores.extend(get_prec_rec_f1(real, predicted))
            out_file.write('{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(folder, *scores))
def compare_tnet_single_run():
    """Write TNet scores at threshold 50 for every ``outputs/`` folder.

    For each sub-folder of ``outputs/`` (sorted) the real edges from
    ``dataset/<folder>/transmission_network.txt`` are compared with the edges
    ``ge.get_mul_tnet_edges`` returns for
    ``tnet_best_tree/bestTree.100.tnet_new`` at threshold 50. The first three
    values of ``get_prec_rec_f1``'s result are written per folder to
    ``results/single_tree_tnet/best_tree.tnet.new.50.csv``.
    """
    data_dir = 'outputs/'
    folders = sorted(next(os.walk(data_dir))[1])

    # The original never closed this file; ``with`` does so on every path.
    with open('results/single_tree_tnet/best_tree.tnet.new.50.csv', 'w+') as out_file:
        out_file.write('dataset,precision,recall,f1\n')

        for folder in folders:
            print('inside folder: ',folder)
            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))
            predicted = set(ge.get_mul_tnet_edges(
                data_dir + folder + '/tnet_best_tree/bestTree.100.tnet_new', 50))
            scores = get_prec_rec_f1(real, predicted)
            out_file.write('{},{},{},{}\n'.format(folder, scores[0], scores[1], scores[2]))
def compare_tnet_best_tree():
    """Write single-run and thresholded TNet scores for every ``outputs/`` folder.

    NOTE(review): a second function with this exact name is defined later in
    the file; at import time that later definition replaces this one, so this
    body is unreachable by name unless one of the two is renamed.

    For each sub-folder of ``outputs/`` (sorted) the real edges from
    ``dataset/<folder>/transmission_network.txt`` are compared with

    * ``tnet_best_tree/bestTree.1.tnet_new`` at threshold 0 (the ``single``
      column), and
    * ``tnet_best_tree/bestTree.100.tnet_new`` at thresholds 10, 20, ..., 100.

    The value at index 1 of ``get_prec_rec_f1``'s result is recorded each
    time (presumably recall, matching the output file name). Rows go to
    ``results/single_tree_tnet/best_tree.recall.tnet.new.csv``.
    """
    data_dir = 'outputs/'
    folders = sorted(next(os.walk(data_dir))[1])
    thresholds = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

    # The original never closed this file; ``with`` does so on every path.
    with open('results/single_tree_tnet/best_tree.recall.tnet.new.csv', 'w+') as out_file:
        out_file.write('dataset,single,10,20,30,40,50,60,70,80,90,100\n')

        for folder in folders:
            print('inside folder: ',folder)
            tree_dir = data_dir + folder + '/tnet_best_tree/'
            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))

            tnet_single = set(ge.get_mul_tnet_edges(tree_dir + 'bestTree.1.tnet_new', 0))
            single_run = get_prec_rec_f1(real, tnet_single)[1]

            scores = []
            for th in thresholds:
                tnet = set(ge.get_mul_tnet_edges(tree_dir + 'bestTree.100.tnet_new', th))
                scores.append(get_prec_rec_f1(real, tnet)[1])

            out_file.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                folder, single_run, *scores))
def compare_phyloscanner_tnet_best_tree(threshold):
    """Write a Phyloscanner vs. TNet score table for every ``outputs/`` folder.

    For each sub-folder of ``outputs/`` (sorted) the real edges from
    ``dataset/<folder>/transmission_network.txt`` are compared with the
    Phyloscanner edges from ``phyloscanner_best_tree/favites_collapsedTree.csv``
    and the TNet edges from ``tnet_best_tree/bestTree.100.tnet_new``. The
    three values ``get_prec_rec_f1`` returns per method fill the header's
    columns; one row per folder is written to
    ``results/best_tree.phyloscanner.tnet.new.th.<threshold>.csv``.

    Args:
        threshold: Value forwarded to ``ge.get_mul_tnet_edges`` for the TNet
            file; also part of the output file name.
    """
    data_dir = 'outputs/'
    folders = sorted(next(os.walk(data_dir))[1])
    out_path = 'results/best_tree.phyloscanner.tnet.new.th.'+str(threshold)+'.csv'

    # ``with`` closes the output even if a folder raises mid-loop.
    with open(out_path, 'w+') as out_file:
        out_file.write('dataset,phylo_prec,phylo_rec,phylo_f1,tnet_prec,tnet_rec,tnet_f1\n')

        for folder in folders:
            print('inside folder: ',folder)
            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))
            phylo = set(ge.get_phyloscanner_single_tree_edges(
                data_dir + folder + '/phyloscanner_best_tree/favites_collapsedTree.csv'))
            tnet = set(ge.get_mul_tnet_edges(
                data_dir + folder + '/tnet_best_tree/bestTree.100.tnet_new', threshold))

            scores = []
            scores.extend(get_prec_rec_f1(real, phylo))
            scores.extend(get_prec_rec_f1(real, tnet))
            out_file.write('{},{},{},{},{},{},{}\n'.format(folder, *scores))
def compare_tnet_best_tree():
    """Write old-TNet scores at thresholds 50..100 for every ``dataset/`` folder.

    NOTE(review): an earlier function in this file has the same name; this
    later definition is the one that stays bound after import. Consider
    renaming one of them.

    For each sub-folder of ``dataset/`` (sorted) the real edges from
    ``transmission_network.txt`` are compared with the edges
    ``ge.get_mul_tnet_edges`` returns for ``tnet_old_100.tnet`` at thresholds
    50, 60, ..., 100. All values ``get_prec_rec_f1`` returns are appended per
    threshold, producing the ``prec_*``/``rec_*``/``f1_*`` columns of
    ``results/old.prec.rec.f1.tnet.csv``.
    """
    data_dir = 'dataset/'
    folders = sorted(next(os.walk(data_dir))[1])
    thresholds = [50, 60, 70, 80, 90, 100]

    # The original never closed this file; ``with`` does so on every path.
    with open('results/old.prec.rec.f1.tnet.csv', 'w+') as out_file:
        out_file.write('dataset,prec_50,rec_50,f1_50,prec_60,rec_60,f1_60,prec_70,rec_70,f1_70,prec_80,rec_80,f1_80,prec_90,rec_90,f1_90,prec_100,rec_100,f1_100\n')

        for folder in folders:
            print('inside folder: ',folder)
            # The ground truth does not depend on the threshold, so it is
            # read once per folder instead of once per threshold.
            real = set(ge.get_real_edges(data_dir + folder + '/transmission_network.txt'))

            scores = []
            for th in thresholds:
                tnet = set(ge.get_mul_tnet_edges(data_dir + folder + '/tnet_old_100.tnet', th))
                scores.extend(get_prec_rec_f1(real, tnet))

            out_file.write('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                folder, *scores))
def _directed_counts_and_scores(real, predicted):
    """Return ``([TP, FP, FN], [precision, recall, f1])`` for two edge sets.

    The three scores are rounded to three decimals. If any denominator is
    zero, all three scores are reported as 0.
    """
    tp = len(real & predicted)
    fp = len(predicted - real)
    fn = len(real - predicted)
    try:
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * (recall * precision) / (recall + precision)
    except ZeroDivisionError:
        precision = 0
        recall = 0
        f1 = 0
    return [tp, fp, fn], [round(precision, 3), round(recall, 3), round(f1, 3)]


def compare_tnets_directed(th=50):
    """Compare old and new TNet outputs against the real edges, per dataset.

    For each sub-folder of ``dataset/`` (sorted) the real edges from
    ``transmission_network.txt`` are compared with the edges
    ``ge.get_mul_tnet_edges`` returns for ``tnet_old_100.tnet`` and
    ``tnet_new_100.tnet``. Two CSV files are written to the current
    directory:

    * ``directed.tnet.old.new.th_<th>.TP_FP_FN.csv`` - TP / FP / FN counts
    * ``directed.tnet.old.new.th_<th>.F1.csv`` - precision / recall / F1,
      rounded to three decimals

    Args:
        th: Threshold forwarded to ``ge.get_mul_tnet_edges`` and embedded in
            both output file names. The original code always used 50 for the
            edge extraction regardless of this argument, so files produced
            with another ``th`` were mislabeled; the default call is
            unaffected.
    """
    data_dir = 'dataset/'
    folders = sorted(next(os.walk(data_dir))[1])
    counts_path = 'directed.tnet.old.new.th_' + str(th) + '.TP_FP_FN.csv'
    scores_path = 'directed.tnet.old.new.th_' + str(th) + '.F1.csv'

    # Both outputs were previously left open; ``with`` closes them reliably.
    with open(counts_path, 'w+') as TP_FP_FN_file, open(scores_path, 'w+') as F1_file:
        TP_FP_FN_file.write(
            'dataset,tnet_old_tp,tnet_old_fp,tnet_old_fn,tnet_new_tp,tnet_new_fp,tnet_new_fn\n'
        )
        F1_file.write(
            'dataset,tnet_old_prec,tnet_old_rec,tnet_old_f1,tnet_new_prec,tnet_new_rec,tnet_new_f1\n'
        )

        for folder in folders:
            print('inside folder: ', folder)
            real = set(
                ge.get_real_edges(data_dir + folder + '/transmission_network.txt'))
            tnet_old = set(
                ge.get_mul_tnet_edges(data_dir + folder + '/tnet_old_100.tnet', th))
            tnet_new = set(
                ge.get_mul_tnet_edges(data_dir + folder + '/tnet_new_100.tnet', th))

            old_counts, old_scores = _directed_counts_and_scores(real, tnet_old)
            new_counts, new_scores = _directed_counts_and_scores(real, tnet_new)

            TP_FP_FN_file.write('{},{},{},{},{},{},{}\n'.format(
                folder, *(old_counts + new_counts)))
            F1_file.write('{},{},{},{},{},{},{}\n'.format(
                folder, *(old_scores + new_scores)))