def compare_sharptni_tnet_cdc(threshold):
    """Write a CSV comparing SharpTNI and TNet edge sets against the CDC truth.

    For every sub-folder of ``CDC/`` (visited in sorted order) one row is
    written holding the folder name followed by the values returned by
    ``get_prec_rec_f1`` for the SharpTNI edges and then for the TNet edges.

    Args:
        threshold: Value embedded in the output file name, passed unchanged
            to ``ge.get_mul_tnet_edges`` for the TNet file, and used as a
            percentage (``threshold / 100``) to scale the SharpTNI cut-off.

    Raises:
        IndexError: If a folder's ``sharptni_output`` directory contains no
            file whose name starts with ``sample_sankoff_summary``.
    """
    data_dir = 'CDC/'
    folders = sorted(next(os.walk(data_dir))[1])

    out_path = ('results/sharptni/cdc.bestTree.sharptni.sankoff_sample.tnet.new.rand.mod.th.'
                + str(threshold) + '.csv')
    # The context manager guarantees the CSV is flushed and closed even when
    # a folder is malformed and processing aborts part-way through.
    with open(out_path, 'w+') as F1_file:
        F1_file.write('dataset,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1\n')

        for folder in folders:
            print('inside folder: ', folder)
            sharptni_dir = data_dir + folder + '/sharptni_output'
            sample_list = next(os.walk(sharptni_dir))[2]
            sharptni_file = [name for name in sample_list
                             if name.startswith('sample_sankoff_summary')][0]

            # The second dot-separated field of the file name is an integer
            # (presumably the number of samples -- TODO confirm); the
            # SharpTNI cut-off is `threshold` percent of it, rounded.
            th2 = int(sharptni_file.split('.')[1])
            th2 = round(th2 * (threshold / 100))
            print(th2)

            real = set(cdc.get_true_transmission_edges(folder))
            sharp = set(ge.get_mul_tnet_edges(sharptni_dir + '/' + sharptni_file, th2))
            tnet = set(ge.get_mul_tnet_edges(
                data_dir + folder + '/tnet_new_mod_rand_bootstrap/25.tnet', threshold))

            F1 = []
            F1.extend(get_prec_rec_f1(real, sharp))
            F1.extend(get_prec_rec_f1(real, tnet))
            # Only the first six scores are written, matching the header.
            F1_file.write('{},{},{},{},{},{},{}\n'.format(folder, *F1[:6]))
def compare_cdc_undirected(threshold):
    """Write a CSV comparing Phyloscanner and TNet undirected edges to the CDC truth.

    One row is written per entry of ``cdc.known_outbreaks``: the outbreak
    name followed by the values returned by ``get_prec_rec_f1_undirected``
    for the Phyloscanner edges and then for the TNet edges.

    Args:
        threshold: Value embedded in the output file name and in the name of
            the TNet summary file that is read for each outbreak.
    """
    out_path = ('results/cdc_undirected_comparison/cdc.phyloscanner.tnet.new.th.'
                + str(threshold) + '.csv')
    # `with` closes the file even if an outbreak directory is missing or a
    # helper raises, which the previous explicit close() did not.
    with open(out_path, 'w+') as F1_file:
        F1_file.write('dataset,phylo_prec,phylo_rec,phylo_f1,tnet_prec,tnet_rec,tnet_f1\n')

        for outbreak in cdc.known_outbreaks:
            base = 'CDC/' + outbreak
            # Number of files directly inside the bootstrap directory; half
            # of it (integer division) is the cut-off given to both readers.
            bootstrap = len(next(os.walk(base + '/tnet_new_bootstrap'))[2])
            cutoff = bootstrap // 2

            real = set(cdc.get_true_transmission_edges(outbreak))
            phylo = set(ge.get_phyloscanner_summary_trans_and_complex_edges(
                base + '/phyloscanner_output/cdc_hostRelationshipSummary.csv', cutoff))
            tnet = set(ge.get_tnet_summary_edges(
                base + '/tnet_new_bootstrap_summary_undirected/tnet_new_bootstrap_th_'
                + str(threshold) + '_summary.csv', cutoff))

            F1 = []
            F1.extend(get_prec_rec_f1_undirected(real, phylo))
            F1.extend(get_prec_rec_f1_undirected(real, tnet))
            # Only the first six scores are written, matching the header.
            F1_file.write('{},{},{},{},{},{},{}\n'.format(outbreak, *F1[:6]))
def compare_tnet_cdc_single_tree():
    """Write a CSV of TNet scores on single-tree runs for each CDC outbreak.

    For every entry of ``cdc.known_outbreaks`` one row is written holding
    the outbreak name, the third value returned by ``get_prec_rec_f1`` for
    the ``single_tree.1.tnet_new_min`` file read with threshold 0, and then
    the same third value for the ``single_tree.100.tnet_new_min`` file read
    at each threshold 10, 20, ..., 100.
    """
    thresholds = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

    # The original never closed this file; the context manager makes sure
    # the rows are flushed to disk when the function returns or raises.
    with open('results/cdc_single_tree_tnet/single_tree.f1.tnet.new.with.min.csv', 'w+') as F1_file:
        F1_file.write('dataset,single,10,20,30,40,50,60,70,80,90,100\n')

        for outbreak in cdc.known_outbreaks:
            tree_dir = 'CDC/' + outbreak + '/tnet_single_tree/'
            real = set(cdc.get_true_transmission_edges(outbreak))

            tnet_single = set(ge.get_mul_tnet_edges(tree_dir + 'single_tree.1.tnet_new_min', 0))
            # Index 2 of the helper's result is the value written to the CSV
            # (the third of prec/rec/f1 per the function name -- the F1).
            single_run = get_prec_rec_f1(real, tnet_single)[2]

            F1 = [
                get_prec_rec_f1(
                    real,
                    set(ge.get_mul_tnet_edges(tree_dir + 'single_tree.100.tnet_new_min', th)),
                )[2]
                for th in thresholds
            ]
            F1_file.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(outbreak, single_run, *F1))
def compare_cdc_directed(threshold):
    """Write a CSV comparing Phyloscanner, TNet and biased-TNet directed edges.

    One row is written per entry of ``cdc.known_outbreaks``: the outbreak
    name followed by the values returned by ``get_prec_rec_f1`` for the
    Phyloscanner, TNet and TNet-with-bias edge sets, in that order.

    Args:
        threshold: Value embedded in the output file name and in the names
            of the two TNet summary files read for each outbreak.
    """
    out_path = ('results/cdc_directed_comparison/cdc.phyloscanner.tnet.new.tnet.bias.th.'
                + str(threshold) + '.csv')
    # NOTE(review): only the plain TNet summary is read from this absolute,
    # machine-specific directory; every other input is relative to the
    # working directory. Confirm whether this is intentional.
    out_dir = '/home/saurav/research/FAVITES_compare_TNet_v2/'

    # `with` closes the file even if an outbreak directory is missing or a
    # helper raises, which the previous explicit close() did not.
    with open(out_path, 'w+') as F1_file:
        F1_file.write('dataset,phylo_prec,phylo_rec,phylo_f1,tnet_prec,tnet_rec,tnet_f1,'
                      'tnet_bias_prec,tnet_bias_rec,tnet_bias_f1\n')

        for outbreak in cdc.known_outbreaks:
            base = 'CDC/' + outbreak
            summary_name = 'tnet_new_bootstrap_th_' + str(threshold) + '_summary.csv'
            # Number of files directly inside tnet_input; half of it
            # (integer division) is the cut-off given to all three readers.
            bootstrap = len(next(os.walk(base + '/tnet_input'))[2])
            cutoff = bootstrap // 2

            real = set(cdc.get_true_transmission_edges(outbreak))
            phylo = set(ge.get_phyloscanner_summary_trans_edges(
                base + '/phyloscanner_output/cdc_hostRelationshipSummary.csv', cutoff))
            tnet = set(ge.get_tnet_summary_edges(
                out_dir + base + '/tnet_new_bootstrap_summary_directed/' + summary_name, cutoff))
            tnet_bias = set(ge.get_tnet_summary_edges(
                base + '/tnet_new_bootstrap_with_bias_summary_directed/' + summary_name, cutoff))

            F1 = []
            F1.extend(get_prec_rec_f1(real, phylo))
            F1.extend(get_prec_rec_f1(real, tnet))
            F1.extend(get_prec_rec_f1(real, tnet_bias))
            # Only the first nine scores are written, matching the header.
            F1_file.write('{},{},{},{},{},{},{},{},{},{}\n'.format(outbreak, *F1[:9]))
def compare_cdc_sharptni_tnet_directed(bootstrap_th, sample_th):
    """Write a CSV comparing SharpTNI, TNet and biased-TNet directed edges.

    One row is written per entry of ``cdc.known_outbreaks``: the outbreak
    name followed by the values returned by ``get_prec_rec_f1`` for the
    SharpTNI, TNet and TNet-with-bias edge sets, in that order.

    Args:
        bootstrap_th: Percentage applied to the number of files in the
            outbreak's ``tnet_input`` directory (rounded up) to obtain the
            cut-off passed to ``ge.get_tnet_summary_edges``; also embedded
            in the output file name.
        sample_th: Value embedded in the output file name and in the names
            of the three summary files read for each outbreak.
    """
    out_path = ('results/sharptni_directed_comparison/cdc.sharptni.tnet.new.tnet.bias.sample_th.'
                + str(sample_th) + '.bootstrap_th.' + str(bootstrap_th) + '.csv')
    # `with` closes the file even if an outbreak directory is missing or a
    # helper raises, which the previous explicit close() did not.
    with open(out_path, 'w+') as F1_file:
        F1_file.write('dataset,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1,'
                      'tnet_bias_prec,tnet_bias_rec,tnet_bias_f1\n')

        for outbreak in cdc.known_outbreaks:
            print('inside outbreak:', outbreak)
            base = 'CDC/' + outbreak
            tnet_summary = 'tnet_new_bootstrap_th_' + str(sample_th) + '_summary.csv'

            # File count of tnet_input scaled by the bootstrap percentage.
            boot_th = len(next(os.walk(base + '/tnet_input'))[2])
            boot_th = math.ceil(boot_th * (bootstrap_th / 100))

            real = set(cdc.get_true_transmission_edges(outbreak))
            sharptni = set(ge.get_tnet_summary_edges(
                base + '/sharptni_sankoff_sample_bootstrap_summary_directed/sankoff_sample_bootstrap_th_'
                + str(sample_th) + '_summary.csv', boot_th))
            tnet = set(ge.get_tnet_summary_edges(
                base + '/tnet_new_bootstrap_summary_directed/' + tnet_summary, boot_th))
            tnet_bias = set(ge.get_tnet_summary_edges(
                base + '/tnet_new_bootstrap_with_bias_summary_directed/' + tnet_summary, boot_th))

            F1 = []
            F1.extend(get_prec_rec_f1(real, sharptni))
            F1.extend(get_prec_rec_f1(real, tnet))
            F1.extend(get_prec_rec_f1(real, tnet_bias))
            # Only the first nine scores are written, matching the header.
            F1_file.write('{},{},{},{},{},{},{},{},{},{}\n'.format(outbreak, *F1[:9]))
def compare_cdc_phyloscanner_sharptni_tnet_new_tnet_bias_directed(bootstrap_th, sample_th):
    """Write a CSV comparing four directed edge sets against the CDC truth.

    One row is written per entry of ``cdc.known_outbreaks``: the outbreak
    name followed by the values returned by ``get_prec_rec_f1`` for the
    Phyloscanner, SharpTNI, TNet and TNet-with-bias edge sets, in that order.

    Args:
        bootstrap_th: Percentage applied to the number of files in the
            outbreak's ``rooted_bootstrap_trees_100`` directory (rounded up)
            to obtain the cut-off passed to every edge reader; also embedded
            in the output file name.
        sample_th: Value embedded in the output file name and in the names
            of the SharpTNI and TNet summary files read for each outbreak.
    """
    out_path = ('results/sharptni_min_coinfection_directed_comparison/'
                'cdc.phyloscanner.sharptni.min.coinf.tnet.new.tnet.bias.sample_th.'
                + str(sample_th) + '.bootstrap_th.' + str(bootstrap_th) + '.csv')
    # `with` closes the file even if an outbreak directory is missing or a
    # helper raises, which the previous explicit close() did not.
    with open(out_path, 'w+') as F1_file:
        F1_file.write('dataset,phylo_prec,phylo_rec,phylo_f1,sharp_prec,sharp_rec,sharp_f1,'
                      'tnet_prec,tnet_rec,tnet_f1,tnet_bias_prec,tnet_bias_rec,tnet_bias_f1\n')

        for outbreak in cdc.known_outbreaks:
            print('inside folder:', outbreak)
            base = 'CDC/' + outbreak
            tnet_summary = 'tnet_new_bootstrap_th_' + str(sample_th) + '_summary.csv'

            # File count of the bootstrap-tree directory scaled by the
            # bootstrap percentage.
            boot_th = len(next(os.walk(base + '/rooted_bootstrap_trees_100'))[2])
            boot_th = math.ceil(boot_th * (bootstrap_th / 100))

            real = set(cdc.get_true_transmission_edges(outbreak))
            phylo = set(ge.get_phyloscanner_summary_trans_edges(
                base + '/phyloscanner_output_100/CDC_hostRelationshipSummary.csv', boot_th))
            sharptni = set(ge.get_tnet_summary_edges(
                base + '/sharptni_sankoff_sample_100_bootstrap_min_coinfection_summary_directed/'
                'sankoff_sample_bootstrap_th_' + str(sample_th) + '_summary.csv', boot_th))
            tnet = set(ge.get_tnet_summary_edges(
                base + '/tnet_new_bootstrap_100_summary_directed/' + tnet_summary, boot_th))
            tnet_bias = set(ge.get_tnet_summary_edges(
                base + '/tnet_new_bootstrap_with_bias_100_summary_directed/' + tnet_summary, boot_th))

            F1 = []
            F1.extend(get_prec_rec_f1(real, phylo))
            F1.extend(get_prec_rec_f1(real, sharptni))
            F1.extend(get_prec_rec_f1(real, tnet))
            F1.extend(get_prec_rec_f1(real, tnet_bias))
            # Only the first twelve scores are written, matching the header.
            F1_file.write('{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(outbreak, *F1[:12]))