예제 #1
0
def compare_sharptni_tnet_cdc(threshold):
	"""Write a CSV comparing SharpTNI and TNet edges with the true edges per CDC folder.

	Every sub-directory of ``CDC/`` is processed in sorted order. For each one
	the edge sets extracted by ``ge.get_mul_tnet_edges`` from the SharpTNI
	summary file and from ``tnet_new_mod_rand_bootstrap/25.tnet`` are scored
	against ``cdc.get_true_transmission_edges(folder)`` with
	``get_prec_rec_f1``, and one row is appended to the results file.

	Args:
		threshold: Percentage used in three places: it is embedded in the
			output file name, passed unchanged as the TNet edge threshold,
			and used to scale the count parsed from the SharpTNI file name.

	Raises:
		IndexError: If a folder has no file starting with
			``sample_sankoff_summary`` in its ``sharptni_output`` directory.
	"""
	data_dir = 'CDC/'
	# Directory listing happens before the output file is created, so a missing
	# data directory does not leave an empty results file behind.
	folders = sorted(next(os.walk(data_dir))[1])
	out_path = 'results/sharptni/cdc.bestTree.sharptni.sankoff_sample.tnet.new.rand.mod.th.'+str(threshold)+'.csv'

	# The context manager guarantees the file is flushed and closed even when a
	# folder fails part-way through the loop.
	with open(out_path, 'w+') as F1_file:
		F1_file.write('dataset,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1\n')

		for folder in folders:
			print('inside folder: ',folder)
			sharptni_dir = data_dir + folder + '/sharptni_output'
			candidates = [name for name in next(os.walk(sharptni_dir))[2] if name.startswith('sample_sankoff_summary')]
			sharptni_file = candidates[0]

			# The second dot-separated field of the file name is an integer;
			# presumably the number of SharpTNI samples -- TODO confirm.
			# `threshold` percent of it becomes the SharpTNI edge threshold.
			th2 = int(sharptni_file.split('.')[1])
			th2 = round(th2 * (threshold / 100))
			print(th2)

			real = set(cdc.get_true_transmission_edges(folder))
			sharp = set(ge.get_mul_tnet_edges(sharptni_dir + '/' + sharptni_file, th2))
			tnet = set(ge.get_mul_tnet_edges(data_dir + folder + '/tnet_new_mod_rand_bootstrap/25.tnet', threshold))

			# Column order must match the header: SharpTNI scores, then TNet scores.
			scores = []
			scores.extend(get_prec_rec_f1(real, sharp))
			scores.extend(get_prec_rec_f1(real, tnet))
			F1_file.write('{},{},{},{},{},{},{}\n'.format(folder, *scores))
예제 #2
0
def compare_cdc_undirected(threshold):
	"""Write a CSV comparing Phyloscanner and TNet undirected edges per known outbreak.

	For every entry of ``cdc.known_outbreaks`` the Phyloscanner summary edges
	and the TNet summary edges are scored against
	``cdc.get_true_transmission_edges(outbreak)`` with
	``get_prec_rec_f1_undirected``, and one row is written per outbreak.

	Args:
		threshold: Value embedded in the output file name and in the name of
			the TNet summary CSV that is read for each outbreak.
	"""
	out_path = 'results/cdc_undirected_comparison/cdc.phyloscanner.tnet.new.th.' + str(threshold) + '.csv'
	summary_name = 'tnet_new_bootstrap_th_' + str(threshold) + '_summary.csv'

	# The context manager closes the file even if an outbreak raises mid-loop.
	with open(out_path, 'w+') as F1_file:
		F1_file.write('dataset,phylo_prec,phylo_rec,phylo_f1,tnet_prec,tnet_rec,tnet_f1\n')

		for outbreak in cdc.known_outbreaks:
			base = 'CDC/' + outbreak
			# Number of files in the bootstrap directory; half of it (floored)
			# is the cut-off handed to both edge readers.
			bootstrap = len(next(os.walk(base + '/tnet_new_bootstrap'))[2])
			cutoff = bootstrap//2

			real = set(cdc.get_true_transmission_edges(outbreak))
			phylo = set(ge.get_phyloscanner_summary_trans_and_complex_edges(base + '/phyloscanner_output/cdc_hostRelationshipSummary.csv', cutoff))
			tnet = set(ge.get_tnet_summary_edges(base + '/tnet_new_bootstrap_summary_undirected/' + summary_name, cutoff))

			# Column order must match the header: Phyloscanner scores, then TNet scores.
			scores = []
			scores.extend(get_prec_rec_f1_undirected(real, phylo))
			scores.extend(get_prec_rec_f1_undirected(real, tnet))
			F1_file.write('{},{},{},{},{},{},{}\n'.format(outbreak, *scores))
예제 #3
0
def compare_tnet_cdc_single_tree():
	"""Write a CSV of single-tree TNet scores per known outbreak.

	For every entry of ``cdc.known_outbreaks`` one row is written containing
	the third value returned by ``get_prec_rec_f1`` (presumably the F1 score,
	going by the output file name -- TODO confirm) for:

	* the edges read from ``single_tree.1.tnet_new_min`` with threshold 0, and
	* the edges read from ``single_tree.100.tnet_new_min`` at each threshold
	  10, 20, ..., 100.
	"""
	thresholds = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

	# The file was previously never closed; the context manager makes sure every
	# row is flushed to disk when the function returns or raises.
	with open('results/cdc_single_tree_tnet/single_tree.f1.tnet.new.with.min.csv', 'w+') as F1_file:
		F1_file.write('dataset,single,10,20,30,40,50,60,70,80,90,100\n')

		for outbreak in cdc.known_outbreaks:
			tree_dir = 'CDC/' + outbreak + '/tnet_single_tree/'
			real = set(cdc.get_true_transmission_edges(outbreak))

			tnet_single = set(ge.get_mul_tnet_edges(tree_dir + 'single_tree.1.tnet_new_min', 0))
			single_run = get_prec_rec_f1(real, tnet_single)[2]

			# One score per threshold, in the same order as the header columns.
			per_threshold = [
				get_prec_rec_f1(real, set(ge.get_mul_tnet_edges(tree_dir + 'single_tree.100.tnet_new_min', th)))[2]
				for th in thresholds
			]

			F1_file.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(outbreak, single_run, *per_threshold))
예제 #4
0
def compare_cdc_directed(threshold, out_dir='/home/saurav/research/FAVITES_compare_TNet_v2/'):
	"""Write a CSV comparing Phyloscanner, TNet and biased-TNet directed edges.

	For every entry of ``cdc.known_outbreaks`` the three predicted edge sets
	are scored against ``cdc.get_true_transmission_edges(outbreak)`` with
	``get_prec_rec_f1``, and one row is written per outbreak.

	Args:
		threshold: Value embedded in the output file name and in the names of
			the TNet summary CSVs that are read for each outbreak.
		out_dir: Prefix prepended to the path of the plain TNet summary only;
			the Phyloscanner and biased-TNet inputs are read relative to the
			working directory. Defaults to the previously hard-coded location.
	"""
	out_path = 'results/cdc_directed_comparison/cdc.phyloscanner.tnet.new.tnet.bias.th.' + str(threshold) + '.csv'
	summary_name = 'tnet_new_bootstrap_th_' + str(threshold) + '_summary.csv'

	# The context manager closes the file even if an outbreak raises mid-loop.
	with open(out_path, 'w+') as F1_file:
		F1_file.write('dataset,phylo_prec,phylo_rec,phylo_f1,tnet_prec,tnet_rec,tnet_f1,tnet_bias_prec,tnet_bias_rec,tnet_bias_f1\n')

		for outbreak in cdc.known_outbreaks:
			base = 'CDC/' + outbreak
			# Number of files in tnet_input; half of it (floored) is the
			# cut-off handed to all three edge readers.
			bootstrap = len(next(os.walk(base + '/tnet_input'))[2])
			cutoff = bootstrap//2

			real = set(cdc.get_true_transmission_edges(outbreak))
			phylo = set(ge.get_phyloscanner_summary_trans_edges(base + '/phyloscanner_output/cdc_hostRelationshipSummary.csv', cutoff))
			tnet = set(ge.get_tnet_summary_edges(out_dir + base + '/tnet_new_bootstrap_summary_directed/' + summary_name, cutoff))
			tnet_bias = set(ge.get_tnet_summary_edges(base + '/tnet_new_bootstrap_with_bias_summary_directed/' + summary_name, cutoff))

			# Column order must match the header: Phyloscanner, TNet, biased TNet.
			scores = []
			scores.extend(get_prec_rec_f1(real, phylo))
			scores.extend(get_prec_rec_f1(real, tnet))
			scores.extend(get_prec_rec_f1(real, tnet_bias))
			F1_file.write('{},{},{},{},{},{},{},{},{},{}\n'.format(outbreak, *scores))
예제 #5
0
def compare_cdc_sharptni_tnet_directed(bootstrap_th, sample_th):
	"""Write a CSV comparing SharpTNI, TNet and biased-TNet directed edges.

	For every entry of ``cdc.known_outbreaks`` the three predicted edge sets
	are scored against ``cdc.get_true_transmission_edges(outbreak)`` with
	``get_prec_rec_f1``, and one row is written per outbreak.

	Args:
		bootstrap_th: Percentage of the number of files in the outbreak's
			``tnet_input`` directory; the rounded-up result is the cut-off
			passed to ``ge.get_tnet_summary_edges``.
		sample_th: Value embedded in the names of the summary CSVs that are
			read for each outbreak.

	Both arguments are also embedded in the output file name.
	"""
	out_path = 'results/sharptni_directed_comparison/cdc.sharptni.tnet.new.tnet.bias.sample_th.' + str(sample_th) + '.bootstrap_th.' + str(bootstrap_th) + '.csv'
	summary_suffix = '_bootstrap_th_' + str(sample_th) + '_summary.csv'

	# The context manager closes the file even if an outbreak raises mid-loop.
	with open(out_path, 'w+') as F1_file:
		F1_file.write('dataset,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1,tnet_bias_prec,tnet_bias_rec,tnet_bias_f1\n')

		for outbreak in cdc.known_outbreaks:
			print('inside outbreak:',outbreak)
			base = 'CDC/' + outbreak
			input_count = len(next(os.walk(base + '/tnet_input'))[2])
			boot_th = math.ceil(input_count * (bootstrap_th / 100))

			real = set(cdc.get_true_transmission_edges(outbreak))
			sharptni = set(ge.get_tnet_summary_edges(base + '/sharptni_sankoff_sample_bootstrap_summary_directed/sankoff_sample' + summary_suffix, boot_th))
			tnet = set(ge.get_tnet_summary_edges(base + '/tnet_new_bootstrap_summary_directed/tnet_new' + summary_suffix, boot_th))
			tnet_bias = set(ge.get_tnet_summary_edges(base + '/tnet_new_bootstrap_with_bias_summary_directed/tnet_new' + summary_suffix, boot_th))

			# Column order must match the header: SharpTNI, TNet, biased TNet.
			scores = []
			scores.extend(get_prec_rec_f1(real, sharptni))
			scores.extend(get_prec_rec_f1(real, tnet))
			scores.extend(get_prec_rec_f1(real, tnet_bias))
			F1_file.write('{},{},{},{},{},{},{},{},{},{}\n'.format(outbreak, *scores))
예제 #6
0
def compare_cdc_phyloscanner_sharptni_tnet_new_tnet_bias_directed(bootstrap_th, sample_th):
	"""Write a CSV comparing Phyloscanner, SharpTNI, TNet and biased-TNet directed edges.

	For every entry of ``cdc.known_outbreaks`` the four predicted edge sets
	are scored against ``cdc.get_true_transmission_edges(outbreak)`` with
	``get_prec_rec_f1``, and one row is written per outbreak.

	Args:
		bootstrap_th: Percentage of the number of files in the outbreak's
			``rooted_bootstrap_trees_100`` directory; the rounded-up result
			is the cut-off passed to every edge reader.
		sample_th: Value embedded in the names of the SharpTNI and TNet
			summary CSVs that are read for each outbreak.

	Both arguments are also embedded in the output file name.
	"""
	out_path = 'results/sharptni_min_coinfection_directed_comparison/cdc.phyloscanner.sharptni.min.coinf.tnet.new.tnet.bias.sample_th.' + str(sample_th) + '.bootstrap_th.' + str(bootstrap_th) + '.csv'
	summary_suffix = '_bootstrap_th_' + str(sample_th) + '_summary.csv'

	# The context manager closes the file even if an outbreak raises mid-loop.
	with open(out_path, 'w+') as F1_file:
		F1_file.write('dataset,phylo_prec,phylo_rec,phylo_f1,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1,tnet_bias_prec,tnet_bias_rec,tnet_bias_f1\n')

		for outbreak in cdc.known_outbreaks:
			print('inside folder:', outbreak)
			base = 'CDC/' + outbreak
			tree_count = len(next(os.walk(base + '/rooted_bootstrap_trees_100'))[2])
			boot_th = math.ceil(tree_count * (bootstrap_th / 100))

			real = set(cdc.get_true_transmission_edges(outbreak))
			phylo = set(ge.get_phyloscanner_summary_trans_edges(base + '/phyloscanner_output_100/CDC_hostRelationshipSummary.csv', boot_th))
			sharptni = set(ge.get_tnet_summary_edges(base + '/sharptni_sankoff_sample_100_bootstrap_min_coinfection_summary_directed/sankoff_sample' + summary_suffix, boot_th))
			tnet = set(ge.get_tnet_summary_edges(base + '/tnet_new_bootstrap_100_summary_directed/tnet_new' + summary_suffix, boot_th))
			tnet_bias = set(ge.get_tnet_summary_edges(base + '/tnet_new_bootstrap_with_bias_100_summary_directed/tnet_new' + summary_suffix, boot_th))

			# Column order must match the header: Phyloscanner, SharpTNI, TNet, biased TNet.
			scores = []
			scores.extend(get_prec_rec_f1(real, phylo))
			scores.extend(get_prec_rec_f1(real, sharptni))
			scores.extend(get_prec_rec_f1(real, tnet))
			scores.extend(get_prec_rec_f1(real, tnet_bias))
			F1_file.write('{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(outbreak, *scores))