def compare_sharptni_tnet_cdc(threshold):
    """Score SharpTNI and TNet edge sets against the true CDC edges.

    Every sub-folder of ``CDC/`` (visited in sorted order) yields one CSV row:
    the folder name, the scores ``get_prec_rec_f1`` returns for the SharpTNI
    edges, then the scores it returns for the TNet edges (presumably
    precision, recall, F1 -- this matches the CSV header).

    Args:
        threshold: Passed unchanged to ``ge.get_mul_tnet_edges`` for the TNet
            file. For SharpTNI it is applied as a percentage of the integer
            found in the second dot-separated field of the summary file
            name. It is also embedded in the output file name.

    Raises:
        IndexError: If a folder's ``sharptni_output`` directory contains no
            file whose name starts with ``sample_sankoff_summary``.
    """
    data_dir = 'CDC/'
    folders = sorted(next(os.walk(data_dir))[1])
    out_path = ('results/sharptni/cdc.bestTree.sharptni.sankoff_sample.tnet.new.rand.mod.th.'
                + str(threshold) + '.csv')

    # The context manager guarantees the CSV is flushed and closed even when
    # one of the folders raises half-way through the run.
    with open(out_path, 'w+') as F1_file:
        F1_file.write('dataset,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1\n')

        for folder in folders:
            print('inside folder: ', folder)
            sharptni_dir = data_dir + folder + '/sharptni_output'
            summaries = [name for name in next(os.walk(sharptni_dir))[2]
                         if name.startswith('sample_sankoff_summary')]
            sharptni_file = summaries[0]

            # The file name carries a count; convert the percentage threshold
            # into an absolute cut-off relative to that count.
            th2 = int(sharptni_file.split('.')[1])
            th2 = round(th2 * (threshold / 100))
            print(th2)

            real = set(cdc.get_true_transmission_edges(folder))
            sharp = set(ge.get_mul_tnet_edges(sharptni_dir + '/' + sharptni_file, th2))
            tnet = set(ge.get_mul_tnet_edges(
                data_dir + folder + '/tnet_new_mod_rand_bootstrap/25.tnet', threshold))

            F1 = []
            F1.extend(get_prec_rec_f1(real, sharp))
            F1.extend(get_prec_rec_f1(real, tnet))
            F1_file.write('{},{},{},{},{},{},{}\n'.format(folder, *F1))
def compare_favites_best_tree_sharptni_tnet_new_tnet_bias_directed(sample_th):
    """Score SharpTNI, TNet and biased-TNet edges for every FAVITES dataset.

    For each sub-folder of ``outputs/`` one CSV row is written with the
    folder name followed by the scores from ``get_prec_rec_f1`` for the
    SharpTNI, TNet and TNet-with-bias edge sets, in that order.

    Args:
        sample_th: Passed unchanged to ``ge.get_mul_tnet_edges`` for both
            TNet files. For SharpTNI it is applied as a percentage (rounded
            up) of the integer found in the third dot-separated field of the
            SharpTNI file name. It is also embedded in the output file name.

    Raises:
        IndexError: If a folder's ``sharptni_single`` directory has no file
            starting with ``bestTree_sankoff_min_coinfection.100_sample``.
    """
    out_path = ('results/single_tree_sharptni/favites.best_tree.sharptni_min_coinf.tnet_new.tnet_bias.sample_th.'
                + str(sample_th) + '.csv')

    # ``with`` closes the CSV even if a dataset folder is malformed and raises.
    with open(out_path, 'w+') as F1_file:
        F1_file.write('dataset,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1,tnet_bias_prec,tnet_bias_rec,tnet_bias_f1\n')

        folders = next(os.walk('outputs/'))[1]
        for folder in folders:
            print('inside folder:', folder)
            base = 'outputs/' + folder
            candidates = [name for name in next(os.walk(base + '/sharptni_single'))[2]
                          if name.startswith('bestTree_sankoff_min_coinfection.100_sample')]
            sharptni_file = candidates[0]

            # Convert the percentage into an absolute cut-off relative to the
            # count embedded in the file name.
            sample_num = int(sharptni_file.split('.')[2])
            sharp_th = math.ceil(sample_num * (sample_th / 100))
            print(sample_num, sharp_th)

            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))
            sharptni = set(ge.get_mul_tnet_edges(
                base + '/sharptni_single/' + sharptni_file, sharp_th))
            tnet = set(ge.get_mul_tnet_edges(
                base + '/tnet_best_tree/bestTree.100.tnet_new', sample_th))
            tnet_bias = set(ge.get_mul_tnet_edges(
                base + '/tnet_best_tree/bestTree.100.tnet_new_with_bias', sample_th))

            F1 = []
            for predicted in (sharptni, tnet, tnet_bias):
                F1.extend(get_prec_rec_f1(real, predicted))
            F1_file.write('{},{},{},{},{},{},{},{},{},{}\n'.format(folder, *F1))
def create_directed_tnet_bootstrap_summary(tree_folder, threshold):
    """Count how often each TNet edge appears across a folder of TNet files.

    Every file directly inside ``covid_19/NCBI/<tree_folder>`` is read with
    ``ge.get_mul_tnet_edges(file, threshold)``; each returned edge is tallied
    once per file it appears in. The tallies are written as ``edge,count``
    rows, highest count first, to
    ``covid_19/NCBI//tnet_output_complete/bootstrap_tnet_bias_100_th_<threshold>_summary.csv``.

    If that summary file already exists the function does nothing, so an
    earlier result is never overwritten.

    Args:
        tree_folder: Name of the folder (relative to ``covid_19/NCBI/``)
            holding the TNet output files.
        threshold: Forwarded to ``ge.get_mul_tnet_edges`` and embedded in the
            summary file name.
    """
    data_dir = 'covid_19/NCBI/'

    bootstrap_folder = data_dir + tree_folder
    output_folder = data_dir + '/tnet_output_complete/'
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    summary_path = (output_folder + 'bootstrap_tnet_bias_100_th_' +
                    str(threshold) + '_summary.csv')
    if os.path.exists(summary_path):
        return

    # The original never closed this handle; ``with`` guarantees the rows are
    # flushed to disk and the descriptor is released.
    with open(summary_path, 'w+') as result:
        edge_dict = {}
        for file in next(os.walk(bootstrap_folder))[2]:
            tnet_file = bootstrap_folder + '/' + file
            for edge in ge.get_mul_tnet_edges(tnet_file, threshold):
                edge_dict[edge] = edge_dict.get(edge, 0) + 1

        # Stable sort: edges with equal counts keep first-seen order.
        ranked = sorted(edge_dict.items(), key=operator.itemgetter(1),
                        reverse=True)
        for edge, count in ranked:
            result.write('{},{}\n'.format(edge, count))
def compare_sharptni_best_tree():
    """Write SharpTNI recall at thresholds 10..100 for every dataset folder.

    For each sub-folder of ``outputs/`` (sorted), the SharpTNI summary file
    is read at ten cut-offs and the second value returned by
    ``get_prec_rec_f1`` (presumably recall, per the output file name) is
    recorded for each. One CSV row per folder goes to
    ``results/sharptni/best_tree.recall.sample_sankoff.csv``.

    Raises:
        IndexError: If a folder's ``sharptni`` directory contains no file
            starting with ``sample_sankoff_summary``.
    """
    data_dir = 'outputs/'
    folders = sorted(next(os.walk(data_dir))[1])
    thresholds = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

    # The original never closed this file; ``with`` makes sure every row is
    # flushed and the handle released, including on error.
    with open('results/sharptni/best_tree.recall.sample_sankoff.csv', 'w+') as F1_file:
        F1_file.write('dataset,10,20,30,40,50,60,70,80,90,100\n')

        for folder in folders:
            print('inside folder: ', folder)
            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))
            sharptni_dir = data_dir + folder + '/sharptni'
            summaries = [name for name in next(os.walk(sharptni_dir))[2]
                         if name.startswith('sample_sankoff_summary')]
            sharptni_file = summaries[0]
            sample_num = int(sharptni_file.split('.')[1])
            print(sample_num)

            F1 = []
            for th in thresholds:
                # Percentage of the count embedded in the file name.
                thr = round(sample_num * (th / 100))
                tnet = set(ge.get_mul_tnet_edges(sharptni_dir + '/' + sharptni_file, thr))
                F1.append(get_prec_rec_f1(real, tnet)[1])

            F1_file.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format(folder, *F1))
def create_cdc_tnet_summary_directed(threshold):
    """Summarise TNet bootstrap edges for every outbreak in ``known_outbreaks``.

    For each outbreak, every file directly inside
    ``CDC/<outbreak>/tnet_new_bootstrap`` is read with
    ``ge.get_mul_tnet_edges(file, threshold)`` and each returned edge is
    tallied once per file. The tallies are written as ``edge,count`` rows,
    highest count first, to
    ``CDC/<outbreak>/tnet_new_bootstrap_summary_directed/tnet_new_bootstrap_th_<threshold>_summary.csv``
    (the directory is created when missing; an existing file is overwritten).

    Args:
        threshold: Forwarded to ``ge.get_mul_tnet_edges`` and embedded in the
            summary file name.
    """
    for outbreak in known_outbreaks:
        print('Inside', outbreak)
        input_folder = 'CDC/' + outbreak + '/tnet_new_bootstrap'
        output_folder = 'CDC/' + outbreak + '/tnet_new_bootstrap_summary_directed'
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)

        summary_path = (output_folder + '/tnet_new_bootstrap' + '_th_' +
                        str(threshold) + '_summary.csv')

        # One handle per outbreak: the original leaked all of them. ``with``
        # closes each before moving on to the next outbreak.
        with open(summary_path, 'w+') as result:
            edge_dict = {}
            for file in next(os.walk(input_folder))[2]:
                tnet_file = input_folder + '/' + file
                for edge in ge.get_mul_tnet_edges(tnet_file, threshold):
                    edge_dict[edge] = edge_dict.get(edge, 0) + 1

            # Stable sort: edges with equal counts keep first-seen order.
            ranked = sorted(edge_dict.items(), key=operator.itemgetter(1),
                            reverse=True)
            for edge, count in ranked:
                result.write('{},{}\n'.format(edge, count))
def compare_tnet_cdc_single_tree():
    """Write TNet F1 for a single run and for thresholds 10..100, per outbreak.

    For every outbreak in ``cdc.known_outbreaks`` one CSV row is written to
    ``results/cdc_single_tree_tnet/single_tree.f1.tnet.new.with.min.csv``:
    the outbreak name, the third value of ``get_prec_rec_f1`` (presumably F1,
    per the file name) for the ``single_tree.1`` file read with threshold 0,
    then the same value for the ``single_tree.100`` file at each threshold.
    """
    thresholds = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

    # The original never closed this file; ``with`` guarantees the rows are
    # flushed and the handle released, including on error.
    with open('results/cdc_single_tree_tnet/single_tree.f1.tnet.new.with.min.csv', 'w+') as F1_file:
        F1_file.write('dataset,single,10,20,30,40,50,60,70,80,90,100\n')

        for outbreak in cdc.known_outbreaks:
            tree_dir = 'CDC/' + outbreak + '/tnet_single_tree/'
            real = set(cdc.get_true_transmission_edges(outbreak))
            tnet_single = set(ge.get_mul_tnet_edges(tree_dir + 'single_tree.1.tnet_new_min', 0))
            single_run = get_prec_rec_f1(real, tnet_single)[2]

            F1 = []
            for th in thresholds:
                tnet = set(ge.get_mul_tnet_edges(tree_dir + 'single_tree.100.tnet_new_min', th))
                F1.append(get_prec_rec_f1(real, tnet)[2])

            F1_file.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                outbreak, single_run, *F1))
def compare_favites_sharptni_phyloscanner_tnet_new_tnet_bias_single_tree_single_run():
    """Score Phyloscanner, SharpTNI, TNet and biased TNet on single-run outputs.

    For each sub-folder of ``outputs/`` one CSV row is written with the
    folder name followed by the scores from ``get_prec_rec_f1`` for the
    Phyloscanner, SharpTNI, TNet and TNet-with-bias edge sets, in that order.
    All three ``ge.get_mul_tnet_edges`` calls use a threshold of 1.
    """
    out_path = 'results/single_tree_sharptni/favites.phyloscanner.sharptni.min.coinf.tnet.new.tnet.bias.single_tree.single_run.csv'

    # NOTE(review): Phyloscanner results are read from a machine-specific
    # absolute path rather than from the local ``outputs/`` tree.
    phylo_dir = '/home/saurav/research/FAVITES_compare_TNet_v2/outputs/'

    # ``with`` closes the CSV even if a dataset folder raises part-way.
    with open(out_path, 'w+') as F1_file:
        F1_file.write('dataset,phylo_prec,phylo_rec,phylo_f1,sharp_prec,sharp_rec,sharp_f1,tnet_prec,tnet_rec,tnet_f1,tnet_bias_prec,tnet_bias_rec,tnet_bias_f1\n')

        folders = next(os.walk('outputs/'))[1]
        for folder in folders:
            print('inside folder:', folder)
            base = 'outputs/' + folder

            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))
            phylo = set(ge.get_phyloscanner_single_tree_edges(
                phylo_dir + folder + '/phyloscanner_best_tree/favites_collapsedTree.csv'))
            sharptni = set(ge.get_mul_tnet_edges(
                base + '/sharptni_single/bestTree_sankoff_min_coinfection.1', 1))
            tnet = set(ge.get_mul_tnet_edges(
                base + '/tnet_best_tree/bestTree.1.tnet_new', 1))
            tnet_bias = set(ge.get_mul_tnet_edges(
                base + '/tnet_best_tree/bestTree.1.tnet_new_with_bias', 1))

            F1 = []
            for predicted in (phylo, sharptni, tnet, tnet_bias):
                F1.extend(get_prec_rec_f1(real, predicted))
            F1_file.write('{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(folder, *F1))
def compare_tnet_single_run():
    """Write the ``get_prec_rec_f1`` scores of TNet at threshold 50 per dataset.

    For each sub-folder of ``outputs/`` (sorted), the edges read from
    ``tnet_best_tree/bestTree.100.tnet_new`` with threshold 50 are compared
    with the real edges, and one ``dataset,precision,recall,f1`` row is
    written to ``results/single_tree_tnet/best_tree.tnet.new.50.csv``.
    """
    data_dir = 'outputs/'
    folders = sorted(next(os.walk(data_dir))[1])

    # The original never closed this file; ``with`` guarantees the rows are
    # flushed and the handle released, including on error.
    with open('results/single_tree_tnet/best_tree.tnet.new.50.csv', 'w+') as F1_file:
        F1_file.write('dataset,precision,recall,f1\n')

        for folder in folders:
            print('inside folder: ', folder)
            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))
            tnet_single = set(ge.get_mul_tnet_edges(
                data_dir + folder + '/tnet_best_tree/bestTree.100.tnet_new', 50))

            F1 = get_prec_rec_f1(real, tnet_single)
            F1_file.write('{},{},{},{}\n'.format(folder, F1[0], F1[1], F1[2]))
Exemple #9
0
def compare_tnet_best_tree():
    """Write TNet recall for a single run and thresholds 10..100 per dataset.

    For each sub-folder of ``outputs/`` (sorted) one CSV row is written to
    ``results/single_tree_tnet/best_tree.recall.tnet.new.csv``: the folder
    name, the second value of ``get_prec_rec_f1`` (presumably recall, per the
    file name) for ``bestTree.1.tnet_new`` read with threshold 0, then the
    same value for ``bestTree.100.tnet_new`` at each threshold.
    """
    data_dir = 'outputs/'
    folders = sorted(next(os.walk(data_dir))[1])
    thresholds = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

    # The original never closed this file; ``with`` guarantees the rows are
    # flushed and the handle released, including on error.
    with open('results/single_tree_tnet/best_tree.recall.tnet.new.csv', 'w+') as F1_file:
        F1_file.write('dataset,single,10,20,30,40,50,60,70,80,90,100\n')

        for folder in folders:
            print('inside folder: ', folder)
            tree_dir = data_dir + folder + '/tnet_best_tree/'
            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))
            tnet_single = set(ge.get_mul_tnet_edges(tree_dir + 'bestTree.1.tnet_new', 0))
            single_run = get_prec_rec_f1(real, tnet_single)[1]

            F1 = []
            for th in thresholds:
                tnet = set(ge.get_mul_tnet_edges(tree_dir + 'bestTree.100.tnet_new', th))
                F1.append(get_prec_rec_f1(real, tnet)[1])

            F1_file.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                folder, single_run, *F1))
Exemple #10
0
def compare_phyloscanner_tnet_best_tree(threshold):
    """Score Phyloscanner and TNet edge sets against the real edges per dataset.

    For each sub-folder of ``outputs/`` (sorted) one CSV row is written with
    the folder name followed by the scores from ``get_prec_rec_f1`` for the
    Phyloscanner edges and then for the TNet edges.

    Args:
        threshold: Forwarded to ``ge.get_mul_tnet_edges`` for the TNet file
            and embedded in the output file name.
    """
    data_dir = 'outputs/'
    folders = sorted(next(os.walk(data_dir))[1])
    out_path = 'results/best_tree.phyloscanner.tnet.new.th.' + str(threshold) + '.csv'

    # ``with`` closes the CSV even if a dataset folder raises part-way.
    with open(out_path, 'w+') as F1_file:
        F1_file.write('dataset,phylo_prec,phylo_rec,phylo_f1,tnet_prec,tnet_rec,tnet_f1\n')

        for folder in folders:
            print('inside folder: ', folder)

            real = set(ge.get_real_edges('dataset/' + folder + '/transmission_network.txt'))
            phylo = set(ge.get_phyloscanner_single_tree_edges(
                data_dir + folder + '/phyloscanner_best_tree/favites_collapsedTree.csv'))
            tnet = set(ge.get_mul_tnet_edges(
                data_dir + folder + '/tnet_best_tree/bestTree.100.tnet_new', threshold))

            F1 = []
            F1.extend(get_prec_rec_f1(real, phylo))
            F1.extend(get_prec_rec_f1(real, tnet))
            F1_file.write('{},{},{},{},{},{},{}\n'.format(folder, *F1))
def compare_tnet_best_tree():
    """Write old-TNet scores at thresholds 50..100 for every dataset folder.

    For each sub-folder of ``dataset/`` (sorted), ``tnet_old_100.tnet`` is
    read at six thresholds and everything ``get_prec_rec_f1`` returns for
    each is appended to one row (presumably precision, recall, F1 per
    threshold -- this matches the CSV header). Rows are written to
    ``results/old.prec.rec.f1.tnet.csv``.
    """
    data_dir = 'dataset/'
    folders = sorted(next(os.walk(data_dir))[1])
    thresholds = [50, 60, 70, 80, 90, 100]

    # The original never closed this file; ``with`` guarantees the rows are
    # flushed and the handle released, including on error.
    with open('results/old.prec.rec.f1.tnet.csv', 'w+') as F1_file:
        F1_file.write('dataset,prec_50,rec_50,f1_50,prec_60,rec_60,f1_60,prec_70,rec_70,f1_70,prec_80,rec_80,f1_80,prec_90,rec_90,f1_90,prec_100,rec_100,f1_100\n')

        for folder in folders:
            print('inside folder: ', folder)

            # The real edge set does not depend on the threshold, so it is
            # loaded once per folder instead of once per threshold.
            real = set(ge.get_real_edges(data_dir + folder + '/transmission_network.txt'))

            F1 = []
            for th in thresholds:
                tnet = set(ge.get_mul_tnet_edges(data_dir + folder + '/tnet_old_100.tnet', th))
                F1.extend(get_prec_rec_f1(real, tnet))

            F1_file.write('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                folder, *F1))
Exemple #12
0
def compare_tnets_directed(th=50):
    """Compare old and new TNet directed edges against the real edges.

    For each sub-folder of ``dataset/`` (sorted) the edge sets read from
    ``tnet_old_100.tnet`` and ``tnet_new_100.tnet`` are compared with the
    real transmission edges. Two CSV files are written to the current
    directory:

    * ``directed.tnet.old.new.th_<th>.TP_FP_FN.csv`` -- true-positive,
      false-positive and false-negative counts (old TNet, then new TNet);
    * ``directed.tnet.old.new.th_<th>.F1.csv`` -- precision, recall and F1
      rounded to three decimals (old TNet, then new TNet).

    Args:
        th: Threshold forwarded to ``ge.get_mul_tnet_edges`` and embedded in
            both output file names. The original hard-coded 50 for the edge
            extraction regardless of ``th``, so files named for another
            threshold still held threshold-50 results; ``th`` is now used
            for both. The default call behaves exactly as before.
    """
    data_dir = 'dataset/'
    folders = sorted(next(os.walk(data_dir))[1])
    prefix = 'directed.tnet.old.new.th_' + str(th)

    # Both outputs were previously left open; ``with`` closes them reliably.
    with open(prefix + '.TP_FP_FN.csv', 'w+') as TP_FP_FN_file, \
            open(prefix + '.F1.csv', 'w+') as F1_file:
        TP_FP_FN_file.write(
            'dataset,tnet_old_tp,tnet_old_fp,tnet_old_fn,tnet_new_tp,tnet_new_fp,tnet_new_fn\n'
        )
        F1_file.write(
            'dataset,tnet_old_prec,tnet_old_rec,tnet_old_f1,tnet_new_prec,tnet_new_rec,tnet_new_f1\n'
        )

        for folder in folders:
            print('inside folder: ', folder)

            real = set(
                ge.get_real_edges(data_dir + folder + '/transmission_network.txt'))
            tnet_old = set(
                ge.get_mul_tnet_edges(data_dir + folder + '/tnet_old_100.tnet', th))
            tnet_new = set(
                ge.get_mul_tnet_edges(data_dir + folder + '/tnet_new_100.tnet', th))

            TP_FP_FN = []
            F1 = []
            for predicted in (tnet_old, tnet_new):
                counts, scores = _directed_edge_scores(real, predicted)
                TP_FP_FN.extend(counts)
                F1.extend(scores)

            TP_FP_FN_file.write('{},{},{},{},{},{},{}\n'.format(folder, *TP_FP_FN))
            F1_file.write('{},{},{},{},{},{},{}\n'.format(folder, *F1))


def _directed_edge_scores(real, predicted):
    """Return ``((TP, FP, FN), (precision, recall, f1))`` for two edge sets.

    The scores are rounded to three decimals. If any denominator is zero,
    all three scores are reported as 0.
    """
    TP = len(real & predicted)
    FP = len(predicted - real)
    FN = len(real - predicted)
    try:
        precision = TP / (TP + FP)
        recall = TP / (TP + FN)
        f1 = 2 * (recall * precision) / (recall + precision)
    except ZeroDivisionError:
        # A failure in any of the three divisions zeroes all three scores.
        precision = 0
        recall = 0
        f1 = 0
    return (TP, FP, FN), (round(precision, 3), round(recall, 3), round(f1, 3))