import multiprocessing as mp
import os
import sys

from Bio import AlignIO

import util  # project-local helpers: read_file, create_df_from_dict, create_graph, ...
import plot_util as pu  # assumed module name for the `pu` alias used below

# run_01, run_0123, run_B01a, run_B01b, plot_cooccurences, perform_residue_analysis,
# run_in_out_acc_comparision and run_in_out_entropy_comparision are defined
# elsewhere in this repo.


def run(input_file, folder, cutoff, out_folder):
    """Run run_01 on every file listed in input_file, one process per file."""
    manager = mp.Manager()
    output = manager.Queue()
    all_mic_scores = []
    lines = util.read_file(input_file)
    processes = []
    for line in lines:
        print("start processing ", line[0])
        file = folder + os.sep + line[0]
        p = mp.Process(target=run_01, args=(file, cutoff, output, out_folder))
        processes.append(p)
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print('after join')
    # Each worker is expected to put exactly one list of MIC scores on the queue.
    results = [output.get() for p in processes]
    print('no. of results ', len(results))
    for list_of_mic_scores in results:
        all_mic_scores.extend(list_of_mic_scores)
    print('no. of mic_scores ', len(all_mic_scores))
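# Every driver in this module assumes util.read_file yields one token list per
# non-empty line of a whitespace-delimited text file. A minimal sketch of that
# assumed contract (the real helper lives in the project's util module):
#
#   def read_file(path):
#       with open(path) as fh:
#           return [line.split() for line in fh if line.strip()]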
def run2(input_file, folder, cutoff, target):
    """Pairwise variant: run the selected worker on every file pair in input_file."""
    manager = mp.Manager()
    output = manager.Queue()
    all_mic_scores = []
    lines = util.read_file(input_file)
    out_filename = folder + '_' + cutoff.replace(".", "") + '_' + target
    processes = []
    for line in lines:
        print("start processing ", line[0], line[1])
        file1 = folder + os.sep + line[0]
        file2 = folder + os.sep + line[1]
        if target == '0123':
            p = mp.Process(target=run_0123, args=(file1, file2, cutoff, output))
        elif target == '01':
            p = mp.Process(target=run_01, args=(file1, file2, cutoff, output))
        elif target == 'B01a':
            p = mp.Process(target=run_B01a, args=(file1, file2, cutoff, output))
        elif target == 'B01b':
            p = mp.Process(target=run_B01b, args=(file1, file2, cutoff, output))
        else:
            # previously an unknown target silently reused the stale `p`
            raise ValueError('unknown target: ' + target)
        processes.append(p)
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print('after join')
    # Assumed tail, mirroring the identical run(target=...) driver further down:
    # collect one result list per worker and write the correlation graph.
    results = [output.get() for p in processes]
    print('no. of results ', len(results))
    for list_of_mic_scores in results:
        all_mic_scores.extend(list_of_mic_scores)
    print('no. of mic_scores ', len(all_mic_scores))
    mic_df = util.create_df_from_dict(all_mic_scores)
    util.create_graph(mic_df, out_filename)
def run_cooccurence_all_datasets():
    # input_file = 'location of cooccurence_input.txt'
    input_file = sys.argv[1]
    lines = util.read_file(input_file)
    for line in lines:
        fasta_folder = line[0]
        graphml_file = line[1]
        dataset = line[2]
        plot_cooccurences(graphml_file=graphml_file, fasta_folder=fasta_folder,
                          dataset=dataset)
        print('************************')
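# Assumed layout of cooccurence_input.txt, three whitespace-separated columns
# per dataset (the example row is hypothetical):
#
#   fasta_folder   graphml_file          dataset
#   h3n2_fastas    h3n2_05_01.graphml    H3N2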
def create_unique_sequences(fasta_file, unique_strains_file):
    """Keep the first sequence seen for each strain named in unique_strains_file."""
    sequences = AlignIO.read(fasta_file, 'fasta')
    strains = list(util.read_file(unique_strains_file))[0]
    new_fasta_sequences = []
    for sequence in sequences:
        strain_name = util.get_strain_name(sequence)
        if strain_name in strains:
            new_fasta_sequences.append(sequence)
            strains.remove(strain_name)  # drop the strain so duplicates are skipped
    return new_fasta_sequences
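# Example use of create_unique_sequences (file names are hypothetical), writing
# the deduplicated records back out with Biopython:
#
#   from Bio import SeqIO
#   records = create_unique_sequences('HA.afasta', 'unique_strains.txt')
#   SeqIO.write(records, 'HA_unique.fasta', 'fasta')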
def run_in_out_acc_comparision_all_datasets():
    acc_in = []
    acc_out = []
    input_file = 'C:\\Users\\uday\\pycharm_projects\\network_analysis\\data\\acc_input.txt'
    lines = util.read_file(input_file)
    for line in lines:
        folder = line[0]
        graphml_file = line[1]
        dataset = line[2]
        acc_in_dict, acc_out_dict = run_in_out_acc_comparision(
            dataset=dataset, graphml_file=graphml_file, folder=folder)
        acc_in.append(acc_in_dict)
        acc_out.append(acc_out_dict)
    util.write_avg_accs_to_csv(acc_in, filename='average_in_acc.csv')
    util.write_avg_accs_to_csv(acc_out, filename='average_out_acc.csv')
def run_in_out_entropy_comparision_all_datasets():
    avg_entropies = []
    avg_in_entropies = []
    avg_out_entropies = []
    input_file = 'C:\\Users\\uday\\pycharm_projects\\network_analysis\\data\\entropy_input.txt'
    lines = util.read_file(input_file)
    for line in lines:
        folder = line[0]
        graphml_file = line[1]
        dataset = line[2]
        avg_entropy_dict, avg_in_nx_entropy_dict, avg_out_nx_entropy_dict = \
            run_in_out_entropy_comparision(dataset=dataset, graphml_file=graphml_file,
                                           folder=folder)
        avg_entropies.append(avg_entropy_dict)
        avg_in_entropies.append(avg_in_nx_entropy_dict)
        avg_out_entropies.append(avg_out_nx_entropy_dict)
    util.write_avg_entropies_to_csv(avg_entropies, filename='average_entropies.csv')
    util.write_avg_entropies_to_csv(avg_in_entropies, filename='average_in_entropies.csv')
    util.write_avg_entropies_to_csv(avg_out_entropies, filename='average_out_entropies.csv')
def run(input_file, folder, cutoff, target):
    """Pairwise driver used by the __main__ block below.

    Note: this shadows the single-file run() defined at the top of the module.
    """
    manager = mp.Manager()
    output = manager.Queue()
    all_mic_scores = []
    lines = util.read_file(input_file)
    out_filename = folder + '_' + cutoff.replace(".", "") + '_' + target
    processes = []
    for line in lines:
        print("start processing ", line[0], line[1])
        file1 = folder + os.sep + line[0]
        file2 = folder + os.sep + line[1]
        if target == '0123':
            p = mp.Process(target=run_0123, args=(file1, file2, cutoff, output))
        elif target == '01':
            p = mp.Process(target=run_01, args=(file1, file2, cutoff, output))
        elif target == 'B01a':
            p = mp.Process(target=run_B01a, args=(file1, file2, cutoff, output))
        elif target == 'B01b':
            p = mp.Process(target=run_B01b, args=(file1, file2, cutoff, output))
        else:
            raise ValueError('unknown target: ' + target)
        processes.append(p)
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print('after join')
    results = [output.get() for p in processes]
    print('no. of results ', len(results))
    for list_of_mic_scores in results:
        all_mic_scores.extend(list_of_mic_scores)
    print('no. of mic_scores ', len(all_mic_scores))
    mic_df = util.create_df_from_dict(all_mic_scores)
    util.create_graph(mic_df, out_filename)
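# Queue contract assumed by run() and run2(): each worker puts exactly one list
# (possibly empty) on `output`, so `[output.get() for p in processes]` does one
# blocking get per process; a worker that puts even on failure keeps the driver
# from hanging. A hypothetical worker honouring that contract, shaped like
# run_01/run_0123/run_B01a/run_B01b:

def _example_worker(file1, file2, cutoff, output):
    """Hypothetical stand-in for the real workers; not used by the drivers."""
    mic_scores = []
    try:
        pass  # a real worker computes MIC scores above `cutoff` for the pair here
    finally:
        output.put(mic_scores)  # exactly one put, even if the computation fails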
def run_pairwise_counts_manual_edges():
    folder = sys.argv[2]
    significant_residues_file = 'pairs.txt'
    lines = util.read_file(significant_residues_file)
    logo_folder = 'C:\\uday\\gmu\\correlations\\results\\10proteins\\logos'
    for line in lines:
        print(line)
        protein1 = line[0]
        residue1 = int(line[1]) - 1  # pairs.txt is 1-based; the analysis is 0-based
        protein2 = line[2]
        residue2 = int(line[3]) - 1
        file1 = folder + os.sep + protein1 + '.afasta'
        file2 = folder + os.sep + protein2 + '.afasta'
        residue_comparision_count = perform_residue_analysis(file1, file2,
                                                             residue1, residue2)
        residue1_str = protein1 + str(residue1 + 1)
        residue2_str = protein2 + str(residue2 + 1)
        filename = residue1_str + '_' + residue2_str + '.bat'
        pu.create_image_magick_script(residue_comparision_count, residue1_str,
                                      residue2_str, logo_folder, filename)
if __name__ == '__main__':
    # Each row of the args file is one run: input_file folder cutoff target.
    args_file = sys.argv[1]
    arglines = util.read_file(args_file)
    for argline in arglines:
        print("start processing ", argline[0], argline[1], argline[2], argline[3])
        run(argline[0], argline[1], argline[2], argline[3])
        print("done processing ", argline[0], argline[1], argline[2], argline[3])
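# Example invocation (script and file names are hypothetical). The args file is
# whitespace-delimited, one run per row:
#
#   python mic_correlations.py runs.txt
#
# where runs.txt might contain:
#
#   pairs_input.txt  h3n2_alignments  0.5  01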