def run(input_file, folder, cutoff, out_folder):
    """Run ``run_01`` in parallel over every file listed in *input_file*.

    Each line of *input_file* names (in its first field) a file inside
    *folder*; one child process per file pushes its list of MIC scores
    onto a shared queue.

    Args:
        input_file: text file whose lines' first field is a file name.
        folder: directory containing the named files.
        cutoff: cutoff value forwarded to ``run_01``.
        out_folder: output folder forwarded to ``run_01``.

    Returns:
        The flattened list of all MIC scores gathered from the workers.
        (Previously this list was computed and then silently discarded.)
    """
    manager = mp.Manager()
    output = manager.Queue()  # shared queue the workers write results to

    lines = util.read_file(input_file)

    processes = []
    for line in lines:
        print("start processing ", line[0])
        file = folder + os.sep + line[0]
        p = mp.Process(target=run_01, args=(file, cutoff, output, out_folder))
        processes.append(p)

    for p in processes:
        p.start()

    for p in processes:
        p.join()

    print('after join')
    # Exactly one queue entry per worker is expected.
    results = [output.get() for _ in processes]
    print('no. of results ', len(results))

    # Flatten the per-worker score lists into a single list.
    all_mic_scores = [score for worker_scores in results
                      for score in worker_scores]

    print('no. of mic_scores ', len(all_mic_scores))
    return all_mic_scores
# Esempio n. 2  (scraper artifact — separator between pasted snippets; commented out, not valid Python)
# 0
def run(input_file, folder, cutoff, out_folder):
    """Spawn one ``run_01`` worker per line of *input_file* and gather scores.

    Each line's first field names a file inside *folder*; every worker
    pushes its list of MIC scores onto a shared queue, which is drained
    after all workers have been joined.
    """
    manager = mp.Manager()
    output = manager.Queue()

    entries = util.read_file(input_file)

    workers = []
    for entry in entries:
        print("start processing ", entry[0])
        path = folder + os.sep + entry[0]
        workers.append(mp.Process(target=run_01,
                                  args=(path, cutoff, output, out_folder)))

    for worker in workers:
        worker.start()

    for worker in workers:
        worker.join()

    print('after join')
    # One queue entry per worker.
    results = [output.get() for _ in workers]
    print('no. of results ', len(results))

    all_mic_scores = []
    for worker_scores in results:
        all_mic_scores.extend(worker_scores)

    print('no. of mic_scores ', len(all_mic_scores))
def run2(input_file, folder, cutoff, target):
    """Spawn one pairwise worker per input line and wait for completion.

    Each line of *input_file* provides two file names (fields 0 and 1)
    inside *folder*; *target* selects which pairwise analysis to run.

    Raises:
        ValueError: if *target* is not one of the known analysis names.

    NOTE(review): this variant starts and joins the workers but never
    drains the result queue, and ``all_mic_scores`` / ``out_filename``
    go unused — it looks truncated; compare the fuller ``run`` variant.
    """
    # Dispatch table replaces the if/elif chain. Previously an unknown
    # target left ``p`` unbound (NameError on the first line) or
    # silently re-appended the previous process on later lines.
    analyses = {
        '0123': run_0123,
        '01': run_01,
        'B01a': run_B01a,
        'B01b': run_B01b,
    }
    if target not in analyses:
        raise ValueError('unknown target: %r' % (target,))

    manager = mp.Manager()
    output = manager.Queue()

    all_mic_scores = []  # NOTE(review): unused here; kept for parity with run()

    lines = util.read_file(input_file)
    out_filename = folder + '_' + cutoff.replace(".", "") + '_' + target

    processes = []
    for line in lines:
        print("start processing ", line[0], line[1])
        file1 = folder + os.sep + line[0]
        file2 = folder + os.sep + line[1]
        p = mp.Process(target=analyses[target],
                       args=(file1, file2, cutoff, output))
        processes.append(p)

    for p in processes:
        p.start()

    for p in processes:
        p.join()

    print('after join')
def run2(input_file, folder, cutoff, target):
    """Spawn one pairwise worker per input line and wait for completion.

    Each line of *input_file* provides two file names (fields 0 and 1)
    inside *folder*; *target* selects which pairwise analysis to run.

    Raises:
        ValueError: if *target* is not one of the known analysis names.

    NOTE(review): workers are started and joined but the result queue is
    never drained and ``all_mic_scores`` / ``out_filename`` go unused —
    presumably a truncated copy of the fuller ``run`` variant.
    """
    # Dispatch table replaces the if/elif chain. Previously an unknown
    # target left ``p`` unbound (NameError) on the first iteration or
    # silently re-appended the previous process afterwards.
    analyses = {
        '0123': run_0123,
        '01': run_01,
        'B01a': run_B01a,
        'B01b': run_B01b,
    }
    if target not in analyses:
        raise ValueError('unknown target: %r' % (target,))

    manager = mp.Manager()
    output = manager.Queue()

    all_mic_scores = []  # NOTE(review): unused here; kept for parity with run()

    lines = util.read_file(input_file)
    out_filename = folder + '_' + cutoff.replace(".", "") + '_' + target

    processes = []
    for line in lines:
        print("start processing ", line[0], line[1])
        file1 = folder + os.sep + line[0]
        file2 = folder + os.sep + line[1]
        p = mp.Process(target=analyses[target],
                       args=(file1, file2, cutoff, output))
        processes.append(p)

    for p in processes:
        p.start()

    for p in processes:
        p.join()

    print('after join')
def run_cooccurence_all_datasets():
    """Plot co-occurrences for every dataset listed in the input file.

    The input file path is taken from the first command-line argument
    (originally noted as 'location of cooccurence_input.txt'); each of
    its lines gives (fasta_folder, graphml_file, dataset).
    """
    for entry in util.read_file(sys.argv[1]):
        fasta_folder, graphml_file, dataset = entry[0], entry[1], entry[2]
        plot_cooccurences(graphml_file=graphml_file,
                          fasta_folder=fasta_folder,
                          dataset=dataset)
        print('************************')
# Esempio n. 6  (scraper artifact — separator between pasted snippets; commented out, not valid Python)
# 0
def create_unique_sequences(fasta_file, unique_strains_file):
    """Return the first sequence found for each wanted strain.

    Args:
        fasta_file: aligned FASTA file readable by ``AlignIO.read``.
        unique_strains_file: file whose first line (as returned by
            ``util.read_file``) lists the wanted strain names.

    Returns:
        List of sequence records, one per wanted strain (first
        occurrence in *fasta_file* wins; later duplicates are skipped).
    """
    sequences = AlignIO.read(fasta_file, 'fasta')
    # Use a set for O(1) membership instead of the original list scan +
    # list.remove (both O(n) per sequence). Assumes the "unique strains"
    # line contains no duplicate names — TODO confirm; with duplicates
    # the old list version could keep a strain wanted after one removal.
    wanted = set(list(util.read_file(unique_strains_file))[0])
    new_fasta_sequences = []

    for sequence in sequences:
        strain_name = util.get_strain_name(sequence)
        if strain_name in wanted:
            new_fasta_sequences.append(sequence)
            # Drop the strain so subsequent duplicates are skipped.
            wanted.discard(strain_name)

    return new_fasta_sequences
# Esempio n. 7  (scraper artifact — separator between pasted snippets; commented out, not valid Python)
# 0
def create_unique_sequences(fasta_file, unique_strains_file):
    """Return the first sequence found for each wanted strain.

    Args:
        fasta_file: aligned FASTA file readable by ``AlignIO.read``.
        unique_strains_file: file whose first line (as returned by
            ``util.read_file``) lists the wanted strain names.

    Returns:
        List of sequence records, one per wanted strain (first
        occurrence in *fasta_file* wins; later duplicates are skipped).
    """
    sequences = AlignIO.read(fasta_file, 'fasta')
    # Set membership is O(1) vs the original list's O(n) scan + remove.
    # Assumes the strains line has no duplicate names — TODO confirm.
    wanted = set(list(util.read_file(unique_strains_file))[0])
    new_fasta_sequences = []

    for sequence in sequences:
        strain_name = util.get_strain_name(sequence)
        if strain_name in wanted:
            new_fasta_sequences.append(sequence)
            # Drop the strain so subsequent duplicates are skipped.
            wanted.discard(strain_name)

    return new_fasta_sequences
def run_cooccurence_all_datasets():
    """Generate a co-occurrence plot for each dataset named in the input file.

    The input file (first command-line argument; originally noted as
    'location of cooccurence_input.txt') lists one dataset per line as
    (fasta_folder, graphml_file, dataset).
    """
    input_file = sys.argv[1]
    lines = util.read_file(input_file)
    for line in lines:
        plot_cooccurences(graphml_file=line[1],
                          fasta_folder=line[0],
                          dataset=line[2])
        print('************************')
def run_in_out_acc_comparision_all_datasets(
        input_file='C:\\Users\\uday\\pycharm_projects\\network_analysis\\data\\acc_input.txt'):
    """Compare in- vs out-of-network accuracies across all datasets.

    Args:
        input_file: file whose lines give (folder, graphml_file, dataset).
            Was a hard-coded machine-specific path; generalized to a
            parameter with the original value as default so existing
            no-argument callers are unaffected.

    Writes the averaged results to 'average_in_acc.csv' and
    'average_out_acc.csv' via ``util.write_avg_accs_to_csv``.
    """
    acc_in = []
    acc_out = []
    lines = util.read_file(input_file)
    for line in lines:
        folder = line[0]
        graphml_file = line[1]
        dataset = line[2]
        acc_in_dict, acc_out_dict = run_in_out_acc_comparision(
            dataset=dataset, graphml_file=graphml_file, folder=folder)
        acc_in.append(acc_in_dict)
        acc_out.append(acc_out_dict)
    util.write_avg_accs_to_csv(acc_in, filename='average_in_acc.csv')
    util.write_avg_accs_to_csv(acc_out, filename='average_out_acc.csv')
def run_in_out_acc_comparision_all_datasets(
        input_file='C:\\Users\\uday\\pycharm_projects\\network_analysis\\data\\acc_input.txt'):
    """Compare in- vs out-of-network accuracies across all datasets.

    Args:
        input_file: file whose lines give (folder, graphml_file, dataset).
            Was a hard-coded machine-specific path; generalized to a
            defaulted parameter so existing no-argument callers still work.

    Writes averaged results to 'average_in_acc.csv' / 'average_out_acc.csv'.
    """
    acc_in = []
    acc_out = []
    lines = util.read_file(input_file)
    for line in lines:
        folder = line[0]
        graphml_file = line[1]
        dataset = line[2]
        acc_in_dict, acc_out_dict = run_in_out_acc_comparision(
            dataset=dataset, graphml_file=graphml_file, folder=folder)
        acc_in.append(acc_in_dict)
        acc_out.append(acc_out_dict)
    util.write_avg_accs_to_csv(acc_in, filename='average_in_acc.csv')
    util.write_avg_accs_to_csv(acc_out, filename='average_out_acc.csv')
def run_in_out_entropy_comparision_all_datasets(
        input_file='C:\\Users\\uday\\pycharm_projects\\network_analysis\\data\\entropy_input.txt'):
    """Compare overall / in-network / out-of-network entropies for all datasets.

    Args:
        input_file: file whose lines give (folder, graphml_file, dataset).
            Was a hard-coded machine-specific path; generalized to a
            defaulted parameter so existing no-argument callers still work.

    Writes three CSVs of averaged entropies via
    ``util.write_avg_entropies_to_csv``.
    """
    avg_entropies = []
    avg_in_entropies = []
    avg_out_entropies = []
    lines = util.read_file(input_file)
    for line in lines:
        folder = line[0]
        graphml_file = line[1]
        dataset = line[2]
        avg_entropy_dict, avg_in_nx_entropy_dict, avg_out_nx_entropy_dict = \
            run_in_out_entropy_comparision(
                dataset=dataset, graphml_file=graphml_file, folder=folder)
        avg_entropies.append(avg_entropy_dict)
        avg_in_entropies.append(avg_in_nx_entropy_dict)
        avg_out_entropies.append(avg_out_nx_entropy_dict)
    util.write_avg_entropies_to_csv(avg_entropies,
                                    filename='average_entropies.csv')
    util.write_avg_entropies_to_csv(avg_in_entropies,
                                    filename='average_in_entropies.csv')
    util.write_avg_entropies_to_csv(avg_out_entropies,
                                    filename='average_out_entropies.csv')
def run(input_file, folder, cutoff, target):
    """Run one pairwise worker per input line, collect MIC scores, build a graph.

    Each line of *input_file* names two files (fields 0 and 1) inside
    *folder*; *target* selects the pairwise analysis function. The
    workers' score lists are flattened, turned into a DataFrame and
    written out as a graph named after folder/cutoff/target.

    Raises:
        ValueError: if *target* is not a known analysis name.
    """
    # Dispatch table replaces the old if/elif chain; previously an
    # unknown target left ``p`` unbound (NameError on the first line)
    # or silently re-appended the previous process on later lines.
    analyses = {
        '0123': run_0123,
        '01': run_01,
        'B01a': run_B01a,
        'B01b': run_B01b,
    }
    if target not in analyses:
        raise ValueError('unknown target: %r' % (target,))

    manager = mp.Manager()
    output = manager.Queue()

    lines = util.read_file(input_file)
    out_filename = folder + '_' + cutoff.replace(".", "") + '_' + target

    processes = []
    for line in lines:
        print("start processing ", line[0], line[1])
        file1 = folder + os.sep + line[0]
        file2 = folder + os.sep + line[1]
        p = mp.Process(target=analyses[target],
                       args=(file1, file2, cutoff, output))
        processes.append(p)

    for p in processes:
        p.start()

    for p in processes:
        p.join()

    print('after join')
    # Exactly one queue entry per worker is expected.
    results = [output.get() for _ in processes]

    print('no. of results ', len(results))

    # Flatten the per-worker score lists into a single list.
    all_mic_scores = [score for worker_scores in results
                      for score in worker_scores]

    print('no. of mic_scores ', len(all_mic_scores))
    mic_df = util.create_df_from_dict(all_mic_scores)
    util.create_graph(mic_df, out_filename)
def run_pairwise_counts_manual_edges(
        significant_residues_file='pairs.txt',
        logo_folder='C:\\uday\\gmu\\correlations\\results\\10proteins\\logos'):
    """Build an ImageMagick logo script for each manually listed residue pair.

    The alignment folder is taken from ``sys.argv[2]``. Each line of
    *significant_residues_file* holds (protein1, residue1, protein2,
    residue2) with 1-based residue numbers.

    Args:
        significant_residues_file: pairs file. Was hard-coded; now a
            parameter defaulting to the original value.
        logo_folder: output folder for the logos; likewise generalized.
    """
    folder = sys.argv[2]
    lines = util.read_file(significant_residues_file)
    for line in lines:
        print(line)
        protein1 = line[0]
        residue1 = int(line[1]) - 1  # 1-based residue number -> 0-based index
        protein2 = line[2]
        residue2 = int(line[3]) - 1
        file1 = folder + os.sep + protein1 + '.afasta'
        file2 = folder + os.sep + protein2 + '.afasta'
        residue_comparision_count = perform_residue_analysis(
            file1, file2, residue1, residue2)
        # Human-readable labels go back to 1-based numbering.
        residue1_str = protein1 + str(residue1 + 1)
        residue2_str = protein2 + str(residue2 + 1)
        filename = residue1_str + '_' + residue2_str + '.bat'
        pu.create_image_magick_script(residue_comparision_count, residue1_str,
                                      residue2_str, logo_folder, filename)
def run(input_file, folder, cutoff, target):
    """Run one pairwise worker per input line, collect MIC scores, build a graph.

    Each line of *input_file* names two files (fields 0 and 1) inside
    *folder*; *target* selects the pairwise analysis function. The
    workers' score lists are flattened, turned into a DataFrame and
    written out as a graph named after folder/cutoff/target.

    Raises:
        ValueError: if *target* is not a known analysis name.
    """
    # Dispatch table replaces the if/elif chain; previously an unknown
    # target left ``p`` unbound (NameError) on the first iteration or
    # silently re-appended the previous process afterwards.
    analyses = {
        '0123': run_0123,
        '01': run_01,
        'B01a': run_B01a,
        'B01b': run_B01b,
    }
    if target not in analyses:
        raise ValueError('unknown target: %r' % (target,))

    manager = mp.Manager()
    output = manager.Queue()

    lines = util.read_file(input_file)
    out_filename = folder + '_' + cutoff.replace(".", "") + '_' + target

    processes = []
    for line in lines:
        print("start processing ", line[0], line[1])
        file1 = folder + os.sep + line[0]
        file2 = folder + os.sep + line[1]
        p = mp.Process(target=analyses[target],
                       args=(file1, file2, cutoff, output))
        processes.append(p)

    for p in processes:
        p.start()

    for p in processes:
        p.join()

    print('after join')
    # One queue entry per worker.
    results = [output.get() for _ in processes]

    print('no. of results ', len(results))

    # Flatten the per-worker score lists.
    all_mic_scores = [score for worker_scores in results
                      for score in worker_scores]

    print('no. of mic_scores ', len(all_mic_scores))
    mic_df = util.create_df_from_dict(all_mic_scores)
    util.create_graph(mic_df, out_filename)
def run_pairwise_counts_manual_edges(
        significant_residues_file='pairs.txt',
        logo_folder='C:\\uday\\gmu\\correlations\\results\\10proteins\\logos'):
    """Build an ImageMagick logo script for each manually listed residue pair.

    The alignment folder is taken from ``sys.argv[2]``. Each line of
    *significant_residues_file* holds (protein1, residue1, protein2,
    residue2) with 1-based residue numbers.

    Args:
        significant_residues_file: pairs file. Was hard-coded; now a
            parameter defaulting to the original value.
        logo_folder: output folder for the logos; likewise generalized.
    """
    folder = sys.argv[2]
    lines = util.read_file(significant_residues_file)
    for line in lines:
        print(line)
        protein1 = line[0]
        residue1 = int(line[1]) - 1  # 1-based residue number -> 0-based index
        protein2 = line[2]
        residue2 = int(line[3]) - 1
        file1 = folder + os.sep + protein1 + '.afasta'
        file2 = folder + os.sep + protein2 + '.afasta'
        residue_comparision_count = perform_residue_analysis(
            file1, file2, residue1, residue2)
        # Labels go back to 1-based numbering for display.
        residue1_str = protein1 + str(residue1 + 1)
        residue2_str = protein2 + str(residue2 + 1)
        filename = residue1_str + '_' + residue2_str + '.bat'
        pu.create_image_magick_script(residue_comparision_count, residue1_str,
                                      residue2_str, logo_folder, filename)
def run_in_out_entropy_comparision_all_datasets(
        input_file='C:\\Users\\uday\\pycharm_projects\\network_analysis\\data\\entropy_input.txt'):
    """Compare overall / in-network / out-of-network entropies for all datasets.

    Args:
        input_file: file whose lines give (folder, graphml_file, dataset).
            Was a hard-coded machine-specific path; generalized to a
            defaulted parameter so existing no-argument callers still work.

    Writes three CSVs of averaged entropies via
    ``util.write_avg_entropies_to_csv``.
    """
    avg_entropies = []
    avg_in_entropies = []
    avg_out_entropies = []
    lines = util.read_file(input_file)
    for line in lines:
        folder = line[0]
        graphml_file = line[1]
        dataset = line[2]
        avg_entropy_dict, avg_in_nx_entropy_dict, avg_out_nx_entropy_dict = \
            run_in_out_entropy_comparision(
                dataset=dataset, graphml_file=graphml_file, folder=folder)
        avg_entropies.append(avg_entropy_dict)
        avg_in_entropies.append(avg_in_nx_entropy_dict)
        avg_out_entropies.append(avg_out_nx_entropy_dict)
    util.write_avg_entropies_to_csv(avg_entropies,
                                    filename='average_entropies.csv')
    util.write_avg_entropies_to_csv(avg_in_entropies,
                                    filename='average_in_entropies.csv')
    util.write_avg_entropies_to_csv(avg_out_entropies,
                                    filename='average_out_entropies.csv')
    # NOTE(review): orphaned fragment. This code references ``lines``,
    # ``folder``, ``cutoff``, ``output``, ``out_folder``, ``processes`` and
    # ``all_mic_scores`` that are not defined at this point — it appears to
    # be a stray duplicate of the tail of run(input_file, folder, cutoff,
    # out_folder) left behind by a bad paste/merge. Verify and remove.
    for line in lines:
        print("start processing ", line[0])
        file = folder+os.sep+line[0]
        p = mp.Process(target=run_01, args=(file, cutoff, output, out_folder))
        processes.append(p)

    for p in processes:
        p.start()

    for p in processes:
        p.join()

    print('after join')
    # One queue entry is drained per spawned process.
    results = [output.get() for p in processes]
    print('no. of results ', len(results))

    # Flatten the per-worker score lists into all_mic_scores.
    for list_of_mic_scores in results:
        if len(list_of_mic_scores) > 0:
            for mic_score in list_of_mic_scores:
                all_mic_scores.append(mic_score)

    print('no. of mic_scores ', len(all_mic_scores))


if __name__ == '__main__':
    # Script entry point: the first command-line argument names a file whose
    # lines each supply four fields passed positionally to run().
    args_file = sys.argv[1]
    arglines = util.read_file(args_file)
    for argline in arglines:
        # NOTE(review): this file defines run() several times; at import
        # time the last definition wins — confirm which one is intended.
        print("start processing ", argline[0], argline[1], argline[2], argline[3])
        run(argline[0], argline[1], argline[2], argline[3])
        print("done processing ", argline[0], argline[1], argline[2], argline[3])
    # NOTE(review): orphaned fragment. References ``processes``, ``output``,
    # ``all_mic_scores`` and ``out_filename`` that are not defined in this
    # scope — it looks like the tail of run(input_file, folder, cutoff,
    # target) pasted after the __main__ block by mistake. Verify and remove.
    for p in processes:
        p.start()

    for p in processes:
        p.join()

    print('after join')
    # One queue entry is drained per spawned process.
    results = [output.get() for p in processes]

    print('no. of results ', len(results))

    # Flatten the per-worker score lists into all_mic_scores.
    for list_of_mic_scores in results:
        if len(list_of_mic_scores) > 0:
            for mic_score in list_of_mic_scores:
                all_mic_scores.append(mic_score)

    print('no. of mic_scores ', len(all_mic_scores))
    mic_df = util.create_df_from_dict(all_mic_scores)
    util.create_graph(mic_df, out_filename)


if __name__ == '__main__':
    # Script entry point (duplicate of the earlier __main__ block — likely a
    # scraper/paste artifact): each line of the args file supplies four
    # fields passed positionally to run().
    args_file = sys.argv[1]
    arglines = util.read_file(args_file)
    for argline in arglines:
        # NOTE(review): run() is defined multiple times in this file; the
        # last definition wins at import time — confirm which is intended.
        print("start processing ", argline[0], argline[1], argline[2],
              argline[3])
        run(argline[0], argline[1], argline[2], argline[3])
        print("done processing ", argline[0], argline[1], argline[2],
              argline[3])