# Shared imports assumed by the examples below; util, DSSPData, read_graphml,
# get_residues_from_graph, and friends come from the surrounding project modules.
import operator
import os
import sys

import networkx as nx
from Bio import AlignIO


def run_in_out_entropy_comparision(dataset, graphml_file, folder):
    g = util.read_graphml(graphml_file)
    in_residues = get_residues_from_graph(g)

    avg_entropy_dict = {'dataset': dataset}
    avg_in_nx_entropy_dict = {'dataset': dataset}
    avg_out_nx_entropy_dict = {'dataset': dataset}
    proteins = ['ha', 'na', 'm1', 'm2', 'np', 'pb1', 'pb2', 'pa', 'ns1', 'ns2']
    for protein in proteins:
        file = folder + os.sep + protein + '.afasta'
        alignment = AlignIO.read(file, 'fasta')
        sequences = [x.seq for x in alignment]
        entropies = util.entropy_all_positions(sequences)
        entropy_avg, in_network_entropy_avg, out_network_entropy_avg = in_out_entropies(
            entropies, in_residues[protein], folder, protein)
        avg_entropy_dict[protein] = entropy_avg
        avg_in_nx_entropy_dict[protein] = in_network_entropy_avg
        avg_out_nx_entropy_dict[protein] = out_network_entropy_avg
        print('averages for ', dataset, ' ', protein, ' ',
              in_network_entropy_avg, out_network_entropy_avg)

    return avg_entropy_dict, avg_in_nx_entropy_dict, avg_out_nx_entropy_dict
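
A minimal driver sketch for the function above; the dataset labels and file layout are assumptions, not from the source repo:

rows = []
for dataset in ('h1n1', 'h3n2'):                 # hypothetical dataset labels
    graphml = dataset + '_network.graphml'       # hypothetical file layout
    fasta_dir = 'alignments' + os.sep + dataset  # folder of per-protein .afasta files
    all_avg, in_avg, out_avg = run_in_out_entropy_comparision(dataset, graphml, fasta_dir)
    rows.extend([all_avg, in_avg, out_avg])      # one row per averages dict
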
def get_cooccurences(graphml_file, fasta_folder):
    top_50_edges_cooccurence_counts = []
    bottom_50_edges_cooccurence_counts = []

    g = util.read_graphml(graphml_file)
    top_n_edges = util.get_edges(g, start=0, end=50, reverse=True)
    bottom_n_edges = util.get_edges(g, start=0, end=50, reverse=False)

    for edge in top_n_edges:
        # node ids look like '<protein>_<position>'; positions are 1-based,
        # so subtract 1 to index into the alignment columns
        residue1 = int(edge[0].split('_')[1]) - 1
        protein1 = edge[0].split('_')[0]
        residue2 = int(edge[1].split('_')[1]) - 1
        protein2 = edge[1].split('_')[0]
        file1 = fasta_folder + os.sep + protein1 + '.afasta'
        file2 = fasta_folder + os.sep + protein2 + '.afasta'
        rcc = perform_residue_analysis(file1, file2, residue1, residue2)
        print(edge)
        print(sorted(rcc.items(), key=operator.itemgetter(1), reverse=True))
        top_50_edges_cooccurence_counts.append(rcc)

    print('=====================')
    for edge in bottom_n_edges:
        residue1 = int(edge[0].split('_')[1]) - 1
        protein1 = edge[0].split('_')[0]
        residue2 = int(edge[1].split('_')[1]) - 1
        protein2 = edge[1].split('_')[0]
        file1 = fasta_folder + os.sep + protein1 + '.afasta'
        file2 = fasta_folder + os.sep + protein2 + '.afasta'
        rcc = perform_residue_analysis(file1, file2, residue1, residue2)
        print(sorted(rcc.items(), key=operator.itemgetter(1), reverse=True))
        bottom_50_edges_cooccurence_counts.append(rcc)

    return top_50_edges_cooccurence_counts, bottom_50_edges_cooccurence_counts
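
A follow-up sketch: each rcc returned above supports dict-style items() with numeric counts, so the dominant co-occurring pair per edge can be pulled out like this (paths are hypothetical):

import operator

top_counts, bottom_counts = get_cooccurences('network.graphml', 'alignments')  # hypothetical paths
for rcc in top_counts:
    pair, count = max(rcc.items(), key=operator.itemgetter(1))  # most frequent pairing
    print(pair, count)
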
def run_in_out_acc_comparision(dataset, graphml_file, folder):

    avg_in_acc_dict = {'dataset': dataset}
    avg_out_acc_dict = {'dataset': dataset}

    g = util.read_graphml(graphml_file)
    in_residues = get_residues_from_graph(g)
    dssp = DSSPData()
    avg_in, avg_out = run_in_out_acc_ha(in_residues, dssp, g, folder)
    avg_in_acc_dict['ha'] = avg_in
    avg_out_acc_dict['ha'] = avg_out

    avg_in, avg_out = run_in_out_acc_m1(in_residues, dssp, g, folder)
    avg_in_acc_dict['m1'] = avg_in
    avg_out_acc_dict['m1'] = avg_out

    avg_in, avg_out = run_in_out_acc_na(in_residues, dssp, g, folder)
    avg_in_acc_dict['na'] = avg_in
    avg_out_acc_dict['na'] = avg_out

    avg_in, avg_out = run_in_out_acc_np(in_residues, dssp, g, folder)
    avg_in_acc_dict['np'] = avg_in
    avg_out_acc_dict['np'] = avg_out

    avg_in, avg_out = run_in_out_acc_ns1(in_residues, dssp, g, folder)
    avg_in_acc_dict['ns1'] = avg_in
    avg_out_acc_dict['ns1'] = avg_out

    return avg_in_acc_dict, avg_out_acc_dict
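
A hedged sketch for persisting the two returned rows with csv.DictWriter; the argument values and output filename are assumptions:

import csv

in_acc, out_acc = run_in_out_acc_comparision('h3n2', 'network.graphml', 'data')  # hypothetical args
fields = ['dataset', 'ha', 'm1', 'na', 'np', 'ns1']  # keys set by the function above
with open('in_out_acc.csv', 'w', newline='') as fh:
    writer = csv.DictWriter(fh, fieldnames=fields)
    writer.writeheader()
    writer.writerow(in_acc)
    writer.writerow(out_acc)
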
Example No. 6
def run(infilename):
    ingraph = read_graphml(infilename)
    outgraph = create_protein_graph(ingraph)
    # note: this writes the derived graph back to the input filename
    write_graphml(outgraph, infilename)
    plot_file = infilename.split('.')[0] + '.png'
    create_plot(outgraph, plot_file)

#run('u800d_05_B01b.graphml')
#compare_two_graphs('all_05_01.graphml', '800d_05_01.graphml')
Example No. 7
def compare_two_graphs(file1, file2):
    g1 = read_graphml(file1)
    g2 = read_graphml(file2)

    nodes1 = g1.nodes()
    nodes2 = g2.nodes()

    matches = []
    in1only = []
    in2only = []
    for node in nodes1:
        if node in nodes2:
            matches.append(node)
        else:
            in1only.append(node)

    for node in nodes2:
        if node not in nodes1:
            in2only.append(node)

    print(len(nodes1), len(nodes2), len(matches), len(in1only), len(in2only))
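
The loops above do a linear membership scan per node; an equivalent sketch using set algebra avoids that (node order is lost, which the counts do not need):

def compare_two_graphs_sets(file1, file2):
    nodes1 = set(read_graphml(file1).nodes())
    nodes2 = set(read_graphml(file2).nodes())
    matches = nodes1 & nodes2  # in both graphs
    in1only = nodes1 - nodes2  # only in the first
    in2only = nodes2 - nodes1  # only in the second
    print(len(nodes1), len(nodes2), len(matches), len(in1only), len(in2only))
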
Example No. 8
def run_in_out_acc_comparision():
    # usage: <script> <graphml_file> <data_folder>
    graphml_file = sys.argv[1]
    folder = sys.argv[2]

    g = util.read_graphml(graphml_file)
    in_residues = get_residues_from_graph(g)
    dssp = DSSPData()
    run_in_out_acc_ha(in_residues, dssp, g, folder)
    run_in_out_acc_m1(in_residues, dssp, g, folder)
    run_in_out_acc_na(in_residues, dssp, g, folder)
    run_in_out_acc_np(in_residues, dssp, g, folder)
    run_in_out_acc_ns1(in_residues, dssp, g, folder)
Example No. 9
def run(folder, infilename, title):
    ingraph = read_graphml(folder + os.sep + infilename)
    try:
        #avg_cluster_coeff = nx.average_clustering(ingraph)
        #print('average clustering for ' + title + " = " + str(avg_cluster_coeff))

        avg_deg_coeff = nx.average_degree_connectivity(ingraph)
        print('average degree for ' + title + ' = ' + str(avg_deg_coeff))
    except Exception as e:
        print(e)
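
For reference, nx.average_degree_connectivity returns a dict keyed by node degree; a toy check (not from the source repo):

toy = nx.path_graph(4)  # 0-1-2-3
print(nx.average_degree_connectivity(toy))
# {1: 2.0, 2: 1.5}: degree-1 endpoints neighbor only degree-2 nodes;
# the middle nodes average neighbor degrees (1 + 2) / 2 = 1.5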


Example No. 10
def run_in_out_entropy_comparision():
    # usage: <script> <graphml_file> <alignment_folder>
    graphml_file = sys.argv[1]
    folder = sys.argv[2]

    g = util.read_graphml(graphml_file)
    in_residues = get_residues_from_graph(g)

    proteins = ['ha', 'na', 'm1', 'm2', 'np', 'pb1', 'pb2', 'pa', 'ns1', 'ns2']
    for protein in proteins:
        file = folder + os.sep + protein + '.afasta'
        alignment = AlignIO.read(file, 'fasta')
        sequences = [x.seq for x in alignment]
        entropies = util.entropy_all_positions(sequences)
        in_network_entropy_avg, out_network_entropy_avg = in_out_entropies(
            entropies, in_residues[protein], folder, protein)
        print('averages for ', protein, ' ', in_network_entropy_avg, out_network_entropy_avg)
Example No. 11
def run_pairwise_comparision():
    # usage: <script> <graphml_file> <alignment_folder>
    graphml_file = sys.argv[1]
    folder = sys.argv[2]

    g = util.read_graphml(graphml_file)
    top_ten_edges = get_best_edges_from_graph(g, 10)

    for edge in top_ten_edges:
        residue1 = int(edge[0].split('_')[1])-1
        protein1 = edge[0].split('_')[0]
        residue2 = int(edge[1].split('_')[1])-1
        protein2 = edge[1].split('_')[0]
        print(protein1, protein2, residue1, residue2)
        file1 = folder + os.sep + protein1 + '.afasta'
        file2 = folder + os.sep + protein2 + '.afasta'
        perform_residue_analysis(file1, file2, residue1, residue2)
def run(folder, infilename, title):
    ingraph = read_graphml(folder + os.sep + infilename)
    create_clustering_plot(ingraph, folder=folder, title=title)
    create_degree_plot(ingraph, folder=folder, title=title)
def run(folder, infilename, title):
    ingraph = read_graphml(folder + os.sep + infilename)
    outgraph = create_protein_graph(ingraph)
    write_macro_graphml(outgraph, folder, infilename)
    plot_file = infilename.split('.')[0] + '.png'
    create_plot(outgraph, folder, plot_file, title)