コード例 #1
0
def test_to_networkx_digraph():
    """Check that to_networkx_digraph mirrors a hypergraph's nodes and edges.

    Reads the basic directed hypergraph fixture, converts it, and asserts
    that the node sets match, that every graph node's attribute dict equals
    the attributes of some hypergraph node, and that for each hyperedge the
    lookup ``digraph[tail][head]`` succeeds and is truthy for every
    (tail, head) node pair.  Finally checks that passing a plain string
    raises TypeError.
    """
    hypergraph = DirectedHypergraph()
    hypergraph.read("tests/data/basic_directed_hypergraph.txt")

    digraph = directed_graph_transformations.to_networkx_digraph(hypergraph)

    hypergraph_nodes = hypergraph.get_node_set()
    digraph_nodes = digraph.node.keys()

    assert hypergraph_nodes == set(digraph_nodes)

    known_attributes = [
        hypergraph.get_node_attributes(name) for name in hypergraph_nodes
    ]
    for name in digraph_nodes:
        node_data = digraph.node[name]
        assert node_data in known_attributes

    for edge_id in hypergraph.hyperedge_id_iterator():
        tails = hypergraph.get_hyperedge_tail(edge_id)
        heads = hypergraph.get_hyperedge_head(edge_id)
        node_pairs = ((tail, head) for tail in tails for head in heads)
        for tail, head in node_pairs:
            assert digraph[tail][head]

    # A plain string is not a directed hypergraph; TypeError is the only
    # acceptable outcome, anything else fails the test.
    try:
        directed_graph_transformations.to_networkx_digraph("invalid H")
        assert False
    except TypeError:
        pass
    except BaseException as error:
        assert False, error
コード例 #2
0
def test_from_networkx_digraph():
    """Check that from_networkx_digraph rebuilds nodes and edges of a digraph.

    Converts the basic directed hypergraph fixture to a networkx digraph and
    back, then asserts that the rebuilt object has the same node set as the
    digraph and reports a hyperedge for every digraph edge.  Finally checks
    that passing a plain string raises TypeError.
    """
    source_hypergraph = DirectedHypergraph()
    source_hypergraph.read("tests/data/basic_directed_hypergraph.txt")

    nx_graph = directed_graph_transformations.to_networkx_digraph(
        source_hypergraph)
    rebuilt = directed_graph_transformations.from_networkx_digraph(nx_graph)

    nx_nodes = nx_graph.node.keys()
    rebuilt_nodes = rebuilt.get_node_set()

    assert rebuilt_nodes == set(nx_nodes)

    # Each digraph edge (tail, head) must be present in the rebuilt object.
    for edge in nx_graph.edges_iter():
        assert rebuilt.has_hyperedge(edge[0], edge[1])

    # A plain string is not a networkx digraph; TypeError is the only
    # acceptable outcome, anything else fails the test.
    try:
        directed_graph_transformations.from_networkx_digraph("G")
        assert False
    except TypeError:
        pass
    except BaseException as error:
        assert False, error
コード例 #3
0
def main(inprefix, hedge_connectivity_file, pathway_prefix, infix, run_all):
    """Annotate STRING interactions with Reactome hypergraph connectivity.

    Builds the hypergraph from ``inprefix``, collects every node together
    with the members of hypernodes and entity sets, and then, for each
    processed STRING channel file:

    * reads the interactions (fields 3-5 of each whitespace-split row),
    * maps both endpoints through the UniProt -> PathwayCommons map and
      records endpoints that cannot be mapped or are not in the hypergraph
      in ``outfiles/{infix}-{name}-mismapped.txt``,
    * writes ``outfiles/{infix}-{name}-positive_sets.txt`` with, per
      interaction, flags for "any pathway", "same pathway", "bipartite
      connected" and the B-relaxation distance (-1 when not connected).

    A channel whose positive-sets file already exists is skipped.

    Args:
        inprefix: prefix handed to ``hgraph_utils.make_hypergraph``.
        hedge_connectivity_file: file handed to
            ``hgraph_utils.make_b_visit_dict``.
        pathway_prefix: prefix handed to ``get_pathways``.
        infix: label embedded in the names of the files written under
            ``outfiles/``.
        run_all: forwarded to ``get_pathways`` as ``run_all``.
    """
    start = time.time()
    H, identifier2id, id2identifier = hgraph_utils.make_hypergraph(
        inprefix, keep_singleton_nodes=True)
    H = hgraph_utils.add_entity_set_info(H)
    # NOTE(review): G is never read below; the conversion is kept in case it
    # is relied on as a sanity check -- confirm and remove if not.
    G = transform.to_networkx_digraph(H)

    nodes = set()  ## get proteins and complex members.
    node_membership = {}  # member -> set of hypergraph nodes containing it
    num_complexes = 0
    num_entitysets = 0
    for n in H.get_node_set():
        attrs = H.get_node_attributes(n)
        if attrs['is_hypernode']:
            nodes.update(attrs['hypernode_members'])
            for m in attrs['hypernode_members']:
                node_membership.setdefault(m, set()).add(n)
            num_complexes += 1
        if attrs['is_entityset']:
            nodes.update(attrs['entityset_members'])
            for m in attrs['entityset_members']:
                node_membership.setdefault(m, set()).add(n)
            num_entitysets += 1
        nodes.add(n)
        # A node that is not a member of anything only belongs to itself.
        if n not in node_membership:
            node_membership[n] = set([n])
    print('%d complexes and %d entity sets' % (num_complexes, num_entitysets))
    print('%d nodes including hypernode and entity set members' % (len(nodes)))

    # get pathway Identifiers to Uniprot ID
    pc2uniprot, uniprot2pc = hgraph_utils.get_id_map(
        '../../hypergraph/reactome_hypergraphs/')

    ## get pathway information
    pathway_nodes, all_pathway_nodes = get_pathways(pathway_prefix,
                                                    run_all=run_all)

    ## get channels
    files = glob.glob('../../data/STRING/processed/*.txt')
    print('%d files:' % (len(files)), files)

    processed_nodes = {}
    # NOTE: the glob result above is only reported; processing is restricted
    # to this single hard-coded channel file.
    files = ['../../data/STRING/processed/cooccurence.txt']
    for f in files:
        print('FILE %s' % (f))
        name = f.replace('../../data/STRING/processed/',
                         '').replace('.txt', '')
        print('NAME %s' % (name))

        outfile_name = 'outfiles/%s-%s-positive_sets.txt' % (infix, name)
        if os.path.isfile(outfile_name):
            print('FILE %s EXISTS! Skipping.' % (outfile_name))
            continue

        interactions = []
        missing = {}  # UniProt id -> (PathwayCommons id or 'NA', reason)
        with open(f) as fin:
            for line in fin:
                row = line.strip().split()
                interactions.append([row[2], row[3], int(row[4])])
        print('  %d INTERACTIONS' % (len(interactions)))

        interactions_in_reactome = []
        mismapped = 0
        notinreactome = 0
        for n1, n2, val in interactions:
            # Both endpoints must map to a PathwayCommons identifier.
            if n1 not in uniprot2pc or n2 not in uniprot2pc:
                if n1 not in uniprot2pc:
                    missing[n1] = ('NA', 'NotInPC')
                if n2 not in uniprot2pc:
                    missing[n2] = ('NA', 'NotInPC')
                mismapped += 1
                continue
            un1 = uniprot2pc[n1]
            un2 = uniprot2pc[n2]

            if un1 in nodes and un2 in nodes:
                interactions_in_reactome.append([un1, un2, val])
            else:
                if un1 not in nodes:
                    missing[n1] = (un1, 'NotInHypergraph')
                if un2 not in nodes:
                    missing[n2] = (un2, 'NotInHypergraph')
                notinreactome += 1

        ## FOR TESTING:
        ##interactions_in_reactome = interactions_in_reactome[:500]

        print(
            '  %d INTERACTIONS HAVE BOTH NODES IN REACTOME\n  %d interactions not in PathwayCommons Reactome mapping\n  %d interactions are not in this hypergraph'
            % (len(interactions_in_reactome), mismapped, notinreactome))
        # ``with`` guarantees the handle is closed even if a write fails.
        with open('outfiles/%s-%s-mismapped.txt' % (infix, name), 'w') as out:
            out.write('#UniProtID\tPathwayCommonsID\tMismappingReason\n')
            for m in missing:
                out.write('%s\t%s\t%s\n' % (m, missing[m][0], missing[m][1]))
        print('  wrote %d mismapped nodes to outfiles/%s-%s-mismapped.txt' %
              (len(missing), infix, name))
        sys.stdout.flush()

        interactions_in_pathways, interactions_in_same_pathway = get_pathway_interactions(
            interactions_in_reactome, pathway_nodes, all_pathway_nodes)
        print('  %d INTERACTIONS HAVE BOTH NODES IN THE REACTOME PATHWAYS' %
              (len(interactions_in_pathways)))
        print('  %d INTERACTIONS HAVE BOTH NODES IN SAME REACTOME PATHWAY' %
              (len(interactions_in_same_pathway)))
        sys.stdout.flush()

        ## NOTE: to do the whole thing replace "interactions_in_pathways"
        ## with "interactions_in_reactome" in the calls below.
        b_visit_dict = hgraph_utils.make_b_visit_dict(hedge_connectivity_file,
                                                      identifier2id)
        brelax_dicts, processed_nodes = preprocess_brelax_dicts(
            H, interactions_in_pathways, node_membership, b_visit_dict,
            processed_nodes)
        interactions_brelax = get_bconn_interactions(brelax_dicts,
                                                     interactions_in_pathways,
                                                     node_membership)
        interactions_bipartite = list(interactions_brelax.keys())
        # A B-relaxation distance of 0 means the pair is B-connected.
        interactions_bconn = [
            e for e in interactions_bipartite if interactions_brelax[e] == 0
        ]

        print('  %d INTERACTIONS ARE Bipartite CONNECTED IN REACTOME' %
              (len(interactions_bipartite)))
        print('  %d INTERACTIONS ARE B-CONNECTED IN REACTOME' %
              (len(interactions_bconn)))
        sys.stdout.flush()

        with open(outfile_name, 'w') as out:
            out.write(
                '#Node1\tNode2\tScore\tAnyPathway\tSamePathway\tBipartite\tBRelaxDist\n'
            )
            for n1, n2, val in interactions_in_reactome:
                pair = (n1, n2)
                in_any_pathway = 1 if pair in interactions_in_pathways else 0
                in_same_pathway = (
                    1 if pair in interactions_in_same_pathway else 0)
                if pair in interactions_brelax:
                    is_bipartite = 1
                    brelax_dist = interactions_brelax[pair]
                else:
                    is_bipartite = 0
                    brelax_dist = -1  # sentinel: no B-relaxation distance
                out.write('%s\t%s\t%s\t%d\t%d\t%d\t%d\n' %
                          (n1, n2, val, in_any_pathway, in_same_pathway,
                           is_bipartite, brelax_dist))
        # The original omitted the ``%`` operator here, which called the
        # string and raised TypeError after every successful write.
        print('  wrote outfile to %s' % (outfile_name))
        sys.stdout.flush()
    end = time.time()
    print('FINAL TIME:', end - start)
コード例 #4
0
def to_digraph(H):
    """Return the networkx digraph for hypergraph ``H``.

    Thin convenience wrapper that delegates to
    ``transform.to_networkx_digraph`` and returns its result unchanged.
    """
    return transform.to_networkx_digraph(H)