def test_to_networkx_digraph():
    """Check to_networkx_digraph against the basic directed hypergraph fixture.

    Verifies that the produced graph has exactly the hypergraph's node set,
    that every graph node's attribute dict appears among the hypergraph's
    node attribute dicts, that each (tail, head) pair of every hyperedge
    indexes to a truthy entry in the graph, and that passing a
    non-hypergraph argument raises TypeError.
    """
    hypergraph = DirectedHypergraph()
    hypergraph.read("tests/data/basic_directed_hypergraph.txt")

    digraph = directed_graph_transformations.to_networkx_digraph(hypergraph)

    # Node sets must coincide.
    hypergraph_nodes = hypergraph.get_node_set()
    digraph_nodes = digraph.node.keys()
    assert hypergraph_nodes == set(digraph_nodes)

    # Every attribute dict on the graph side must exist on the hypergraph side.
    known_attributes = []
    for hnode in hypergraph_nodes:
        known_attributes.append(hypergraph.get_node_attributes(hnode))
    for gnode in digraph_nodes:
        assert digraph.node[gnode] in known_attributes

    # Each hyperedge must be expanded into all of its tail -> head pairs.
    for edge_id in hypergraph.hyperedge_id_iterator():
        tails = hypergraph.get_hyperedge_tail(edge_id)
        heads = hypergraph.get_hyperedge_head(edge_id)
        for src in tails:
            for dst in heads:
                assert digraph[src][dst]

    # Try transforming an invalid directed hypergraph
    try:
        directed_graph_transformations.to_networkx_digraph("invalid H")
        assert False
    except TypeError:
        pass
    except BaseException as err:
        assert False, err
def test_from_networkx_digraph():
    """Check from_networkx_digraph on a graph derived from the basic fixture.

    The fixture hypergraph is first converted with to_networkx_digraph and
    the result is converted back with from_networkx_digraph. The test then
    verifies that both have the same node set, that every edge of the
    networkx graph is reported by has_hyperedge(tail, head) on the converted
    hypergraph, and that a non-graph argument raises TypeError.
    """
    original = DirectedHypergraph()
    original.read("tests/data/basic_directed_hypergraph.txt")

    nx_digraph = directed_graph_transformations.to_networkx_digraph(original)
    converted = directed_graph_transformations.from_networkx_digraph(
        nx_digraph)

    # Node sets must coincide.
    nx_nodes = nx_digraph.node.keys()
    converted_nodes = converted.get_node_set()
    assert converted_nodes == set(nx_nodes)

    # Every graph edge must exist as a hyperedge after conversion.
    for nx_edge in nx_digraph.edges_iter():
        src, dst = nx_edge[0], nx_edge[1]
        assert converted.has_hyperedge(src, dst)

    # Try transforming an invalid directed hypergraph
    try:
        directed_graph_transformations.from_networkx_digraph("G")
        assert False
    except TypeError:
        pass
    except BaseException as err:
        assert False, err
def main(inprefix, hedge_connectivity_file, pathway_prefix, infix, run_all):
    """Annotate STRING interactions with pathway and connectivity information.

    Builds the hypergraph, collects every node together with the members of
    hypernodes and entity sets, maps STRING interaction endpoints through the
    UniProt -> PathwayCommons id map, and for each processed STRING channel
    file writes two tab-separated files under ``outfiles/``:

    * ``<infix>-<name>-mismapped.txt``: ids that could not be mapped or are
      not present in the hypergraph, with the reason.
    * ``<infix>-<name>-positive_sets.txt``: one row per mapped interaction
      with flags for "both nodes in any pathway", "both nodes in the same
      pathway", "bipartite connected" and the B-relaxation distance
      (``-1`` when the pair has no entry).

    A channel whose positive-sets file already exists is skipped.

    Args:
        inprefix: Passed to ``hgraph_utils.make_hypergraph``.
        hedge_connectivity_file: Passed to ``hgraph_utils.make_b_visit_dict``.
        pathway_prefix: Passed to ``get_pathways``.
        infix: Label embedded in the names of the output files.
        run_all: Forwarded to ``get_pathways`` as ``run_all``.
    """
    start = time.time()
    H, identifier2id, id2identifier = hgraph_utils.make_hypergraph(
        inprefix, keep_singleton_nodes=True)
    H = hgraph_utils.add_entity_set_info(H)
    # NOTE(review): G is not used below; the call is kept so that this fix
    # does not change which library calls are made. Confirm it can go.
    G = transform.to_networkx_digraph(H)

    # Collect proteins plus complex (hypernode) and entity-set members, and
    # record for each member the set of hypergraph nodes that contain it.
    nodes = set()
    node_membership = {}
    num_complexes = 0
    num_entitysets = 0
    for n in H.get_node_set():
        attrs = H.get_node_attributes(n)
        if attrs['is_hypernode']:
            nodes.update(attrs['hypernode_members'])
            for m in attrs['hypernode_members']:
                node_membership.setdefault(m, set()).add(n)
            num_complexes += 1
        if attrs['is_entityset']:
            nodes.update(attrs['entityset_members'])
            for m in attrs['entityset_members']:
                node_membership.setdefault(m, set()).add(n)
            num_entitysets += 1
        nodes.add(n)
        # A node that is not a member of anything belongs only to itself.
        if n not in node_membership:
            node_membership[n] = {n}
    print('%d complexes and %d entity sets' % (num_complexes, num_entitysets))
    print('%d nodes including hypernode and entity set members' % (len(nodes)))

    # Map between pathway identifiers and UniProt ids.
    pc2uniprot, uniprot2pc = hgraph_utils.get_id_map(
        '../../hypergraph/reactome_hypergraphs/')

    # Pathway information.
    pathway_nodes, all_pathway_nodes = get_pathways(pathway_prefix,
                                                    run_all=run_all)

    # STRING channels: every processed file is reported, but only the
    # co-occurrence channel is actually processed (hard-coded below).
    files = glob.glob('../../data/STRING/processed/*.txt')
    print('%d files:' % (len(files)), files)
    processed_nodes = {}
    files = ['../../data/STRING/processed/cooccurence.txt']
    for f in files:
        print('FILE %s' % (f))
        name = f.replace('../../data/STRING/processed/', '').replace(
            '.txt', '')
        print('NAME %s' % (name))
        outfile_name = 'outfiles/%s-%s-positive_sets.txt' % (infix, name)
        if os.path.isfile(outfile_name):
            print('FILE %s EXISTS! Skipping.' % (outfile_name))
            continue

        # Read the interactions: whitespace-separated rows where columns 2
        # and 3 are the endpoint ids and column 4 is an integer value.
        interactions = []
        missing = {}
        with open(f) as fin:
            for line in fin:
                row = line.strip().split()
                interactions.append([row[2], row[3], int(row[4])])
        print(' %d INTERACTIONS' % (len(interactions)))

        # Keep interactions whose endpoints both map into the hypergraph;
        # remember why every other endpoint was dropped.
        interactions_in_reactome = []
        mismapped = 0
        notinreactome = 0
        for n1, n2, val in interactions:
            if n1 not in uniprot2pc or n2 not in uniprot2pc:
                if n1 not in uniprot2pc:
                    missing[n1] = ('NA', 'NotInPC')
                if n2 not in uniprot2pc:
                    missing[n2] = ('NA', 'NotInPC')
                mismapped += 1
                continue
            un1 = uniprot2pc[n1]
            un2 = uniprot2pc[n2]
            if un1 in nodes and un2 in nodes:
                interactions_in_reactome.append([un1, un2, val])
                continue
            if un1 not in nodes:
                missing[n1] = (un1, 'NotInHypergraph')
            if un2 not in nodes:
                missing[n2] = (un2, 'NotInHypergraph')
            notinreactome += 1

        print(
            ' %d INTERACTIONS HAVE BOTH NODES IN REACTOME\n'
            ' %d interactions not in PathwayCommons Reactome mapping\n'
            ' %d interactions are not in this hypergraph'
            % (len(interactions_in_reactome), mismapped, notinreactome))

        # The context manager closes the file even if a write fails.
        with open('outfiles/%s-%s-mismapped.txt' % (infix, name), 'w') as out:
            out.write('#UniProtID\tPathwayCommonsID\tMismappingReason\n')
            for m in missing:
                out.write('%s\t%s\t%s\n' % (m, missing[m][0], missing[m][1]))
        print(' wrote %d mismapped nodes to outfiles/%s-%s-mismapped.txt' %
              (len(missing), infix, name))
        sys.stdout.flush()

        interactions_in_pathways, interactions_in_same_pathway = \
            get_pathway_interactions(interactions_in_reactome, pathway_nodes,
                                     all_pathway_nodes)
        print(' %d INTERACTIONS HAVE BOTH NODES IN THE REACTOME PATHWAYS' %
              (len(interactions_in_pathways)))
        print(' %d INTERACTIONS HAVE BOTH NODES IN SAME REACTOME PATHWAY' %
              (len(interactions_in_same_pathway)))
        sys.stdout.flush()

        # NOTE: to process everything, replace "interactions_in_pathways"
        # with "interactions_in_reactome" in the calls below.
        b_visit_dict = hgraph_utils.make_b_visit_dict(hedge_connectivity_file,
                                                      identifier2id)
        brelax_dicts, processed_nodes = preprocess_brelax_dicts(
            H, interactions_in_pathways, node_membership, b_visit_dict,
            processed_nodes)
        interactions_brelax = get_bconn_interactions(
            brelax_dicts, interactions_in_pathways, node_membership)
        interactions_bipartite = list(interactions_brelax.keys())
        # A B-relaxation distance of 0 is counted as B-connected.
        interactions_bconn = [
            e for e in interactions_bipartite if interactions_brelax[e] == 0
        ]
        print(' %d INTERACTIONS ARE Bipartite CONNECTED IN REACTOME' %
              (len(interactions_bipartite)))
        print(' %d INTERACTIONS ARE B-CONNECTED IN REACTOME' %
              (len(interactions_bconn)))
        sys.stdout.flush()

        with open(outfile_name, 'w') as out:
            out.write(
                '#Node1\tNode2\tScore\tAnyPathway\tSamePathway\tBipartite\tBRelaxDist\n'
            )
            for n1, n2, val in interactions_in_reactome:
                pair = (n1, n2)
                in_any_pathway = 1 if pair in interactions_in_pathways else 0
                in_same_pathway = (
                    1 if pair in interactions_in_same_pathway else 0)
                if pair in interactions_brelax:
                    bipartite = 1
                    brelax_dist = interactions_brelax[pair]
                else:
                    bipartite = 0
                    brelax_dist = -1
                out.write('%s\t%s\t%s\t%d\t%d\t%d\t%d\n' %
                          (n1, n2, val, in_any_pathway, in_same_pathway,
                           bipartite, brelax_dist))
        # Fixed: the '%' operator was missing here, so the string literal was
        # being called like a function and raised TypeError.
        print(' wrote outfile to %s' % (outfile_name))
        sys.stdout.flush()

    end = time.time()
    print('FINAL TIME:', end - start)
def to_digraph(H):
    """Return the result of ``transform.to_networkx_digraph`` applied to H.

    Thin convenience wrapper so callers can convert a hypergraph to a graph
    through this module.
    """
    return transform.to_networkx_digraph(H)