def get_subgraph_by_neighborhood(graph, nodes):
    """Get a BEL graph around the neighborhoods of the given nodes.

    The result contains every edge of ``graph`` that enters or leaves one of the given nodes, and the data
    dictionary of every node touched by those edges is copied over from ``graph``. A given node without
    any edge does not appear in the result.

    :param pybel.BELGraph graph: A BEL graph
    :param nodes: An iterable of BEL nodes
    :type nodes: iter
    :return: A BEL graph induced around the neighborhoods of the given nodes
    :rtype: pybel.BELGraph
    :raises ValueError: if one of the given nodes is not in the graph
    """
    result = BELGraph()

    # Materialize once: ``nodes`` may be a one-shot iterator, and it is needed for the validation
    # below as well as for both edge queries.
    node_set = set(nodes)

    for node in node_set:
        if node not in graph:
            raise ValueError('{} not in graph'.format(node))

    for u, v, k, d in graph.in_edges_iter(node_set, keys=True, data=True):
        result.add_edge(u, v, key=k, attr_dict=d)

    for u, v, k, d in graph.out_edges_iter(node_set, keys=True, data=True):
        result.add_edge(u, v, key=k, attr_dict=d)

    # Edges only carry edge data, so bring the node data over separately
    for node in result.nodes_iter():
        result.node[node].update(graph.node[node])

    return result
def help_test_convert(
        self,
        converter: Type[Converter],
        u: BaseEntity,
        v: BaseEntity,
        edge_data: EdgeData,
        triple: Tuple[str, str, str],
) -> None:
    """Check a converter class against one edge and its expected triple.

    Four things are asserted, in order: the class is a :class:`Converter` subclass, its predicate
    accepts the edge, its conversion equals ``triple``, and :func:`get_triple` gives the same
    ``triple`` for a graph containing only this edge.

    :param converter: The converter class under test
    :param u: The source node of the edge
    :param v: The target node of the edge
    :param edge_data: The data dictionary of the edge
    :param triple: The expected result of the conversion
    """
    is_converter = issubclass(converter, Converter)
    self.assertTrue(is_converter, msg='Not a Converter: {}'.format(converter.__name__))

    # A fresh key from n() is shared by the direct calls and the graph below
    key = n()

    accepted = converter.predicate(u, v, key, edge_data)
    self.assertTrue(accepted, msg='Predicate failed: {}'.format(converter.__name__))

    converted = converter.convert(u, v, key, edge_data)
    self.assertEqual(triple, converted, msg='Conversion failed: {}'.format(converter.__name__))

    graph = BELGraph()
    graph.add_edge(u, v, key=key, **edge_data)

    extracted = get_triple(graph, u, v, key)
    self.assertEqual(triple, extracted, msg='get_triple failed: {}'.format(converter.__name__))
def enrich_metabolites_proteins(graph: BELGraph, manager: Optional[Manager] = None):
    """Enrich a BEL graph with the proteins associated to its metabolites.

    Every node accepted by ``_check_namespaces(node, ABUNDANCE, 'HMDB')`` is looked up by name in the
    manager. Each associated protein is added as a node and connected to the metabolite by an
    association edge that has no evidence and no citation.

    :param graph: A BEL graph, modified in place
    :param manager: The manager to query; a new :class:`Manager` is created when none is given
    """
    manager = Manager() if manager is None else manager

    # Iterate over a snapshot because protein nodes are added inside the loop
    for node in list(graph):
        if not _check_namespaces(node, ABUNDANCE, 'HMDB'):
            continue

        associations = manager.query_metabolite_associated_proteins(node[NAME])

        if not associations:
            log.warning("Unable to find node: %s", node)
            continue

        for association in associations:
            protein = association.protein
            protein_tuple = graph.add_node_from_data(protein.serialize_to_bel())

            edge_data = {
                RELATION: ASSOCIATION,
                EVIDENCE: None,
                CITATION: {
                    CITATION_TYPE: None,
                    CITATION_REFERENCE: None,
                },
                ANNOTATIONS: {
                    'name': protein.name,
                    'protein_type': protein.protein_type,
                },
            }
            graph.add_edge(protein_tuple, node, attr_dict=edge_data)
def test_all_filter(self):
    """Check the "all" annotation filter on an edge annotated with A = {1, 2, 3}."""
    graph = BELGraph()
    graph.add_edge(1, 2, annotations={'A': {'1', '2', '3'}})

    # (expected number of passing edges, values queried for annotation 'A')
    cases = [
        (1, {'1'}),
        (1, {'1', '2'}),
        (1, {'1', '2', '3'}),
        (0, {'1', '2', '3', '4'}),
        (0, {'4'}),
    ]

    for expected, values in cases:
        edge_filter = build_annotation_dict_all_filter({'A': values})
        self.assertEqual(expected, count_passed_edge_filter(graph, edge_filter))
def test_has_polarity(self):
    """Check has_polarity on an increases edge and on an association edge."""
    graph = BELGraph()

    graph.add_edge(1, 2, key=0, relation=INCREASES)
    increases_result = has_polarity(graph, 1, 2, 0)
    self.assertTrue(increases_result)

    graph.add_edge(2, 3, key=0, relation=ASSOCIATION)
    association_result = has_polarity(graph, 2, 3, 0)
    self.assertFalse(association_result)
def test_build_is_association(self):
    """Test build_relation_predicate with a single relation."""
    predicate = build_relation_predicate(ASSOCIATION)

    graph = BELGraph()
    for source, target, relation in [(p1, p2, ASSOCIATION), (p2, p3, INCREASES)]:
        graph.add_edge(source, target, key=0, **{RELATION: relation})

    # Only the association edge should be accepted
    self.assertTrue(predicate(graph, p1, p2, 0))
    self.assertFalse(predicate(graph, p2, p3, 0))
class TestExpandNeighborhood(unittest.TestCase):
    """Tests for expanding a sub-graph with node neighborhoods taken from a universe graph."""

    def setUp(self):
        """Build an eight-node universe and a sub-graph that holds only the 1 -> 2 edge."""
        self.universe = BELGraph()
        for source, target in [(1, 2), (2, 3), (3, 7), (1, 4), (1, 5), (5, 6), (8, 2)]:
            self.universe.add_edge(source, target)

        self.graph = BELGraph()
        self.graph.add_edge(1, 2)

    def test_expand_failure(self):
        """Expanding around node 0, which is not in the universe, raises."""
        self.graph.add_node(0)

        with self.assertRaises(Exception):
            expand_node_neighborhood(self.universe, self.graph, 0)

    def test_expand_add(self):
        """Expanding around node 3, not yet in the sub-graph, adds it and its 3 -> 7 edge."""
        self.assertNotIn(3, self.graph)

        expand_node_neighborhood(self.universe, self.graph, 3)

        for node in (3, 7):
            self.assertIn(node, self.graph)
        self.assertIn(7, self.graph.edge[3])

    def test_expand_successors(self):
        """Expanding around node 1 brings in its successors 4 and 5."""
        expand_node_neighborhood(self.universe, self.graph, 1)

        for node in (4, 5):
            self.assertIn(node, self.graph)
        self.assertIn(5, self.graph.edge[1])

    def test_expand_predecessors(self):
        """Expanding around node 2 brings in its predecessor 8."""
        expand_node_neighborhood(self.universe, self.graph, 2)

        self.assertIn(8, self.graph)
        self.assertIn(2, self.graph.edge[8])

    def test_expand_all_neighborhoods(self):
        """Expanding around every node brings in nodes 3, 4, 5 and 8 with their edges."""
        expand_all_node_neighborhoods(self.universe, self.graph)

        # (node that must be present, source and target of the edge that must be present)
        expectations = [
            (3, 2, 3),
            (4, 1, 4),
            (5, 1, 5),
            (8, 8, 2),
        ]
        for node, source, target in expectations:
            self.assertIn(node, self.graph)
            self.assertIn(target, self.graph.edge[source])
def test_variants_mapping(self):
    """Check that a node reached by a hasVariant edge is mapped to its HGNC parent."""
    parent, variant = 'APP', 'APP Fragment'

    graph = BELGraph()
    graph.add_node(parent, attr_dict={NAMESPACE: 'HGNC', NAME: 'APP'})
    graph.add_node(variant)
    graph.add_edge(parent, variant, **{RELATION: HAS_VARIANT})

    mapped_nodes = get_mapped_nodes(graph, 'HGNC', {'APP'})

    self.assertEqual(1, len(mapped_nodes))
    self.assertIn('APP', mapped_nodes)
    self.assertEqual({'APP Fragment'}, mapped_nodes['APP'])
def test_build_is_increases_or_decreases(self):
    """Test build_relation_predicate with multiple relations."""
    predicate = build_relation_predicate([INCREASES, DECREASES])

    # (source, target, relation on the edge, whether the predicate should accept it)
    cases = [
        (p1, p2, ASSOCIATION, False),
        (p2, p3, INCREASES, True),
        (p3, p4, DECREASES, True),
    ]

    graph = BELGraph()
    for source, target, relation, _ in cases:
        graph.add_edge(source, target, key=0, **{RELATION: relation})

    for source, target, _, accepted in cases:
        check = self.assertTrue if accepted else self.assertFalse
        check(predicate(graph, source, target, 0))
def test_remove_isolated_out_of_place(self):
    """Test removing isolated nodes (out-of-place)."""
    original = BELGraph()
    original.add_edge(1, 2)
    original.add_edge(2, 3)
    original.add_node(4)  # the only node without an edge

    cleaned = remove_isolated_nodes_op(original)

    self.assertEqual(3, cleaned.number_of_nodes())
    self.assertEqual(2, cleaned.number_of_edges())
def test_orthologus_mapping(self):
    """Check that an orthologous MGI node is mapped to its HGNC counterpart."""
    human, mouse = 'CCL2', 'Ccl2'

    graph = BELGraph()
    graph.add_node(mouse, attr_dict={NAMESPACE: 'MGI', NAME: 'Ccl2'})
    graph.add_node(human, attr_dict={NAMESPACE: 'HGNC', NAME: 'CCL2'})
    graph.add_edge(human, mouse, **{RELATION: ORTHOLOGOUS})

    mapped_nodes = get_mapped_nodes(graph, 'HGNC', {'CCL2'})

    self.assertEqual(1, len(mapped_nodes))
    self.assertIn('CCL2', mapped_nodes)
    self.assertEqual({'Ccl2'}, mapped_nodes['CCL2'])
def test_complexes_composites_mapping(self):
    """Check that a complex and a family are mapped to each of their HGNC members.

    Both CCL2 and CCR2 are components of the same complex and members of the same family, so both
    must be mapped to the same pair of nodes.
    """
    complex_node = 'complex(p(HGNC:CCL2), p(HGNC:CCR2))'
    family_node = 'chemokine protein family'

    g = BELGraph()
    g.add_node(complex_node)
    g.add_node('CCL2', attr_dict={NAMESPACE: 'HGNC', NAME: 'CCL2'})
    g.add_node('CCR2', attr_dict={NAMESPACE: 'HGNC', NAME: 'CCR2'})
    g.add_node(family_node)

    g.add_edge(family_node, 'CCL2', **{RELATION: HAS_MEMBER})
    g.add_edge(family_node, 'CCR2', **{RELATION: HAS_MEMBER})
    g.add_edge(complex_node, 'CCL2', **{RELATION: HAS_COMPONENT})
    g.add_edge(complex_node, 'CCR2', **{RELATION: HAS_COMPONENT})

    mapped_nodes = get_mapped_nodes(g, 'HGNC', {'CCL2', 'CCR2'})

    self.assertEqual(2, len(mapped_nodes))
    self.assertIn('CCL2', mapped_nodes)
    self.assertIn('CCR2', mapped_nodes)

    expected = {complex_node, family_node}
    # Check each member separately; CCL2 was previously never verified
    self.assertEqual(expected, mapped_nodes['CCL2'])
    self.assertEqual(expected, mapped_nodes['CCR2'])
def collapse_consistent_edges(graph: BELGraph):
    """Collapse consistent edges together.

    For every ordered pair of nodes for which :func:`pair_is_consistent` returns a relation, all
    edges from the first to the second node are replaced by a single edge carrying only that relation.

    .. warning:: This operation doesn't preserve evidences or other annotations

    :param graph: A BEL graph, modified in place
    """
    # Snapshot the distinct (source, target) pairs before changing anything: the loop body removes
    # and adds edges, which is not safe while iterating a live edge view, and a multi-graph reports
    # the same pair once per parallel edge.
    pairs = list(dict.fromkeys(graph.edges()))

    for u, v in pairs:
        relation = pair_is_consistent(graph, u, v)

        if not relation:
            continue

        graph.remove_edges_from([(u, v, k) for k in graph[u][v]])

        # Pass the relation as a keyword attribute. This form is accepted by networkx 1.x and 2.x
        # alike, whereas ``attr_dict=`` is stored as an attribute literally named "attr_dict" in 2.x.
        graph.add_edge(u, v, **{RELATION: relation})
def multi_relabel(
        graph: BELGraph,
        mapping_dict: Mapping[BaseEntity, Iterable[BaseEntity]]) -> None:
    """Expand one victim to multiple survivor nodes, in place.

    Each incoming and outgoing edge of a victim is copied onto every one of its survivors, keeping
    the edge key and data. All victims are removed from the graph afterwards.

    :param graph: A BEL graph, modified in place
    :param mapping_dict: Maps each victim node to the survivor nodes that replace it
    """
    for victim, survivors in mapping_dict.items():
        for survivor in survivors:
            incoming = graph.in_edges(victim, keys=True, data=True)
            for source, _, key, data in incoming:
                graph.add_edge(source, survivor, key=key, **data)

            # Queried only after the incoming edges were copied, as statement order matters here
            outgoing = graph.out_edges(victim, keys=True, data=True)
            for _, target, key, data in outgoing:
                graph.add_edge(survivor, target, key=key, **data)

    graph.remove_nodes_from(list(mapping_dict))
def test_collapse_dogma_1(self):
    """Check that an RNA and the protein it is translated to collapse into a single node."""
    graph = BELGraph()
    for node in (p1, r1):
        graph.add_simple_node(*node)

    edge_key = unqualified_edge_code[TRANSLATED_TO]
    graph.add_edge(r1, p1, key=edge_key, **{RELATION: TRANSLATED_TO})

    # Before: both nodes and the translation edge
    self.assertEqual(2, graph.number_of_nodes())
    self.assertEqual(1, graph.number_of_edges())

    collapse_by_central_dogma(graph)

    # After: a single node and no edge left
    self.assertEqual(1, graph.number_of_nodes())
    self.assertEqual(0, graph.number_of_edges())
def enrich_diseases_metabolites(graph: BELGraph, manager: Optional[Manager] = None):
    """Enrich a BEL graph with the HMDB metabolites associated to its HMDB diseases.

    Every node accepted by ``_check_namespaces(node, PATHOLOGY, 'HMDB_D')`` is looked up by name in
    the manager. Consecutive associations that share a metabolite are merged into one association
    edge from the metabolite to the disease: the first PubMed identifier of the run becomes the
    citation and the remaining ones are stored in the ``additional_references`` annotation.

    :param graph: A BEL graph, modified in place
    :param manager: The manager to query; a new :class:`Manager` is created when none is given
    """
    if manager is None:
        manager = Manager()

    # Iterate over a snapshot because metabolite nodes are added inside the loop
    for data in list(graph):
        if not _check_namespaces(data, PATHOLOGY, 'HMDB_D'):
            continue

        interactions = manager.query_disease_associated_metabolites(data[NAME])

        if not interactions:
            log.warning("Unable to find node: %s", data)
            continue

        total = len(interactions)
        start = 0

        while start < total:
            association = interactions[start]
            metabolite = association.metabolite

            # Collect the references of the run of associations sharing this metabolite. The end of
            # the list is checked explicitly instead of waiting for an IndexError, so that an
            # unrelated IndexError cannot be mistaken for the end of the run.
            references = []
            end = start
            while end < total:
                current = interactions[end]
                if metabolite != current.metabolite:
                    break  # the metabolite has changed, so the run is over
                references.append(current.reference.pubmed_id)
                end += 1
            start = end

            # Add the metabolite node and construct its edge to the disease
            metabolite_data = metabolite.serialize_to_bel()
            metabolite_tuple = graph.add_node_from_data(metabolite_data)

            graph.add_edge(metabolite_tuple, data, attr_dict={
                RELATION: ASSOCIATION,
                EVIDENCE: None,
                CITATION: {
                    CITATION_TYPE: CITATION_TYPE_PUBMED,
                    CITATION_REFERENCE: references[0],
                },
                ANNOTATIONS: {
                    'omim_id': association.disease.omim_id,
                    'additional_references': references[1::],
                },
            })
def test_causal_source(self):
    """Check the source, central and sink predicates on the causal chain 1 -> 2 -> 3."""
    graph = BELGraph()
    graph.add_edge(1, 2, relation=INCREASES)
    graph.add_edge(2, 3, relation=INCREASES)

    predicates = (is_causal_source, is_causal_central, is_causal_sink)

    # node, then the expected outcome of each predicate in the order given above
    expectations = [
        (1, (True, False, False)),
        (2, (False, True, False)),
        (3, (False, False, True)),
    ]

    for node, outcomes in expectations:
        for predicate, outcome in zip(predicates, outcomes):
            check = self.assertTrue if outcome else self.assertFalse
            check(predicate(graph, node))
def get_downstream_causal_subgraph(graph, nbunch):
    """Induce a subgraph over the causal out-edges of the nodes in the nbunch.

    Only the edges leaving the given nodes directly are considered, and of those only the ones whose
    relation is in ``CAUSAL_RELATIONS``.

    :param pybel.BELGraph graph: A BEL graph
    :param nbunch: A BEL node or iterable of BEL nodes
    :type nbunch: tuple or list of tuples
    :return: A BEL Graph
    :rtype: pybel.BELGraph
    """
    result = BELGraph()

    for source, target, key, data in graph.out_edges_iter(nbunch, keys=True, data=True):
        if data[RELATION] not in CAUSAL_RELATIONS:
            continue
        result.add_edge(source, target, key=key, attr_dict=data)

    _update_node_helper(graph, result)

    return result
def expand_internal_causal(universe: BELGraph, graph: BELGraph) -> None:
    """Add causal edges between entities in the sub-graph.

    Is an extremely thin wrapper around :func:`expand_internal`.

    :param universe: A BEL graph representing the universe of all knowledge
    :param graph: The target BEL graph to enrich with causal relations between contained nodes

    Equivalent to:

    >>> from pybel.struct import expand_internal, is_causal_relation
    >>> expand_internal(universe, graph, edge_predicates=is_causal_relation)
    """
    for u, v, key in pybel.struct.mutation.expansion.neighborhood.iterate_internal(universe, graph):
        # Look the edge data up through the adjacency mapping. ``universe.edges`` cannot be
        # subscripted node by node: in networkx 2.x it expects one (u, v, key) tuple.
        data = universe[u][v][key]

        if is_causal_relation(data):
            graph.add_edge(u, v, key=key, **data)
class TestNodeFilters(unittest.TestCase):
    """Tests for filtering the nodes of a graph."""

    def setUp(self):
        """Build an eight-node universe and a sub-graph that holds only the 1 -> 2 edge."""
        self.universe = BELGraph()
        for source, target in [(1, 2), (2, 3), (3, 7), (1, 4), (1, 5), (5, 6), (8, 2)]:
            self.universe.add_edge(source, target)

        self.graph = BELGraph()
        self.graph.add_edge(1, 2)

    def test_keep_permissive(self):
        """The permissive filter lets every node of the universe through."""
        expected = {1, 2, 3, 4, 5, 6, 7, 8}
        kept = set(filter_nodes(self.universe, keep_node_permissive))
        self.assertEqual(expected, kept)
def get_subgraph_by_edge_filter(graph, edge_filters):
    """Induce a subgraph on all edges that pass the given filters.

    The data dictionary of every node touched by a passing edge is copied over from ``graph``.

    :param pybel.BELGraph graph: A BEL graph
    :param edge_filters: A predicate or list of predicates (graph, node, node, key, data) -> bool
    :type edge_filters: list or tuple or lambda
    :return: A BEL subgraph induced over the edges passing the given filters
    :rtype: pybel.BELGraph
    """
    result = BELGraph()

    for source, target, key, data in filter_edges(graph, edge_filters):
        result.add_edge(source, target, key=key, attr_dict=data)

    # Edges only carry edge data, so bring the node data over separately
    for node, node_data in result.nodes_iter(data=True):
        node_data.update(graph.node[node])

    return result
def expand_periphery(
        universe: BELGraph,
        graph: BELGraph,
        node_predicates: Optional[NodePredicates] = None,
        edge_predicates: Optional[EdgePredicates] = None,
        threshold: int = 2,
) -> None:
    """Add nodes that are peripheral to a given subgraph, together with their connecting edges.

    The candidates come from :func:`get_subgraph_peripheral_nodes`. A candidate is added when the
    number of distinct nodes across its ``predecessor`` and ``successor`` mappings is at least the
    threshold (default 2).

    :param universe: The universe of BEL knowledge
    :param graph: The (sub)graph to expand, modified in place
    :param node_predicates: Passed through to :func:`get_subgraph_peripheral_nodes`
    :param edge_predicates: Passed through to :func:`get_subgraph_peripheral_nodes`
    :param threshold: Minimum number of distinct connected nodes required to add a candidate node

    A reasonable edge filter to use is :func:`pybel_tools.filters.keep_causal_edges` because this function can allow
    for huge expansions if there happen to be hub nodes.
    """
    nd = get_subgraph_peripheral_nodes(
        universe,
        graph,
        node_predicates=node_predicates,
        edge_predicates=edge_predicates,
    )

    for node, dd in nd.items():
        pred_d = dd['predecessor']
        succ_d = dd['successor']

        in_subgraph_connections = set(pred_d) | set(succ_d)

        if threshold > len(in_subgraph_connections):
            continue

        # Copy the node's own attributes from the universe. ``universe[node]`` is the adjacency
        # mapping of the node (its neighbors), not its attribute dictionary.
        graph.add_node(node, **universe.nodes[node])

        for u, edges in pred_d.items():
            for key, data in edges:
                graph.add_edge(u, node, key=key, **data)

        for v, edges in succ_d.items():
            for key, data in edges:
                graph.add_edge(node, v, key=key, **data)
def make_graph_2():
    """Build the second example network for the PyBEL Tools tests.

    The network has three nodes (gene F, protein E, protein B) and two increases edges, each with
    its own PubMed citation, evidence and annotation.

    :return: The example BEL graph
    """
    graph = BELGraph(
        name='PyBEL Tools Example Network 2',
        version='1.0.0',
        description='Example Network for PyBEL Tools Tests',
        authors='Daniel Domingo-Fernández and Charles Tapley Hoyt',
        contact='*****@*****.**',
    )

    for node_data in (gene_f, protein_e, protein_b):
        graph.add_node_from_data(node_data)

    # (source, target, PubMed reference, evidence, value of the 'Annotation' annotation)
    edges = [
        (protein_e_tuple, protein_b_tuple, '1', 'Evidence 1', 'foo'),
        (gene_f_tuple, protein_e_tuple, '2', 'Evidence 2', 'foo2'),
    ]

    for source, target, reference, evidence, annotation in edges:
        graph.add_edge(source, target, attr_dict={
            RELATION: INCREASES,
            CITATION: {
                CITATION_TYPE: CITATION_TYPE_PUBMED,
                CITATION_REFERENCE: reference,
            },
            EVIDENCE: evidence,
            ANNOTATIONS: {
                'Annotation': annotation,
            },
        })

    return graph
def setUp(self):
    """Build a two-node graph whose single edge carries a PubMed citation."""
    super(TestCitations, self).setUp()

    self.pmid = "9611787"

    graph = BELGraph()
    for node in (1, 2):
        graph.add_node(node)

    citation = {
        CITATION_TYPE: CITATION_TYPE_PUBMED,
        CITATION_REFERENCE: self.pmid,
    }
    graph.add_edge(1, 2, attr_dict={CITATION: citation})

    self.graph = graph
def test_collapse_1(self):
    """Check that collapsing p2 into p1 removes one node and keeps both edges."""
    graph = BELGraph()
    for node in (p1, p2, p3):
        graph.add_simple_node(*node)

    graph.add_edge(p1, p3, **{RELATION: INCREASES})
    graph.add_edge(p2, p3, **{RELATION: DIRECTLY_INCREASES})

    self.assertEqual(3, graph.number_of_nodes())
    self.assertEqual(2, graph.number_of_edges())

    collapse_nodes(graph, {p1: {p2}})

    self.assertEqual(2, graph.number_of_nodes())
    self.assertEqual(2, graph.number_of_edges(), msg=graph.edges(data=True, keys=True))
def test_randomly_select_node_1(self):
    """Test that nodes are randomly selected in proportion to their degree."""
    a, b, c, d = (n() for _ in range(4))

    graph = BELGraph()
    for source, target in [(a, b), (b, c), (b, d)]:
        graph.add_edge(source, target)

    # The degrees sum to 6, which is the denominator of the expected frequencies below
    degrees = [(a, 1), (b, 3), (c, 1), (d, 1)]
    for node, degree in degrees:
        self.assertEqual(degree, graph.degree(node))

    no_grow = set()

    node_counter = Counter(
        randomly_select_node(graph, no_grow, self.random_state)
        for _ in range(self.trials)
    )

    for node, degree in degrees:
        self.assertIn(node, node_counter)
        self.assertAlmostEqual(degree / 6, node_counter[node] / self.trials, places=2)
def test_separate_unstable(self):
    """Check that an unstable correlation triangle is found among nodes given as tuples."""
    a, b, c, d = ((PROTEIN, 'HGNC', letter) for letter in 'ABCD')

    graph = BELGraph()
    for node in (a, b, c, d):
        graph.add_simple_node(*node)

    # Two positive correlations and one negative one form the triangle; d stays unconnected
    correlations = [
        (a, b, POSITIVE_CORRELATION),
        (a, c, POSITIVE_CORRELATION),
        (c, b, NEGATIVE_CORRELATION),
    ]
    for source, target, relation in correlations:
        graph.add_edge(source, target, **{RELATION: relation})

    infer_missing_two_way_edges(graph)

    cg = get_correlation_graph(graph)

    for node in (a, b, c):
        self.assertIn(node, cg)

    for source, target in [(a, b), (a, c), (b, c)]:
        self.assertTrue(cg.has_edge(source, target))

    for source, target, relation in correlations:
        self.assertIn(relation, cg.edge[source][target])

    triangles = tuple(get_correlation_triangles(cg))

    self.assertEqual(1, len(triangles))
    self.assertEqual((a, b, c), triangles[0])

    result = tuple(get_separate_unstable_correlation_triples(graph))
    self.assertEqual(1, len(result))
    self.assertEqual((a, b, c), result[0])
def test_separate_unstable(self):
    """Check that an unstable correlation triangle is found among Protein nodes."""
    a, b, c, d = (Protein('HGNC', letter) for letter in 'ABCD')

    graph = BELGraph()
    for node in (a, b, c, d):
        graph.add_node_from_data(node)

    # Two positive correlations and one negative one form the triangle; d stays unconnected
    correlations = [
        (a, b, POSITIVE_CORRELATION),
        (a, c, POSITIVE_CORRELATION),
        (c, b, NEGATIVE_CORRELATION),
    ]
    for source, target, relation in correlations:
        graph.add_edge(source, target, **{RELATION: relation})

    infer_missing_two_way_edges(graph)

    cg = get_correlation_graph(graph)

    for node in (a, b, c):
        self.assertIn(node, cg)

    for source, target in [(a, b), (a, c), (b, c)]:
        self.assertTrue(cg.has_edge(source, target))

    for source, target, relation in correlations:
        self.assertIn(relation, cg[source][target])

    triangles = tuple(get_correlation_triangles(cg))

    self.assertEqual(1, len(triangles))
    self.assertEqual((a, b, c), triangles[0])

    result = tuple(get_separate_unstable_correlation_triples(graph))
    self.assertEqual(1, len(result))
    self.assertEqual((a, b, c), result[0])
def setUp(self):
    """Build two small integer-node graphs, ``g`` and ``h``, that share the 1 -> 3 edge."""
    first = BELGraph()
    for source, target in [(1, 2), (1, 3), (8, 3)]:
        first.add_edge(source, target)

    second = BELGraph()
    for source, target in [(1, 3), (1, 4), (5, 6)]:
        second.add_edge(source, target)
    second.add_node(7)  # a node without any edge

    self.g = first
    self.h = second
def setUp(self):
    """Build two small graphs, ``g`` and ``h``, that share the p1 -> p3 edge."""
    first = BELGraph()
    for source, target in [(p1, p2), (p1, p3), (p8, p3)]:
        first.add_edge(source, target)

    second = BELGraph()
    for source, target in [(p1, p3), (p1, p4), (p5, p6)]:
        second.add_edge(source, target)
    second.add_node(p7)  # a node without any edge

    self.g = first
    self.h = second