def assert_has_edge(self: unittest.TestCase, u: BaseEntity, v: BaseEntity, graph: BELGraph, permissive: bool = True,
                    **kwargs):
    """Assert that ``graph`` has an edge from ``u`` to ``v``, optionally carrying the given data.

    :param self: The test case whose assertion methods are used
    :param u: The source node of the expected edge
    :param v: The target node of the expected edge
    :param graph: The graph in which the edge is looked up
    :param permissive: If true, the edge data are compared with ``any_subdict_matches``;
     otherwise with ``any_dict_matches``
    :param kwargs: The expected edge data. If none are given, only the existence of the edge is checked.
    """
    for node in (u, v):
        self.assertIsInstance(node, BaseEntity)

    edge_exists = graph.has_edge(u, v)
    # List every edge of the graph in the failure message to ease debugging
    other_edges = '\n'.join(
        edge_to_bel(source, target, data)
        for source, target, data in graph.edges(data=True)
    )
    self.assertTrue(
        edge_exists,
        msg='Edge ({}, {}) not in graph. Other edges:\n{}'.format(u, v, other_edges),
    )

    if not kwargs:
        return

    matcher = any_subdict_matches if permissive else any_dict_matches
    matches = matcher(graph[u][v], kwargs)

    expected = dumps(kwargs, indent=2, sort_keys=True)
    actual = str(graph[u][v])
    msg = 'No edge ({}, {}) with correct properties. expected:\n {}\nbut got:\n{}'.format(u, v, expected, actual)
    self.assertTrue(matches, msg=msg)
def _get_evidence(self, u_data, v_data, k, edge_data):
    """Build an ``Evidence`` object from the data attached to one edge.

    :param u_data: The source node of the edge (passed on to ``edge_to_bel``)
    :param v_data: The target node of the edge (passed on to ``edge_to_bel``)
    :param k: The key of the edge, stored as the ``source_id`` of the evidence
    :param edge_data: The data dictionary of the edge
    :return: The ``Evidence`` built from the edge, with ``source_api='bel'``
    """
    ev_text = edge_data.get(pc.EVIDENCE)
    ev_citation = edge_data.get(pc.CITATION)
    # Both references default to None so that an edge without a citation
    # does not hit an unbound ``ev_ref`` further down.
    ev_pmid = None
    ev_ref = None
    if ev_citation:
        cit_type = ev_citation.namespace
        cit_ref = ev_citation.identifier
        if cit_type == pc.CITATION_TYPE_PUBMED:
            ev_pmid = cit_ref
        else:
            # Non-PubMed citations are kept as a "<type>: <reference>" annotation
            ev_ref = '%s: %s' % (cit_type, cit_ref)
    epistemics = {'direct': _rel_is_direct(edge_data)}
    # Work on a shallow copy: the insertions and the pop below must not
    # change the annotations stored on the edge itself.
    annotations = dict(edge_data.get(pc.ANNOTATIONS) or {})
    annotations['bel'] = edge_to_bel(u_data, v_data, edge_data)
    if ev_ref:
        annotations['citation_ref'] = ev_ref
    context = extract_context(annotations, self.annot_manager)
    text_location = annotations.pop('TextLocation', None)
    if text_location:
        # Only the first TextLocation entry is used for the section type
        text_location = text_location[0].identifier
        epistemics['section_type'] = \
            _pybel_text_location_map.get(text_location)
    ev = Evidence(text=ev_text, pmid=ev_pmid, source_api='bel',
                  source_id=k, epistemics=epistemics,
                  annotations=annotations, context=context)
    return ev
def _get_evidence(u_data, v_data, edge_data):
    """Build an ``Evidence`` object from the data attached to one edge.

    :param u_data: The source node of the edge (passed on to ``edge_to_bel``)
    :param v_data: The target node of the edge (passed on to ``edge_to_bel``)
    :param edge_data: The data dictionary of the edge
    :return: The ``Evidence`` built from the edge, with ``source_api='bel'`` and
     the value stored under ``pc.HASH`` as its ``source_id``
    """
    ev_text = edge_data.get(pc.EVIDENCE)
    ev_citation = edge_data.get(pc.CITATION)
    ev_pmid = None
    if ev_citation:
        cit_type = ev_citation[pc.CITATION_TYPE]
        cit_ref = ev_citation[pc.CITATION_REFERENCE]
        if cit_type == pc.CITATION_TYPE_PUBMED:
            ev_pmid = cit_ref
        else:
            # Non-PubMed citations are stored as "<type>: <reference>"
            ev_pmid = '%s: %s' % (cit_type, cit_ref)
    epistemics = {'direct': _rel_is_direct(edge_data)}
    # Work on a shallow copy: the 'bel' insertion and the 'TextLocation' pop
    # below must not change the annotations stored on the edge itself.
    annotations = dict(edge_data.get(pc.ANNOTATIONS) or {})
    annotations['bel'] = edge_to_bel(u_data, v_data, edge_data)
    text_location = annotations.pop('TextLocation', None)
    if text_location is not None:
        epistemics['section_type'] = _pybel_text_location_map.get(
            text_location)
    ev = Evidence(text=ev_text, pmid=ev_pmid, source_api='bel',
                  source_id=edge_data.get(pc.HASH), epistemics=epistemics,
                  annotations=annotations)
    return ev
def main(directory: str):
    """Make hetionet exports.

    The graph is loaded from ``hetionet.bel.nodelink.json.gz`` in ``directory`` if that file
    exists; otherwise it is built with ``get_hetionet()`` and cached there. Each whole-graph
    export is written only when its output file does not exist yet.

    When the summary TSV does not exist yet, one randomly chosen edge per metaedge is kept,
    all other edges and the resulting isolated nodes are removed from the graph **in place**,
    and the sampled graph is exported as a BEL script and as GraphDati JSON.

    :param directory: The directory in which the cached graph and all exports are stored
    """
    path = os.path.join(directory, 'hetionet.bel.nodelink.json.gz')
    if not os.path.exists(path):
        graph = get_hetionet()
        to_nodelink_gz(graph, path)
    else:
        # The cache is a gzipped node-link JSON file, not a pickle
        click.echo('loading nodelink JSON from {}'.format(path))
        graph = from_nodelink_gz(path)

    output_bel_gz_path = os.path.join(directory, 'hetionet.bel.gz')
    if not os.path.exists(output_bel_gz_path):
        click.echo('outputting whole hetionet as BEL GZ to {}'.format(output_bel_gz_path))
        to_bel_script_gz(graph, output_bel_gz_path, use_identifiers=True)

    output_graphdati_jsonl_gz_path = os.path.join(directory, 'hetionet.bel.graphdati.jsonl.gz')
    if not os.path.exists(output_graphdati_jsonl_gz_path):
        click.echo('outputting whole hetionet as BEL GraphDati JSONL GZ to {}'.format(output_graphdati_jsonl_gz_path))
        to_graphdati_jsonl_gz(graph, output_graphdati_jsonl_gz_path, use_identifiers=True)

    output_graphdati_gz_path = os.path.join(directory, 'hetionet.bel.graphdati.json.gz')
    if not os.path.exists(output_graphdati_gz_path):
        click.echo('outputting whole hetionet as BEL GraphDati JSON GZ to {}'.format(output_graphdati_gz_path))
        to_graphdati_gz(graph, output_graphdati_gz_path, use_identifiers=True)

    summary_tsv_path = os.path.join(directory, 'hetionet_summary.tsv')
    if not os.path.exists(summary_tsv_path):
        click.echo('getting metaedges')
        rows = []
        keep_keys = set()
        # Pick one random example edge for each metaedge
        for value in get_metaedge_to_key(graph).values():
            u, v, key = choice(list(value))
            keep_keys.add(key)
            d = graph[u][v][key]
            bel = edge_to_bel(u, v, d, use_identifiers=True)
            rows.append((key[:8], bel))

        df = pd.DataFrame(rows, columns=['key', 'bel'])
        df.to_csv(summary_tsv_path, sep='\t', index=False)

        # Materialize the list first so the graph is not modified while iterating over its edges
        non_sample_edges = [
            (u, v, k, d)
            for u, v, k, d in tqdm(graph.edges(keys=True, data=True), desc='Getting non-sample edges to remove')
            if k not in keep_keys
        ]
        click.echo('Removing non-sample edges')
        graph.remove_edges_from(non_sample_edges)
        graph.remove_nodes_from(list(nx.isolates(graph)))

        sample_bel_path = os.path.join(directory, 'hetionet_sample.bel')
        click.echo('outputting sample hetionet in BEL to {}'.format(sample_bel_path))
        to_bel_script(graph, sample_bel_path, use_identifiers=True)

        sample_graphdati_path = os.path.join(directory, 'hetionet_sample.bel.graphdati.json')
        # Report the GraphDati path here, not the BEL script path again
        click.echo('outputting sample hetionet in BEL GraphDati JSON to {}'.format(sample_graphdati_path))
        to_graphdati_file(graph, sample_graphdati_path, use_identifiers=True, indent=2)
def test_translation(self):
    """Test parsing a ``>>`` statement into a ``translatedTo`` edge from an RNA to a protein.

    3.3.3 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_translatedto
    """
    statement = 'r(HGNC:AKT1,loc(GO:intracellular)) >> p(HGNC:AKT1)'
    result = self.parser.relation.parseString(statement)

    # [[RNA, ['HGNC', 'AKT1']], TRANSLATED_TO, [PROTEIN, ['HGNC', 'AKT1']]]
    expected_source = {
        FUNCTION: RNA,
        CONCEPT: {NAMESPACE: 'HGNC', NAME: 'AKT1'},
        LOCATION: {NAMESPACE: 'GO', NAME: 'intracellular'},
    }
    expected_target = {
        FUNCTION: PROTEIN,
        CONCEPT: {NAMESPACE: 'HGNC', NAME: 'AKT1'},
    }
    expected_result = {
        SOURCE: expected_source,
        RELATION: TRANSLATED_TO,
        TARGET: expected_target,
    }
    self.assertEqual(expected_result, result.asDict())

    # Both nodes must have been added to the graph
    self.assertEqual(2, self.graph.number_of_nodes())
    source = rna(name='AKT1', namespace='HGNC')
    self.assertIn(source, self.graph)
    target = protein(name='AKT1', namespace='HGNC')
    self.assertIn(target, self.graph)

    # Exactly one edge must connect them
    self.assertEqual(1, self.graph.number_of_edges())
    self.assertTrue(self.graph.has_edge(source, target))

    edges_by_key = self.parser.graph[source][target]
    self.assertEqual(1, len(edges_by_key))
    data = list(edges_by_key.values())[0]
    self.assertIn(RELATION, data)
    self.assertEqual(TRANSLATED_TO, data[RELATION])

    # The edge must serialize back to canonical BEL
    calculated_edge_bel = edge_to_bel(source, target, data=data)
    expected_edge_bel = 'r(HGNC:AKT1, loc(GO:intracellular)) translatedTo p(HGNC:AKT1)'
    self.assertEqual(expected_edge_bel, calculated_edge_bel)
def assert_has_edge(
    self: unittest.TestCase,
    u: BaseEntity,
    v: BaseEntity,
    graph: BELGraph,
    *,
    only: bool = False,
    permissive: bool = True,
    use_identifiers: bool = False,
    **expected_edge_data
):
    """Assert that ``graph`` has an edge from ``u`` to ``v``, optionally carrying the given data.

    :param self: The test case whose assertion methods are used
    :param u: The source node of the expected edge
    :param v: The target node of the expected edge
    :param graph: The graph in which the edge is looked up
    :param only: If true, the first entry between ``u`` and ``v`` must equal the expected data
    :param permissive: If true (and ``only`` is false), the edge data are compared with
     ``any_subdict_matches``; otherwise with ``any_dict_matches``
    :param use_identifiers: Passed to ``edge_to_bel`` when listing the graph's edges in the failure message
    :param expected_edge_data: The expected edge data. If none are given, only the existence
     of the edge is checked.
    """
    for node in (u, v):
        self.assertIsInstance(node, BaseEntity)

    edge_exists = graph.has_edge(u, v)
    # List every edge of the graph in the failure message to ease debugging
    all_edges_bel = '\n'.join(
        edge_to_bel(source, target, data, use_identifiers=use_identifiers)
        for source, target, data in graph.edges(data=True)
    )
    self.assertTrue(
        edge_exists,
        msg='Edge ({}, {}) not in graph. Other edges:\n{}'.format(u, v, all_edges_bel),
    )

    if not expected_edge_data:
        return

    if ANNOTATIONS in expected_edge_data:
        # Bring the expected annotations into the same form the graph stores
        cleaned_annotations = graph._clean_annotations(expected_edge_data[ANNOTATIONS])
        expected_edge_data[ANNOTATIONS] = cleaned_annotations

    if only:
        actual_edge_data = list(graph[u][v].values())[0]
        self.assertEqual(
            _remove_line(expected_edge_data),
            _remove_line(actual_edge_data),
            msg='Only entry not equal',
        )
        return

    actual_dicts = {}
    for edge_key, edge_data in graph[u][v].items():
        actual_dicts[edge_key] = _remove_line(edge_data)

    matcher = any_subdict_matches if permissive else any_dict_matches
    matches = matcher(actual_dicts, _remove_line(expected_edge_data))

    expected_dump = dumps(expected_edge_data, indent=2, sort_keys=True)
    actual_dump = dumps(actual_dicts, indent=2, sort_keys=True)
    msg = 'No edge ({}, {}) with correct properties. expected:\n {}\nbut got:\n{}'.format(
        u, v, expected_dump, actual_dump,
    )
    self.assertTrue(matches, msg=msg)
def _to_graphml_umbrella(graph):
    """Convert a BEL graph to a multi-digraph whose nodes are the BEL terms of each edge.

    The source and target terms are taken from the BEL string of the edge, so every
    edge is re-added between two plain strings.

    :param graph: A BEL graph
    :return: A :class:`networkx.MultiDiGraph` with one edge per edge of ``graph``, each
     carrying the original key, the relation, and the BEL string of the edge
    """
    rv = nx.MultiDiGraph()

    for u, v, key, edge_data in graph.edges(data=True, keys=True):
        relation = edge_data[RELATION]
        edge_bel = edge_to_bel(u, v, edge_data)

        # A BEL term may itself contain spaces, e.g. ``r(HGNC:AKT1, loc(GO:intracellular))``,
        # so the string is cut around the relation instead of being tokenized on whitespace.
        source_bel, separator, target_bel = edge_bel.partition(' {} '.format(relation))
        if not separator:
            # The relation is not rendered verbatim in the BEL string;
            # fall back to taking the first and third whitespace-separated tokens
            tokens = edge_bel.split(' ')
            source_bel, target_bel = tokens[0], tokens[2]

        rv.add_edge(
            source_bel,
            target_bel,
            key=key,
            relation=relation,
            bel=graph.edge_to_bel(u, v, edge_data),
        )

    return rv
def _get_evidence(self, u_data, v_data, k, edge_data):
    """Build an ``Evidence`` object from the data attached to one edge.

    :param u_data: The source node of the edge (passed on to ``edge_to_bel``)
    :param v_data: The target node of the edge (passed on to ``edge_to_bel``)
    :param k: The key of the edge, stored as the ``source_id`` of the evidence
    :param edge_data: The data dictionary of the edge
    :return: The ``Evidence`` built from the edge, with ``source_api='bel'``
    """
    ev_text = edge_data.get(pc.EVIDENCE)
    ev_citation = edge_data.get(pc.CITATION)
    # Both references default to None so that an edge without a citation
    # does not hit an unbound ``ev_ref`` further down.
    ev_pmid = None
    ev_ref = None
    if ev_citation:
        cit_type = ev_citation[pc.CITATION_DB]
        cit_ref = ev_citation[pc.CITATION_IDENTIFIER]
        if cit_type == pc.CITATION_TYPES[pc.CITATION_TYPE_PUBMED]:
            ev_pmid = cit_ref
        else:
            # Non-PubMed citations are kept as a "<type>: <reference>" annotation
            ev_ref = '%s: %s' % (cit_type, cit_ref)
    epistemics = {'direct': _rel_is_direct(edge_data)}
    # Work on a shallow copy: the insertions and the pop below must not
    # change the annotations stored on the edge itself.
    annotations = dict(edge_data.get(pc.ANNOTATIONS) or {})
    annotations['bel'] = edge_to_bel(u_data, v_data, edge_data)
    if ev_ref:
        annotations['citation_ref'] = ev_ref
    context = extract_context(annotations, self.annot_manager)
    text_location = annotations.pop('TextLocation', None)
    if text_location:
        # Handle dictionary text_location like {'Abstract': True}
        if isinstance(text_location, dict):
            # FIXME: INDRA's section_type entry is meant to contain
            # a single section string like "abstract" but in principle
            # pybel could have a list of entries in the TextLocation dict.
            # Here we just take the first one.
            text_location = next(iter(text_location))
        epistemics['section_type'] = \
            _pybel_text_location_map.get(text_location)
    ev = Evidence(text=ev_text, pmid=ev_pmid, source_api='bel',
                  source_id=k, epistemics=epistemics,
                  annotations=annotations, context=context)
    return ev
def _get_evidence(self, u_data, v_data, k, edge_data):
    """Build an ``Evidence`` object from the data attached to one edge.

    :param u_data: The source node of the edge (passed on to ``edge_to_bel``)
    :param v_data: The target node of the edge (passed on to ``edge_to_bel``)
    :param k: The key of the edge, stored as the ``source_id`` of the evidence
    :param edge_data: The data dictionary of the edge
    :return: The ``Evidence`` built from the edge, with ``source_api='bel'``
    """
    ev_text = edge_data.get(pc.EVIDENCE)
    ev_citation = edge_data.get(pc.CITATION)
    # Both references default to None so that an edge without a citation
    # does not hit an unbound ``ev_ref`` further down.
    ev_pmid = None
    ev_ref = None
    if ev_citation:
        cit_type = ev_citation[pc.CITATION_TYPE]
        cit_ref = ev_citation[pc.CITATION_REFERENCE]
        if cit_type == pc.CITATION_TYPE_PUBMED:
            ev_pmid = cit_ref
        else:
            # Non-PubMed citations are kept as a "<type>: <reference>" annotation
            ev_ref = '%s: %s' % (cit_type, cit_ref)
    epistemics = {'direct': _rel_is_direct(edge_data)}
    # Work on a shallow copy: the insertions and the pop below must not
    # change the annotations stored on the edge itself.
    annotations = dict(edge_data.get(pc.ANNOTATIONS) or {})
    annotations['bel'] = edge_to_bel(u_data, v_data, edge_data)
    if ev_ref:
        annotations['citation_ref'] = ev_ref
    context = extract_context(annotations, self.annot_manager)
    text_location = annotations.pop('TextLocation', None)
    if text_location:
        # Handle dictionary text_location like {'Abstract': True}
        if isinstance(text_location, dict):
            # FIXME: INDRA's section_type entry is meant to contain
            # a single section string like "abstract" but in principle
            # pybel could have a list of entries in the TextLocation dict.
            # Here we just take the first one.
            text_location = next(iter(text_location))
        epistemics['section_type'] = \
            _pybel_text_location_map.get(text_location)
    ev = Evidence(text=ev_text, pmid=ev_pmid, source_api='bel',
                  source_id=k, epistemics=epistemics,
                  annotations=annotations, context=context)
    return ev
def _get_triples_iter(self):
    """Iterate over rows describing the edges in ``self.cross_chromosome_to_edge_keys``.

    For every edge, one row is yielded for each combination of the entries that
    ``self._iter_id_name_loci`` gives for the source and for the target. A row contains:

    - the BEL statement of the edge, and
    - for each of the two nodes: identifier, name, chromosome it is on, and locus on that chromosome
    """
    for chromosomes, edges in self.cross_chromosome_to_edge_keys.items():
        u_chromosome, v_chromosome = chromosomes
        for u, v, _key, data in edges:
            bel = edge_to_bel(u, v, data)
            u_entries = self._iter_id_name_loci(u)
            v_entries = self._iter_id_name_loci(v)
            for u_entry, v_entry in itt.product(u_entries, v_entries):
                u_identifier, u_name, u_locus = u_entry
                v_identifier, v_name, v_locus = v_entry
                yield (
                    bel,
                    u_identifier,
                    u_name,
                    u_chromosome,
                    u_locus,
                    v_identifier,
                    v_name,
                    v_chromosome,
                    v_locus,
                )
def get_statements(self):
    """Dispatch every edge of ``self.graph`` to the extractor matching its shape.

    Edges that no extractor applies to are appended to ``self.unhandled`` as
    ``(u_data, v_data, k, d)`` tuples. The checks below are order-dependent:
    the first one that applies decides how the edge is processed.
    """
    for u_data, v_data, k, d in self.graph.edges(keys=True, data=True):
        edge = (u_data, v_data, k, d)

        # Only causal relations are interpreted, correlations are not
        if d[pc.RELATION] not in pc.CAUSAL_RELATIONS:
            self.unhandled.append(edge)
            continue

        # A complex abundance on either side is additionally added as a statement
        for position, endpoint in enumerate((u_data, v_data)):
            if isinstance(endpoint, dsl.ComplexAbundance):
                self._get_enum_complex(u_data, v_data, k, d, position)

        subj_activity = _get_activity_condition(d.get(pc.SUBJECT))
        obj_activity = _get_activity_condition(d.get(pc.OBJECT))
        obj_to_loc = _get_translocation_target(d.get(pc.OBJECT))

        # A translocation as object is a controlled translocation,
        # which is currently not represented
        if obj_to_loc:
            self.unhandled.append(edge)
            logger.info(
                "Controlled translocations are currently not handled: %s)",
                edge_to_bel(u_data, v_data, d),
            )
            continue

        # Modification, e.g.
        #   x(Foo) -> p(Bar, pmod(Ph))
        #   act(x(Foo)) -> p(Bar, pmod(Ph))
        if isinstance(v_data, dsl.Protein) and has_protein_modification(v_data):
            if obj_activity:
                logger.info(
                    "Ignoring object activity modifier in modification statement: %s, %s, %s, %s",
                    u_data, v_data, k, d,
                )
            else:
                self._get_modification(*edge)
            continue

        if obj_activity:
            if not subj_activity and _proteins_match(u_data, v_data):
                # ActiveForm: same agent on both sides
                #   p(Foo, {variants}) ->/-| act(p(Foo))
                # Also composite active forms:
                #   compositeAbundance(p(Foo, pmod('Ph', 'T')),
                #                      p(Foo, pmod('Ph', 'Y'))) ->/-| act(p(Foo))
                self._get_active_form(*edge)
            elif subj_activity and _rel_is_direct(d) and \
                    obj_activity.activity_type == 'gtpbound':
                # Gef: act(p(Foo)) => gtp(p(Foo))
                # Gap: act(p(Foo)) =| gtp(p(Foo))
                self._get_gef_gap(*edge)
            else:
                # Activation/Inhibition
                #   x(Foo) -> act(x(Foo))
                #   act(x(Foo)) -> act(x(Foo))
                # GtpActivation
                #   gtp(p(Foo)) => act(p(Foo))
                self._get_regulate_activity(*edge)
            continue

        # Activations involving biological processes or pathologies
        #   x(Foo) -> bp(Bar)
        if isinstance(v_data, (dsl.BiologicalProcess, dsl.Pathology)):
            self._get_regulate_activity(*edge)
            continue

        # Regulate amount
        #   x(Foo) -> p(Bar)
        #   x(Foo) -> r(Bar)
        #   act(x(Foo)) -> p(Bar):
        #   x(Foo) -> deg(p(Bar))
        #   act(x(Foo)) ->/-| deg(p(Bar))
        if isinstance(v_data, (
            dsl.Protein,
            dsl.Rna,
            dsl.Abundance,
            dsl.MicroRna,
            dsl.NamedComplexAbundance,
        )) and not obj_activity:
            self._get_regulate_amount(*edge)
            continue

        # Controlled conversions
        #   x(Foo) -> rxn(reactants(r1,...,rn), products(p1,...pn))
        #   act(x(Foo)) -> rxn(reactants(r1,...,rn), products(p1,...pn))
        # Note that we can't really handle statements where the relation
        # is decreases, as inhibition of a reaction match the semantics
        # of a controlled conversion
        if isinstance(v_data, dsl.Reaction) and \
                d[pc.RELATION] in pc.CAUSAL_INCREASE_RELATIONS:
            self._get_conversion(*edge)
            continue

        # UNHANDLED
        #   rxn(reactants(r1,...,rn), products(p1,...pn))
        #   Complex(a,b)
        #   p(A, pmod('ph')) -> Complex(A, B)
        #   Complex(A-Ph, B)
        # Complexes
        #   complex(x(Foo), x(Bar), ...)
        self.unhandled.append(edge)
def to_json_custom(
    graph: BELGraph,
    id_key: str = 'id',
    source_key: str = 'source',
    target_key: str = 'target',
):
    """Prepare JSON for the biological network explorer.

    Nodes are sorted by their BEL string. Links refer to their source and target by the
    position of the node in that sorted list. All edges between the same pair of nodes are
    merged into one link whose ``contexts`` list holds one entry per edge.

    :param graph: A BEL graph
    :param id_key: The key under which the ``md5`` of a node is stored in its dictionary
    :param source_key: The key to use for the source node of a link
    :param target_key: The key to use for the target node of a link
    :rtype: dict
    """
    as_bel = methodcaller('as_bel')

    nodes = []
    node_to_index = {}
    for index, node in enumerate(sorted(graph, key=as_bel)):
        node_dict = node.copy()
        node_dict[id_key] = node.md5
        node_dict['bel'] = node.as_bel()
        if VARIANTS in node_dict or FUSION in node_dict or MEMBERS in node_dict:
            node_dict['cname'] = node_dict['bel']
        nodes.append(node_dict)
        node_to_index[node] = index

    links = {}
    for u, v, key, data in graph.edges(keys=True, data=True):
        relation = data[RELATION]

        if relation in TWO_WAY_RELATIONS:
            canonical_pair = tuple(sorted((u, v), key=as_bel))
            if (u, v) != canonical_pair:
                continue  # don't keep two way edges twice

        pair = u, v
        if pair not in links:
            # One link per node pair; every further edge only adds a context
            links[pair] = {
                source_key: node_to_index[u],
                target_key: node_to_index[v],
                'contexts': [],
            }
        link = links[pair]

        context = {
            'id': key,
            'bel': edge_to_bel(u, v, data),
        }
        context.update(data)

        if relation in CAUSAL_INCREASE_RELATIONS:
            link[RELATION] = INCREASES
        elif relation in CAUSAL_DECREASE_RELATIONS:
            link[RELATION] = DECREASES

        link['contexts'].append(context)

    return {
        'nodes': nodes,
        'links': list(links.values()),
    }
def get_statements(self):
    """Dispatch every edge of ``self.graph`` to the extractor matching its shape.

    Edges that no extractor applies to are appended to ``self.unhandled`` as
    ``(u_data, v_data, k, d)`` tuples. The branches below are order-dependent:
    the first one that applies decides how the edge is processed.
    """
    for u_data, v_data, k, d in self.graph.edges(keys=True, data=True):
        # We only interpret causal relations, not correlations
        if d[pc.RELATION] not in pc.CAUSAL_RELATIONS:
            self.unhandled.append((u_data, v_data, k, d))
            continue
        # If the left or right-hand sides involve complex abundances,
        # add them as statements
        for node_ix, node_data in enumerate((u_data, v_data)):
            if node_data[pc.FUNCTION] == pc.COMPLEX:
                self._get_complex(u_data, v_data, k, d, node_ix)
        subj_activity = _get_activity_condition(d.get(pc.SUBJECT))
        obj_activity = _get_activity_condition(d.get(pc.OBJECT))
        obj_to_loc = _get_translocation_target(d.get(pc.OBJECT))
        # If the object is a translocation, this represents a controlled
        # translocation, which we currently do not represent
        if obj_to_loc:
            self.unhandled.append((u_data, v_data, k, d))
            logger.info("Controlled translocations are currently not "
                        "handled: %s)", edge_to_bel(u_data, v_data, d))
            continue
        # The function of the target node is looked up once and used by
        # every branch below, so that all of them read it the same way
        v_func = v_data[pc.FUNCTION]
        # Modification, e.g.
        #   x(Foo) -> p(Bar, pmod(Ph))
        #   act(x(Foo)) -> p(Bar, pmod(Ph))
        if v_func == pc.PROTEIN and \
                has_protein_modification(v_data):
            if obj_activity:
                logger.info("Ignoring object activity modifier in "
                            "modification statement: %s, %s, %s, %s",
                            u_data, v_data, k, d)
            else:
                self._get_modification(u_data, v_data, k, d)
        elif obj_activity:
            # If the agents on the left and right hand sides are the same,
            # then get an active form:
            # ActiveForm
            #   p(Foo, {variants}) ->/-| act(p(Foo))
            # Also Composite active forms:
            #   compositeAbundance(p(Foo, pmod('Ph', 'T')),
            #                      p(Foo, pmod('Ph', 'Y'))) ->/-|
            #                            act(p(Foo))
            if not subj_activity and _proteins_match(u_data, v_data):
                self._get_active_form(u_data, v_data, k, d)
            # Gef
            #   act(p(Foo)) => gtp(p(Foo))
            # Gap
            #   act(p(Foo)) =| gtp(p(Foo))
            elif subj_activity and _rel_is_direct(d) and \
                    obj_activity.activity_type == 'gtpbound':
                self._get_gef_gap(u_data, v_data, k, d)
            # Activation/Inhibition
            #   x(Foo) -> act(x(Foo))
            #   act(x(Foo)) -> act(x(Foo))
            # GtpActivation
            #   gtp(p(Foo)) => act(p(Foo))
            else:
                self._get_regulate_activity(u_data, v_data, k, d)
        # Activations involving biological processes or pathologies
        #   x(Foo) -> bp(Bar)
        elif v_func in (pc.BIOPROCESS, pc.PATHOLOGY):
            self._get_regulate_activity(u_data, v_data, k, d)
        # Regulate amount
        #   x(Foo) -> p(Bar)
        #   x(Foo) -> r(Bar)
        #   act(x(Foo)) -> p(Bar):
        #   x(Foo) -> deg(p(Bar))
        #   act(x(Foo)) ->/-| deg(p(Bar))
        elif v_func in (pc.PROTEIN, pc.RNA, pc.ABUNDANCE,
                        pc.COMPLEX, pc.MIRNA) and not obj_activity:
            self._get_regulate_amount(u_data, v_data, k, d)
        # Controlled conversions
        #   x(Foo) -> rxn(reactants(r1,...,rn), products(p1,...pn))
        #   act(x(Foo)) -> rxn(reactants(r1,...,rn), products(p1,...pn))
        # Note that we can't really handle statements where the relation
        # is decreases, as inhibition of a reaction match the semantics
        # of a controlled conversion
        elif v_func == pc.REACTION and \
                d[pc.RELATION] in pc.CAUSAL_INCREASE_RELATIONS:
            self._get_conversion(u_data, v_data, k, d)
        # UNHANDLED
        #   rxn(reactants(r1,...,rn), products(p1,...pn))
        #   Complex(a,b)
        #   p(A, pmod('ph')) -> Complex(A, B)
        #   Complex(A-Ph, B)
        # Complexes
        #   complex(x(Foo), x(Bar), ...)
        else:
            self.unhandled.append((u_data, v_data, k, d))