def test_concatenate_multiple_edge_filter(self):
    """Check filtering with a list of two edge predicates, then with an inverted predicate."""

    def has_odd_source(graph, u, v, k):
        """Pass edges whose source node is odd."""
        return u % 2 != 0

    def has_even_target(graph, u, v, k):
        """Pass edges whose target node is even."""
        return v % 2 == 0

    # Both predicates given together as a list.
    passing = filter_edges(self.universe, [has_odd_source, has_even_target])
    edges = make_edge_iterator_set(passing)
    self.assertEqual({(1, 2), (1, 4), (5, 6)}, edges)

    passing_count = count_passed_edge_filter(self.universe, [has_odd_source, has_even_target])
    self.assertEqual(3, passing_count)

    # A single predicate built by inverting the odd-source check.
    has_even_source = invert_edge_predicate(has_odd_source)
    passing = filter_edges(self.universe, has_even_source)
    edges = make_edge_iterator_set(passing)
    self.assertEqual({(2, 3), (8, 2)}, edges)
def parse_authors(graph, force_parse=False):
    """Split pipe-delimited ("|") citation author strings into lists, in place.

    Edges whose authors are already a list only contribute to the returned set. Edges whose authors are
    neither a list nor a string are skipped. Afterwards the graph is tagged with ``PYBEL_PARSED_AUTHORS``.

    :param pybel.BELGraph graph: A BEL graph
    :param bool force_parse: Parse even if the graph is already tagged as parsed
    :return: A set of all authors in this graph, or ``None`` if the graph was already tagged as parsed and
             ``force_parse`` is false
    :rtype: set[str]
    """
    if 'PYBEL_PARSED_AUTHORS' in graph.graph and not force_parse:
        log.debug('Authors have already been parsed in %s', graph.name)
        return

    collected = set()

    for source, target, key, data in filter_edges(graph, has_authors):
        raw_authors = data[CITATION][CITATION_AUTHORS]

        if isinstance(raw_authors, list):
            # Already split; just record the names.
            collected.update(raw_authors)
        elif isinstance(raw_authors, str):
            names = raw_authors.split('|')
            collected.update(names)
            graph.edge[source][target][key][CITATION][CITATION_AUTHORS] = names

    graph.graph['PYBEL_PARSED_AUTHORS'] = True

    return collected
def get_evidences_by_pmid(graph, pmids):
    """Group the evidence strings in the graph by the PubMed identifier of their citation.

    Only edges passing the filter built by :func:`build_pmid_inclusion_filter` from ``pmids`` are considered.

    :param pybel.BELGraph graph: A BEL graph
    :param str or iter[str] pmids: An iterable of PubMed identifiers, as strings. Is consumed and converted to a set.
    :return: A dictionary of {pmid: set of all evidence strings}
    :rtype: dict
    """
    pmid_filter = build_pmid_inclusion_filter(pmids)
    evidences = {}

    for _, _, _, data in filter_edges(graph, pmid_filter):
        reference = data[CITATION][CITATION_REFERENCE]
        evidences.setdefault(reference, set()).add(data[EVIDENCE])

    return evidences
def infer_missing_inverse_edge(graph, relations):
    """Add inferred inverse edges for every edge having one of the given relations.

    For each matching edge ``u -> v``, an edge ``v -> u`` is added whose relation is looked up in
    ``INFERRED_INVERSE`` and whose key is looked up in ``unqualified_edge_code``.

    :param pybel.BELGraph graph: A BEL network
    :param relations: single or iterable of relation names to add their inverse inferred edges
    :type relations: str or iter[str]
    """
    if isinstance(relations, str):
        return infer_missing_inverse_edge(graph, [relations])

    # Snapshot the matching edges before mutating. Adding edges while the edge iterator is still
    # being consumed can raise RuntimeError (e.g. on a matching self-loop) or cause freshly
    # inferred edges to be visited and inverted again.
    matching_edges = list(filter_edges(graph, build_relation_filter(relations)))

    for u, v, _, d in matching_edges:
        relation = d[RELATION]
        graph.add_edge(
            v,
            u,
            key=unqualified_edge_code[relation],
            **{RELATION: INFERRED_INVERSE[relation]}
        )
def count_citations(graph, **annotations):
    """Count the citations in a graph, restricted to edges matching the given annotation filter.

    A citation pair is counted at most once per ``(u, v)`` node pair, regardless of how many edges between
    those two nodes carry it.

    :param pybel.BELGraph graph: A BEL graph
    :param dict annotations: The annotation filters to use
    :return: A counter from {(citation type, citation reference): frequency}
    :rtype: collections.Counter
    """
    pairs_by_node_pair = defaultdict(set)
    matching_edges = filter_edges(graph, build_edge_data_filter(annotations))

    for source, target, _, data in matching_edges:
        if CITATION in data:
            pairs_by_node_pair[source, target].add(get_citation_pair(data))

    tally = Counter()
    for citation_pairs in pairs_by_node_pair.values():
        tally.update(citation_pairs)

    return tally
def serialize_authors(graph, force_serialize=False):
    """Join every citation author list back into a single pipe-delimited ("|") string, in place.

    Author entries that are not lists are left untouched. The ``PYBEL_PARSED_AUTHORS`` tag is removed
    from the graph afterwards.

    :param pybel.BELGraph graph: A BEL graph
    :param bool force_serialize: Serialize even if the graph is not tagged as having parsed authors
    """
    if not (force_serialize or 'PYBEL_PARSED_AUTHORS' in graph.graph):
        log.warning('Authors have not yet been parsed in %s', graph.name)
        return

    for source, target, key, data in filter_edges(graph, has_authors):
        author_list = data[CITATION][CITATION_AUTHORS]

        if isinstance(author_list, list):
            graph.edge[source][target][key][CITATION][CITATION_AUTHORS] = '|'.join(author_list)

    # The tag may be absent when serialization was forced.
    graph.graph.pop('PYBEL_PARSED_AUTHORS', None)
def enrich_pubmed_citations(graph, stringify_authors=False, manager=None):
    """Overwrite all PubMed citations with values from NCBI's eUtils lookup service.

    Sets authors as list, so probably a good idea to run :func:`pybel_tools.mutation.serialize_authors`
    before exporting.

    :param pybel.BELGraph graph: A BEL graph
    :param bool stringify_authors: Converts all author lists to author strings using
                                   :func:`pybel_tools.mutation.serialize_authors`. Defaults to ``False``.
    :param manager: An RFC-1738 database connection string, a pre-built :class:`pybel.manager.Manager`,
                    or ``None`` for default connection
    :type manager: None or str or Manager
    :return: A set of PMIDs for which the eUtils service crashed, or for which no data was returned.
             Empty if the graph was already tagged as enriched.
    :rtype: set[str]
    """
    if 'PYBEL_ENRICHED_CITATIONS' in graph.graph:
        log.warning('citations have already been enriched in %s', graph)
        return set()

    pmids = get_pubmed_identifiers(graph)
    pmid_data, errors = get_citations_by_pmids(manager=manager, pmids=pmids)

    for u, v, k in filter_edges(graph, has_pubmed):
        citation = graph.edge[u][v][k][CITATION]
        pmid = citation[CITATION_REFERENCE].strip()

        if pmid not in pmid_data:
            log.warning('Missing data for PubMed identifier: %s', pmid)
            errors.add(pmid)
            continue

        citation.update(pmid_data[pmid])

    # Enrichment stores authors as lists, so the graph now counts as having parsed authors.
    # The tag must be set *before* delegating to serialize_authors: that function skips (with a
    # warning) any graph lacking the tag, which previously made stringify_authors a silent no-op
    # on graphs whose authors had not been parsed beforehand. serialize_authors clears the tag
    # again once the lists have been joined.
    graph.graph['PYBEL_PARSED_AUTHORS'] = True

    if stringify_authors:
        serialize_authors(graph)

    graph.graph['PYBEL_ENRICHED_CITATIONS'] = True

    return errors
def test_missing_edge_filter(self):
    """Check that conjoining an empty list of predicates keeps the edge (1, 2)."""
    empty_conjunction = and_edge_predicates([])
    passing = filter_edges(self.graph, empty_conjunction)
    edges = make_edge_iterator_set(passing)
    expected = {(1, 2)}
    self.assertEqual(expected, edges)
def test_keep_edge_unpermissive(self):
    """Check that the inverse of the always-true predicate lets no edges through."""
    keep_edge_restrictive = invert_edge_predicate(true_edge_predicate)
    passing = filter_edges(self.graph, keep_edge_restrictive)
    edges = make_edge_iterator_set(passing)
    self.assertEqual(set(), edges)
def test_keep_edge_permissive(self):
    """Check that the always-true predicate keeps the edge (1, 2)."""
    passing = filter_edges(self.graph, true_edge_predicate)
    edges = make_edge_iterator_set(passing)
    expected = {(1, 2)}
    self.assertEqual(expected, edges)
def test_no_edge_filter(self):
    """Check that filtering with an empty list of predicates keeps the edge (1, 2)."""
    no_predicates = []
    passing = filter_edges(self.graph, no_predicates)
    edges = make_edge_iterator_set(passing)
    expected = {(1, 2)}
    self.assertEqual(expected, edges)
def test_concatenate_single_edge_filter(self):
    """Check that a one-element list holding the always-true predicate keeps the edge (1, 2)."""
    single_predicate = [true_edge_predicate]
    passing = filter_edges(self.graph, single_predicate)
    edges = make_edge_iterator_set(passing)
    expected = {(1, 2)}
    self.assertEqual(expected, edges)
def test_concatenate_single_edge_filter(self):
    """Check that a one-element list holding ``keep_edge_permissive`` keeps the edge (1, 2)."""
    single_predicate = [keep_edge_permissive]
    passing = filter_edges(self.graph, single_predicate)
    edges = make_edge_iterator_set(passing)
    expected = {(1, 2)}
    self.assertEqual(expected, edges)
def _collapse_edge_passing_predicates(graph: BELGraph, edge_predicates: EdgePredicates = None) -> None:
    """Collapse every edge that passes the given edge predicates.

    For each ``(u, v, key)`` triple yielded by :func:`filter_edges`, :func:`collapse_pair` is called with
    ``u`` as the survivor and ``v`` as the victim.

    :param graph: A BEL graph
    :param edge_predicates: The predicate(s) forwarded to :func:`filter_edges`
    """
    # NOTE(review): collapse_pair presumably mutates the graph while filter_edges is still being
    # consumed - confirm this is safe, or snapshot the edges first.
    passing_edges = filter_edges(graph, edge_predicates=edge_predicates)

    for first, second, _ in passing_edges:
        collapse_pair(graph, survivor=first, victim=second)