Beispiel #1
0
    def test_concatenate_multiple_edge_filter(self):
        """Check filtering with a list of two edge predicates, and with an inverted predicate."""

        def has_odd_source(graph, u, v, k):
            """Pass edges whose source node is odd."""
            return bool(u % 2)

        def has_even_target(graph, u, v, k):
            """Pass edges whose target node is even."""
            return not v % 2

        # Both predicates given together as a list.
        odd_to_even = make_edge_iterator_set(
            filter_edges(self.universe, [has_odd_source, has_even_target])
        )
        self.assertEqual({(1, 2), (1, 4), (5, 6)}, odd_to_even)

        passed_count = count_passed_edge_filter(self.universe, [has_odd_source, has_even_target])
        self.assertEqual(3, passed_count)

        # The inverse of "odd source" is used on its own as a single predicate.
        has_even_source = invert_edge_predicate(has_odd_source)
        from_even = make_edge_iterator_set(
            filter_edges(self.universe, has_even_source)
        )
        self.assertEqual({(2, 3), (8, 2)}, from_even)
Beispiel #2
0
def parse_authors(graph, force_parse=False):
    """Split pipe-delimited citation author strings into lists, in place.

    Edges are selected with the ``has_authors`` filter. An author entry that is already a list is
    only collected; a string entry is split on ``"|"`` and the resulting list is written back to
    the edge. Entries of any other type are skipped. Afterwards the graph is tagged with
    ``PYBEL_PARSED_AUTHORS``.

    :param pybel.BELGraph graph: A BEL graph
    :param bool force_parse: Parse even if the graph is already tagged as parsed
    :return: The set of all authors found, or ``None`` if the graph was already tagged as parsed
             and ``force_parse`` is false
    :rtype: set[str] or None
    """
    if not force_parse and 'PYBEL_PARSED_AUTHORS' in graph.graph:
        log.debug('Authors have already been parsed in %s', graph.name)
        return

    collected = set()

    for u, v, k, data in filter_edges(graph, has_authors):
        authors = data[CITATION][CITATION_AUTHORS]

        if isinstance(authors, str):
            # str.split already returns a fresh list, so it can be stored on the edge directly.
            authors = authors.split('|')
            graph.edge[u][v][k][CITATION][CITATION_AUTHORS] = authors
        elif not isinstance(authors, list):
            # Neither a list nor a string: nothing to parse or collect.
            continue

        collected.update(authors)

    graph.graph['PYBEL_PARSED_AUTHORS'] = True

    return collected
Beispiel #3
0
def get_evidences_by_pmid(graph, pmids):
    """Group the evidence strings in the graph by the citation reference of their edge.

    Only edges passing the filter built by ``build_pmid_inclusion_filter(pmids)`` are considered.

    :param pybel.BELGraph graph: A BEL graph
    :param str or iter[str] pmids: PubMed identifier(s), as strings, handed to
                                   ``build_pmid_inclusion_filter``
    :return: A dictionary of {pmid: set of all evidence strings}
    :rtype: dict
    """
    evidences = {}
    pmid_filter = build_pmid_inclusion_filter(pmids)

    for _, _, _, data in filter_edges(graph, pmid_filter):
        pmid = data[CITATION][CITATION_REFERENCE]
        evidences.setdefault(pmid, set()).add(data[EVIDENCE])

    return evidences
Beispiel #4
0
def infer_missing_inverse_edge(graph, relations):
    """Add the inferred inverse edge for every edge having one of the given relations.

    For each matching edge ``u -> v``, an edge ``v -> u`` is added. Its relation is looked up in
    ``INFERRED_INVERSE`` and its key in ``unqualified_edge_code``, both indexed by the relation of
    the original edge.

    :param pybel.BELGraph graph: A BEL network (modified in place)
    :param relations: single or iterable of relation names to add their inverse inferred edges
    :type relations: str or iter[str]
    """
    if isinstance(relations, str):
        relations = [relations]

    # Snapshot the matching edges before adding anything. Adding edges while the filter is still
    # walking the graph can raise "dictionary changed size during iteration" or make the loop
    # visit edges it has just inserted.
    matching_edges = list(filter_edges(graph, build_relation_filter(relations)))

    for u, v, _, d in matching_edges:
        relation = d[RELATION]
        graph.add_edge(
            v,
            u,
            key=unqualified_edge_code[relation],
            **{RELATION: INFERRED_INVERSE[relation]}
        )
Beispiel #5
0
def count_citations(graph, **annotations):
    """Count citations over the edges that pass an annotation filter.

    Citation pairs (as returned by ``get_citation_pair``) are first de-duplicated per ``(u, v)``
    node pair, so a citation is counted once for each node pair it appears on. Edges without a
    ``CITATION`` entry are ignored.

    :param pybel.BELGraph graph: A BEL graph
    :param dict annotations: The annotation filters to use
    :return: A counter from {citation pair: frequency}
    :rtype: collections.Counter
    """
    edge_filter = build_edge_data_filter(annotations)
    pairs_by_node_pair = defaultdict(set)

    for u, v, _, data in filter_edges(graph, edge_filter):
        if CITATION in data:
            pairs_by_node_pair[u, v].add(get_citation_pair(data))

    frequencies = Counter()
    for citation_pairs in pairs_by_node_pair.values():
        frequencies.update(citation_pairs)

    return frequencies
Beispiel #6
0
def serialize_authors(graph, force_serialize=False):
    """Join every citation author list back into one pipe-delimited string, in place.

    Edges are selected with the ``has_authors`` filter; author entries that are not lists are
    left untouched. The ``PYBEL_PARSED_AUTHORS`` tag is removed from the graph afterwards.

    :param pybel.BELGraph graph: A BEL graph
    :param bool force_serialize: Serialize even if the graph is not tagged as parsed
    """
    if not force_serialize and 'PYBEL_PARSED_AUTHORS' not in graph.graph:
        log.warning('Authors have not yet been parsed in %s', graph.name)
        return

    for u, v, k, data in filter_edges(graph, has_authors):
        author_list = data[CITATION][CITATION_AUTHORS]

        if isinstance(author_list, list):
            graph.edge[u][v][k][CITATION][CITATION_AUTHORS] = '|'.join(author_list)

    # Drop the tag if present; it may be absent when serialization was forced.
    graph.graph.pop('PYBEL_PARSED_AUTHORS', None)
Beispiel #7
0
def enrich_pubmed_citations(graph, stringify_authors=False, manager=None):
    """Overwrites all PubMed citations with values from NCBI's eUtils lookup service.

    Sets authors as list, so probably a good idea to run :func:`pybel_tools.mutation.serialize_authors` before
    exporting.

    :param pybel.BELGraph graph: A BEL graph
    :param bool stringify_authors: Converts all author lists to author strings using
                                  :func:`pybel_tools.mutation.serialize_authors`. Defaults to ``False``.
    :param manager: An RFC-1738 database connection string, a pre-built :class:`pybel.manager.Manager`,
                    or ``None`` for default connection
    :type manager: None or str or Manager
    :return: The PMIDs reported as errors by the lookup, plus those for which no data came back.
             An empty set if the graph was already enriched.
    :rtype: set[str]
    """
    if 'PYBEL_ENRICHED_CITATIONS' in graph.graph:
        log.warning('citations have already been enriched in %s', graph)
        return set()

    pmids = get_pubmed_identifiers(graph)
    pmid_data, errors = get_citations_by_pmids(manager=manager, pmids=pmids)

    for u, v, k in filter_edges(graph, has_pubmed):
        citation = graph.edge[u][v][k][CITATION]
        pmid = citation[CITATION_REFERENCE].strip()

        if pmid not in pmid_data:
            log.warning('Missing data for PubMed identifier: %s', pmid)
            errors.add(pmid)
            continue

        citation.update(pmid_data[pmid])

    if stringify_authors:
        # Force serialization: the graph may never have been tagged PYBEL_PARSED_AUTHORS, and
        # without forcing, serialize_authors would only warn and leave the author lists as they are.
        serialize_authors(graph, force_serialize=True)
    else:
        graph.graph['PYBEL_PARSED_AUTHORS'] = True

    graph.graph['PYBEL_ENRICHED_CITATIONS'] = True

    return errors
 def test_missing_edge_filter(self):
     """Filtering with an empty conjunction of edge predicates should yield the edge (1, 2)."""
     empty_conjunction = and_edge_predicates([])
     passed = make_edge_iterator_set(filter_edges(self.graph, empty_conjunction))
     self.assertEqual({(1, 2)}, passed)
 def test_keep_edge_unpermissive(self):
     """Filtering with the inverse of the always-true predicate should yield no edges."""
     reject_all = invert_edge_predicate(true_edge_predicate)
     remaining = make_edge_iterator_set(filter_edges(self.graph, reject_all))
     self.assertEqual(set(), remaining)
 def test_keep_edge_permissive(self):
     """Filtering with the always-true predicate should yield the edge (1, 2)."""
     passed_edges = filter_edges(self.graph, true_edge_predicate)
     passed = make_edge_iterator_set(passed_edges)
     self.assertEqual({(1, 2)}, passed)
 def test_no_edge_filter(self):
     """Filtering with an empty list of predicates should yield the edge (1, 2)."""
     unfiltered = filter_edges(self.graph, [])
     passed = make_edge_iterator_set(unfiltered)
     self.assertEqual({(1, 2)}, passed)
 def test_concatenate_single_edge_filter(self):
     """Filtering with a one-element list holding the always-true predicate should yield (1, 2)."""
     passed_edges = filter_edges(self.graph, [true_edge_predicate])
     passed = make_edge_iterator_set(passed_edges)
     self.assertEqual({(1, 2)}, passed)
Beispiel #13
0
 def test_concatenate_single_edge_filter(self):
     """Filtering with a one-element list holding ``keep_edge_permissive`` should yield (1, 2)."""
     passed_edges = filter_edges(self.graph, [keep_edge_permissive])
     passed = make_edge_iterator_set(passed_edges)
     self.assertEqual({(1, 2)}, passed)
Beispiel #14
0
def _collapse_edge_passing_predicates(graph: BELGraph,
                                      edge_predicates: EdgePredicates = None
                                      ) -> None:
    """Collapse every edge that passes the given edge predicates.

    For each passing edge, ``collapse_pair`` is called with the edge's source as the survivor and
    its target as the victim.

    NOTE(review): ``collapse_pair`` runs while ``filter_edges`` is still being iterated. If
    ``filter_edges`` is lazy and ``collapse_pair`` changes the graph's structure, this may need
    rework - confirm against both helpers.
    """
    passing_edges = filter_edges(graph, edge_predicates=edge_predicates)
    for source, target, _ in passing_edges:
        collapse_pair(graph, survivor=source, victim=target)