Example #1
0
def test_get_prefix_prioritization_map():
    """
    Check that the prefix prioritization map covers core categories.

    The keys of the map returned by ``get_prefix_prioritization_map`` must
    include ``biolink:Gene``, ``biolink:Protein`` and ``biolink:Disease``.
    """
    known_categories = get_prefix_prioritization_map().keys()
    for category in ('biolink:Gene', 'biolink:Protein', 'biolink:Disease'):
        assert category in known_categories
Example #2
0
def clique_merge(
    target_graph: BaseGraph,
    leader_annotation: str = None,
    prefix_prioritization_map: Optional[Dict[str, List[str]]] = None,
    category_mapping: Optional[Dict[str, str]] = None,
    strict: bool = True,
) -> Tuple[BaseGraph, nx.MultiDiGraph]:
    """
    Run the clique merge pipeline on a graph.

    The pipeline has three timed stages: build the clique graph from the
    target graph, elect a leader for every clique, and consolidate the
    edges of the target graph. The duration of each stage is logged at
    INFO level.

    Parameters
    ----------
    target_graph: kgx.graph.base_graph.BaseGraph
        The original graph
    leader_annotation: str
        The field on a node that signifies that the node is the leader of a clique.
        Falls back to ``LEADER_ANNOTATION`` when not provided (or empty)
    prefix_prioritization_map: Optional[Dict[str, List[str]]]
        A map that gives a prefix priority for one or more categories.
        Entries given here are applied on top of the map returned by
        ``get_prefix_prioritization_map``
    category_mapping: Optional[Dict[str, str]]
        Mapping for non-Biolink Model categories to Biolink Model categories
    strict: bool
        Whether or not to merge nodes in a clique that have conflicting node categories

    Returns
    -------
    Tuple[kgx.graph.base_graph.BaseGraph, networkx.MultiDiGraph]
        A tuple containing the graph returned by edge consolidation, and the clique graph

    """
    # Start from the default priorities and layer any caller overrides on top.
    effective_priorities = get_prefix_prioritization_map()
    if prefix_prioritization_map:
        effective_priorities.update(prefix_prioritization_map)
    prefix_prioritization_map = effective_priorities

    leader_annotation = leader_annotation or LEADER_ANNOTATION

    # Stage 1: build the clique graph.
    began = current_time_in_millis()
    clique_graph = build_cliques(target_graph)
    elapsed = current_time_in_millis() - began
    log.info(f"Total time taken to build cliques: {elapsed} ms")

    # Stage 2: elect a leader for each clique.
    began = current_time_in_millis()
    elect_leader(
        target_graph,
        clique_graph,
        leader_annotation,
        prefix_prioritization_map,
        category_mapping,
        strict,
    )
    elapsed = current_time_in_millis() - began
    log.info(f"Total time taken to elect leaders for all cliques: {elapsed} ms")

    # Stage 3: consolidate edges in the target graph.
    began = current_time_in_millis()
    merged_graph = consolidate_edges(target_graph, clique_graph, leader_annotation)
    elapsed = current_time_in_millis() - began
    log.info(f"Total time taken to consolidate edges in target graph: {elapsed} ms")

    return merged_graph, clique_graph
Example #3
0
def test_get_prefix_prioritization_map():
    """
    Verify that the prefix prioritization map is keyed by the expected categories.
    """
    expected_categories = ['biolink:Gene', 'biolink:Protein', 'biolink:Disease']
    available = get_prefix_prioritization_map().keys()
    for expected in expected_categories:
        assert expected in available
Example #4
0
import logging
from typing import Optional, Tuple

import networkx as nx
import stringcase

from kgx.utils.kgx_utils import generate_edge_key, get_toolkit, snakecase_to_sentencecase, sentencecase_to_snakecase, \
    get_prefix_prioritization_map, get_biolink_element, get_biolink_ancestors

# CURIE string for the Biolink ``same_as`` predicate.
SAME_AS = 'biolink:same_as'
# Presumably the node property name used to mark a clique leader -- TODO confirm against usage.
LEADER_ANNOTATION = 'clique_leader'
# Computed once, at import time. NOTE: CliqueMerge.__init__ writes any
# caller-supplied entries directly into this module-level dict, so those
# overrides are shared by every CliqueMerge instance created afterwards.
PREFIX_PRIORITIZATION_MAP = get_prefix_prioritization_map()
# Starts empty; no use is visible in this part of the file.
# NOTE(review): purpose unclear from here -- confirm what gets stored in it.
MAPPING = {}

class CliqueMerge(object):
    """

    """

    def __init__(self, prefix_prioritization_map: dict = None):
        self.toolkit = get_toolkit()
        self.clique_graph = nx.Graph()
        self.target_graph = None
        if prefix_prioritization_map:
            for x, v in prefix_prioritization_map.items():
                PREFIX_PRIORITIZATION_MAP[x] = v

    def build_cliques(self, target_graph: nx.MultiDiGraph):
        """
        Builds a clique graph from ``same_as`` edges in ``target_graph``.