コード例 #1
0
def load_and_merge(config: dict, load_config):
    """
    Load nodes and edges from files and KGs, as defined in a config YAML, and merge them into a single graph.
    The merge happens in-memory. This merged graph can then be written to a local/remote Neo4j instance
    OR be serialized into a file.
    \f

    .. note::
        Everything here is driven by the ``load_config`` YAML.

    Parameters
    ----------
    config: dict
        Not read by this function.
    load_config: str
        Path to the YAML file to open. The file must provide a ``target``
        mapping (one entry per source, each with a ``type`` plus either a
        ``filename`` or ``uri``/``username``/``password``) and a
        ``destination`` mapping with the same kind of keys.

    """

    with open(load_config, 'r') as yml_file:
        cfg = yaml.load(yml_file, Loader=yaml.FullLoader)

    # Load every source into its own transformer; unsupported types are
    # logged and skipped so the remaining sources can still be merged.
    transformers = []
    for key, target in cfg['target'].items():
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            transformer = get_transformer(target['type'])()
            transformer.parse(target['filename'])
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = kgx.NeoTransformer(None, target['uri'],
                                             target['username'],
                                             target['password'])
            # TODO: support filters
            transformer.load()
            transformers.append(transformer)
        else:
            logging.error(
                "type {} not yet supported for KGX load-and-merge operation.".
                format(target['type']))

    merged_transformer = Transformer()
    merged_transformer.merge_graphs([x.graph for x in transformers])

    destination = cfg['destination']
    if destination['type'] in ['csv', 'tsv', 'ttl', 'json', 'tar']:
        # Hand the merged graph to the writer. Constructing it without a
        # graph would serialize a transformer that never saw the merged data.
        destination_transformer = get_transformer(destination['type'])(
            merged_transformer.graph)
        destination_transformer.save(destination['filename'])
    elif destination['type'] == 'neo4j':
        destination_transformer = kgx.NeoTransformer(
            merged_transformer.graph,
            uri=destination['uri'],
            username=destination['username'],
            password=destination['password'])
        destination_transformer.save_with_unwind()
    else:
        logging.error(
            "type {} not yet supported for KGX load-and-merge operation.".
            format(destination['type']))
コード例 #2
0
ファイル: translator_kgx.py プロジェクト: todun/kgx
def load_and_merge(merge_config, destination_uri, destination_username,
                   destination_password):
    """
    Load nodes and edges from KGs, as defined in a config YAML, and merge them into a single graph.

    Every entry under ``target`` in the YAML is read from the Neo4j instance
    described by its ``neo4j`` section (``host``, ``port``, ``username``,
    ``password``). An entry may also carry a ``target_filter`` mapping, whose
    items are passed to ``set_filter``, and a ``query_limits`` mapping with
    optional ``start``/``end`` values that are passed to ``load``.

    The merged graph is written to the destination Neo4j instance only when
    the destination URI, username and password are all truthy.

    Parameters
    ----------
    merge_config: str
        Path to the YAML file to open.
    destination_uri: str
        URI of the Neo4j instance that receives the merged graph.
    destination_username: str
        Username for the destination instance.
    destination_password: str
        Password for the destination instance.

    """

    with open(merge_config, 'r') as ymlfile:
        # yaml.load() without an explicit Loader is deprecated and raises a
        # TypeError on PyYAML >= 6. The config is a plain mapping of scalars,
        # so the safe loader is sufficient.
        cfg = yaml.safe_load(ymlfile)

    transformers = []
    for key, target in cfg['target'].items():
        neo4j_cfg = target['neo4j']
        uri = "{}:{}".format(neo4j_cfg['host'], neo4j_cfg['port'])
        # Log only the entry name and URI: the full target entry contains the
        # Neo4j password and must not end up in the logs.
        logging.info("Connecting to {} at {}".format(key, uri))
        n = kgx.NeoTransformer(None, uri,
                               neo4j_cfg['username'],
                               neo4j_cfg['password'])
        transformers.append(n)

        # Set filters; an absent or empty section means no filters.
        target_filters = target.get('target_filter') or {}
        for filter_name, filter_value in target_filters.items():
            n.set_filter(filter_name, filter_value)

        # Defaults (start=0, end=None) apply when a limit is not configured.
        query_limits = target.get('query_limits') or {}
        start = query_limits.get('start', 0)
        end = query_limits.get('end')

        n.load(start=start, end=end)

    merged_transformer = Transformer()
    merged_transformer.merge([x.graph for x in transformers])

    if destination_uri and destination_username and destination_password:
        destination = kgx.NeoTransformer(merged_transformer.graph,
                                         uri=destination_uri,
                                         username=destination_username,
                                         password=destination_password)
        destination.save_with_unwind()
コード例 #3
0
ファイル: test_transformer.py プロジェクト: vemonet/kgx
def test_serialization():
    """Dump a graph to a file, restore it, and check the round trip.

    The restored graph must be non-empty and have the same number of nodes
    and edges as the graph that was dumped.
    """
    graphs = get_graphs()
    serialized_path = os.path.join(target_dir, 'graph_serialization.json')

    t1 = Transformer(source_graph=graphs[0])
    assert t1.is_empty() is False
    Transformer.dump_to_file(t1.graph, serialized_path)

    new_graph = Transformer.restore_from_file(serialized_path)
    t2 = Transformer(source_graph=new_graph)
    # Check the restored transformer here; re-checking t1 would say nothing
    # about whether the round trip produced any data.
    assert t2.is_empty() is False
    assert t2.graph.number_of_nodes() == t1.graph.number_of_nodes()
    assert t2.graph.number_of_edges() == t1.graph.number_of_edges()
コード例 #4
0
ファイル: merge_kg.py プロジェクト: shenfcStanley/kg-covid-19
def load_and_merge(yaml_file: str) -> Transformer:
    """Read every source listed in the config YAML and merge them.

    Each entry under ``target`` is loaded into its own transformer, either by
    parsing the listed files or by loading from Neo4j. The resulting graphs
    are merged, a report is produced, and, if the config has a
    ``destination`` section, the merged graph is written to it.

    Args:
        yaml_file: A string pointing to a KGX compatible config YAML.

    Returns:
        kgx.Transformer: The merged transformer that contains the merged graph.

    """
    config = parse_load_config(yaml_file)
    loaded: List = []

    # one transformer per configured source
    for name, source in config['target'].items():
        logging.info("Loading {}".format(name))
        source_type = source['type']
        if source_type in get_file_types():
            # file-backed source: parse each listed file into one transformer
            reader = get_transformer(source_type)()
            for path in source['filename']:
                reader.parse(path, input_format='tsv')
        elif source_type == 'neo4j':
            reader = NeoTransformer(None, source['uri'],
                                    source['username'],
                                    source['password'])
            reader.load()
        else:
            logging.error("type {} not yet supported".format(source_type))
            continue
        loaded.append(reader)

    # fold all the per-source graphs into one
    merged = Transformer()
    merged.merge_graphs([item.graph for item in loaded])
    merged.report()

    # nothing to write without a destination section
    if 'destination' not in config:
        return merged

    destination = config['destination']
    destination_type = destination['type']
    if destination_type in ('csv', 'tsv', 'ttl', 'json', 'tar'):
        writer = get_transformer(destination_type)(merged.graph)
        writer.save(destination['filename'], extension=destination_type)
    elif destination_type == 'neo4j':
        writer = NeoTransformer(
            merged.graph,
            uri=destination['uri'],
            username=destination['username'],
            password=destination['password'])
        writer.save_with_unwind()
    else:
        logging.error(
            "type {} not yet supported for KGX load-and-merge operation.".
            format(destination_type))

    return merged
コード例 #5
0
ファイル: translator_kgx.py プロジェクト: cnbennett3/kgx
def set_transformer_filters(transformer:Transformer, labels:list, properties:list) -> None:
    """Register label and property filters on ``transformer``.

    ``labels`` yields ``(location, label)`` pairs. A pair whose location
    equals ``FilterLocation.EDGE.value`` is set as ``<location>_label``; any
    other location is set as ``<location>_category``.

    ``properties`` yields ``(location, property_name, property_value)``
    triples. Each is set as ``<location>_property`` with the
    ``(property_name, property_value)`` tuple as its value.
    """
    for location, label in labels:
        # edges filter on their label, everything else on its category
        suffix = 'label' if location == FilterLocation.EDGE.value else 'category'
        transformer.set_filter(target='{}_{}'.format(location, suffix), value=label)

    for location, property_name, property_value in properties:
        transformer.set_filter(
            target='{}_property'.format(location),
            value=(property_name, property_value),
        )
コード例 #6
0
ファイル: test_transformer.py プロジェクト: vemonet/kgx
def test_validate_correct_edge(edge):
    """Validating a well-formed edge must return a value rather than None."""
    validated = Transformer.validate_edge(edge)
    assert validated is not None
コード例 #7
0
ファイル: test_transformer.py プロジェクト: vemonet/kgx
def test_validate_incorrect_edge(edge):
    """Validating a malformed edge must raise ``KeyError``."""
    expected_error = KeyError
    with pytest.raises(expected_error):
        Transformer.validate_edge(edge)
コード例 #8
0
ファイル: test_transformer.py プロジェクト: vemonet/kgx
def test_validate_correct_node(node):
    """Validating a well-formed node must return it with a default category.

    The result must contain a ``category`` entry whose first element is
    ``Transformer.DEFAULT_NODE_CATEGORY``.
    """
    validated = Transformer.validate_node(node)
    assert validated is not None
    assert 'category' in validated
    first_category = validated['category'][0]
    assert first_category == Transformer.DEFAULT_NODE_CATEGORY
コード例 #9
0
ファイル: test_transformer.py プロジェクト: vemonet/kgx
def test_validate_incorrect_node(node):
    """Validating a malformed node must raise ``KeyError``."""
    expected_error = KeyError
    with pytest.raises(expected_error):
        Transformer.validate_node(node)
コード例 #10
0
ファイル: test_transformer.py プロジェクト: vemonet/kgx
def test_transformer():
    """Check a fresh Transformer and how its node/edge filters accumulate.

    A new Transformer must hold an empty ``Graph``. After two node category
    filters, two edge label filters and one edge subject-category filter are
    set, the assertions expect a single node filter key with three
    categories, and three edge filter keys in which both the subject and
    object category filters also hold three categories.
    """
    t = Transformer()
    assert isinstance(t.graph, Graph)
    assert t.is_empty()

    for category in ('biolink:Gene', 'biolink:Disease'):
        t.set_node_filter('category', {category})
    for edge_label in ('biolink:related_to', 'biolink:interacts_with'):
        t.set_edge_filter('edge_label', {edge_label})
    t.set_edge_filter('subject_category', {'biolink:Drug'})

    node_filters = t.node_filters
    edge_filters = t.edge_filters

    assert len(node_filters) == 1
    assert len(edge_filters) == 3

    assert 'category' in node_filters
    assert len(node_filters['category']) == 3

    assert 'edge_label' in edge_filters
    assert len(edge_filters['edge_label']) == 2

    # the subject and object category filters are expected to match each other
    for side in ('subject_category', 'object_category'):
        assert side in edge_filters
        assert len(edge_filters[side]) == 3
        assert 'biolink:Gene' in edge_filters[side]

    assert 'biolink:Drug' in node_filters['category']