def load_and_merge(config: dict, load_config):
    """
    Load nodes and edges from files and KGs, as defined in a config YAML, and merge them into a single graph.
    The merge happens in-memory. This merged graph can then be written to a local/remote Neo4j instance
    OR be serialized into a file.
    \f

    .. note::
        Everything here is driven by the ``load_config`` YAML.

    Parameters
    ----------
    config: dict
        Not read by this function.
    load_config: str
        Path to the YAML file. It must contain a ``target`` mapping (one entry
        per source, each with a ``type`` plus either ``filename`` or
        ``uri``/``username``/``password``) and a ``destination`` mapping
        (``type`` plus either ``filename`` or ``uri``/``username``/``password``).

    """

    with open(load_config, 'r') as YML:
        # NOTE(review): FullLoader is not meant for untrusted input; switch to
        # yaml.safe_load if this file can come from an untrusted source.
        cfg = yaml.load(YML, Loader=yaml.FullLoader)

    transformers = []
    for key, target in cfg['target'].items():
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            transformer = get_transformer(target['type'])()
            transformer.parse(target['filename'])
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = kgx.NeoTransformer(None, target['uri'],
                                             target['username'],
                                             target['password'])
            # TODO: support filters
            transformer.load()
            transformers.append(transformer)
        else:
            # Unsupported sources are reported and skipped, not fatal.
            logging.error(
                "type {} not yet supported for KGX load-and-merge operation.".
                format(target['type']))

    merged_transformer = Transformer()
    merged_transformer.merge_graphs([x.graph for x in transformers])

    destination = cfg['destination']
    if destination['type'] in ['csv', 'tsv', 'ttl', 'json', 'tar']:
        # Hand the merged graph to the writer: a transformer constructed
        # without arguments has no reference to the merged graph.
        destination_transformer = get_transformer(
            destination['type'])(merged_transformer.graph)
        destination_transformer.save(destination['filename'])
    elif destination['type'] == 'neo4j':
        destination_transformer = kgx.NeoTransformer(
            merged_transformer.graph,
            uri=destination['uri'],
            username=destination['username'],
            password=destination['password'])
        destination_transformer.save_with_unwind()
    else:
        logging.error(
            "type {} not yet supported for KGX load-and-merge operation.".
            format(destination['type']))
Exemple #2
0
def load_and_merge(merge_config, destination_uri, destination_username,
                   destination_password):
    """
    Load nodes and edges from KGs, as defined in a config YAML, and merge them into a single graph

    Parameters
    ----------
    merge_config: str
        Path to the YAML file. Every entry under ``target`` must contain a
        ``neo4j`` mapping with ``host``, ``port``, ``username`` and
        ``password``. An entry may also contain ``target_filter`` (a mapping
        passed pairwise to ``set_filter``) and ``query_limits`` (optional
        ``start`` and ``end``, forwarded to ``load``).
    destination_uri: str
        URI of the Neo4j instance that receives the merged graph.
    destination_username: str
        Username for the destination instance.
    destination_password: str
        Password for the destination instance.

    The merged graph is written only when all three destination arguments
    are truthy; otherwise a warning is logged and nothing is written.
    """

    with open(merge_config, 'r') as ymlfile:
        # An explicit Loader is required: calling yaml.load without one is
        # deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6.
        # NOTE(review): FullLoader is not meant for untrusted input; switch to
        # yaml.safe_load if this file can come from an untrusted source.
        cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)

    transformers = []
    for key, target in cfg['target'].items():
        neo4j_cfg = target['neo4j']
        uri = "{}:{}".format(neo4j_cfg['host'], neo4j_cfg['port'])
        # Log only the entry name and address: the full target mapping
        # contains the Neo4j password and must not end up in the logs.
        logging.info("Connecting to {} ({})".format(key, uri))
        n = kgx.NeoTransformer(None, uri,
                               neo4j_cfg['username'],
                               neo4j_cfg['password'])
        transformers.append(n)

        # Set filters; `or {}` tolerates a section that is present but empty.
        target_filters = target.get('target_filter') or {}
        for filter_name, filter_value in target_filters.items():
            n.set_filter(filter_name, filter_value)

        query_limits = target.get('query_limits') or {}
        start = query_limits.get('start', 0)
        end = query_limits.get('end')

        n.load(start=start, end=end)

    mergedTransformer = Transformer()
    mergedTransformer.merge([x.graph for x in transformers])

    if destination_uri and destination_username and destination_password:
        destination = kgx.NeoTransformer(mergedTransformer.graph,
                                         uri=destination_uri,
                                         username=destination_username,
                                         password=destination_password)
        destination.save_with_unwind()
    else:
        # Make the no-op visible instead of silently dropping the result.
        logging.warning(
            "Destination URI, username or password missing; "
            "merged graph was not written.")
Exemple #3
0
def make_neo4j_transformer(address, username, password):
    """
    Build a ``kgx.NeoTransformer`` for the Neo4j instance at ``address``.

    Explicitly passed credentials take precedence. A credential passed as
    ``None`` is taken from the corresponding component of the parsed
    ``address``; when it is missing there as well, ``error`` is called with
    an explanatory message. Host and port always come from ``address``.
    """
    parsed = urlparse(address)

    # Password is resolved first, then username.
    if password is None:
        if parsed.password is None:
            error('Could not extract the password from the address, please set password argument')
        else:
            password = parsed.password

    if username is None:
        if parsed.username is None:
            error('Could not extract the username from the address, please set username argument')
        else:
            username = parsed.username

    return kgx.NeoTransformer(
        host=parsed.hostname,
        port=parsed.port,
        username=username,
        password=password,
    )
Exemple #4
0
def load_and_merge(config: dict, load_config):
    """
    Load nodes and edges from files and KGs, as defined in a config YAML, and merge them into a single graph.
    The merge happens in-memory. This merged graph can then be written to a local/remote Neo4j instance
    OR be serialized into a file.
    \f

    .. note::
        Everything here is driven by the ``load_config`` YAML.

    Parameters
    ----------
    config: dict
        Not read by this function.
    load_config: str
        Path to the YAML file. It must contain a ``target`` mapping (one entry
        per source, each with a ``type`` plus either ``filename`` or
        ``uri``/``username``/``password``, and optionally ``filters``) and a
        ``destination`` mapping (``type`` plus either ``filename`` or
        ``uri``/``username``/``password``). ``filename`` of a target may be a
        single path or a list of paths.

    """
    with open(load_config, 'r') as YML:
        # NOTE(review): FullLoader is not meant for untrusted input; switch to
        # yaml.safe_load if this file can come from an untrusted source.
        cfg = yaml.load(YML, Loader=yaml.FullLoader)

    transformers = []
    for key, target in cfg['target'].items():
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            transformer = get_transformer(target['type'])()
            if target['type'] in {'tsv', 'neo4j'}:
                # currently supporting filters only for TSV and Neo4j
                _apply_filters(transformer, target)
            filenames = target['filename']
            if isinstance(filenames, str):
                # A bare string would otherwise be iterated one character
                # at a time; treat it as a one-element list.
                filenames = [filenames]
            for f in filenames:
                transformer.parse(f, input_format=target['type'])
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = kgx.NeoTransformer(None, target['uri'],
                                             target['username'],
                                             target['password'])
            _apply_filters(transformer, target)
            transformer.load()
            transformers.append(transformer)
        else:
            # Unsupported sources are reported and skipped, not fatal.
            logging.error(
                "type {} not yet supported for KGX load-and-merge operation.".
                format(target['type']))

    merged_graph = merge_all_graphs([x.graph for x in transformers])

    destination = cfg['destination']
    if destination['type'] in ['csv', 'tsv', 'ttl', 'json', 'tar']:
        destination_transformer = get_transformer(
            destination['type'])(merged_graph)
        destination_transformer.save(destination['filename'])
    elif destination['type'] == 'neo4j':
        destination_transformer = kgx.NeoTransformer(
            merged_graph,
            uri=destination['uri'],
            username=destination['username'],
            password=destination['password'])
        destination_transformer.save()
    else:
        logging.error(
            "type {} not yet supported for KGX load-and-merge operation.".
            format(destination['type']))


def _apply_filters(transformer, target):
    """Apply the optional ``filters`` section of ``target`` to ``transformer``.

    Does nothing when ``target`` has no ``filters`` key. Otherwise each entry
    of ``node_filters`` / ``edge_filters`` is registered on the transformer
    with its values converted to a set, and both mappings are logged.
    """
    if 'filters' not in target:
        return
    # `or {}` tolerates sections that are present but empty (YAML null).
    filters = target['filters'] or {}
    node_filters = filters.get('node_filters') or {}
    edge_filters = filters.get('edge_filters') or {}
    for k, v in node_filters.items():
        transformer.set_node_filter(k, set(v))
    for k, v in edge_filters.items():
        transformer.set_edge_filter(k, set(v))
    logging.info(f"with node filters: {node_filters}")
    logging.info(f"with edge filters: {edge_filters}")