def load_and_merge(config: dict, load_config):
    """
    Load nodes and edges from files and KGs, as defined in a config YAML,
    and merge them into a single graph. The merge happens in-memory. This
    merged graph can then be written to a local/remote Neo4j instance OR be
    serialized into a file.
    \f

    .. note::
        Everything here is driven by the ``load_config`` YAML.

    Parameters
    ----------
    config: dict
        Not read by this function.
    load_config:
        Path of the YAML file to open. The file must provide a ``target``
        mapping (one entry per source, each with a ``type`` plus either
        ``filename`` or ``uri``/``username``/``password``) and a
        ``destination`` entry of the same shape.

    """
    with open(load_config, 'r') as YML:
        cfg = yaml.load(YML, Loader=yaml.FullLoader)

    transformers = []
    for key, target in cfg['target'].items():
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            transformer = get_transformer(target['type'])()
            transformer.parse(target['filename'])
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = kgx.NeoTransformer(
                None, target['uri'], target['username'], target['password'])
            # TODO: support filters
            transformer.load()
            transformers.append(transformer)
        else:
            # Unsupported sources are reported and skipped; the remaining
            # targets are still merged.
            logging.error(
                "type {} not yet supported for KGX load-and-merge operation.".
                format(target['type']))

    merged_transformer = Transformer()
    merged_transformer.merge_graphs([x.graph for x in transformers])

    destination = cfg['destination']
    if destination['type'] in ['csv', 'tsv', 'ttl', 'json', 'tar']:
        # Hand the merged graph to the destination transformer; constructing
        # it without a graph would serialize nothing of what was merged.
        destination_transformer = get_transformer(
            destination['type'])(merged_transformer.graph)
        destination_transformer.save(destination['filename'])
    elif destination['type'] == 'neo4j':
        destination_transformer = kgx.NeoTransformer(
            merged_transformer.graph,
            uri=destination['uri'],
            username=destination['username'],
            password=destination['password'])
        destination_transformer.save_with_unwind()
    else:
        logging.error(
            "type {} not yet supported for KGX load-and-merge operation.".
            format(destination['type']))
def load_and_merge(merge_config, destination_uri, destination_username, destination_password):
    """
    Load nodes and edges from KGs, as defined in a config YAML, and merge
    them into a single graph.

    Parameters
    ----------
    merge_config:
        Path of the YAML file to open. Each entry under ``target`` must have
        a ``neo4j`` section (``host``, ``port``, ``username``, ``password``)
        and may have ``target_filter`` (filter name -> value) and
        ``query_limits`` (``start`` / ``end``) sections.
    destination_uri, destination_username, destination_password:
        Connection details of the Neo4j instance to write to. The merged
        graph is only saved when all three are truthy.

    """
    with open(merge_config, 'r') as ymlfile:
        # An explicit Loader is required by PyYAML >= 6 and silences the
        # deprecation warning of 5.x; FullLoader matches the old default.
        cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)

    transformers = []
    for key, target in cfg['target'].items():
        neo4j_cfg = target['neo4j']
        uri = "{}:{}".format(neo4j_cfg['host'], neo4j_cfg['port'])
        # Log only the address: the target entry also holds the password.
        logging.info("Connecting to {}".format(uri))

        n = kgx.NeoTransformer(
            None, uri, neo4j_cfg['username'], neo4j_cfg['password'])
        transformers.append(n)

        # Set filters (a missing or empty section means "no filters").
        target_filters = target.get('target_filter') or {}
        for target_filter, filter_value in target_filters.items():
            n.set_filter(target_filter, filter_value)

        query_limits = target.get('query_limits') or {}
        start = query_limits.get('start', 0)
        end = query_limits.get('end', None)
        n.load(start=start, end=end)

    merged_transformer = Transformer()
    merged_transformer.merge([x.graph for x in transformers])

    if destination_uri and destination_username and destination_password:
        destination = kgx.NeoTransformer(
            merged_transformer.graph,
            uri=destination_uri,
            username=destination_username,
            password=destination_password)
        destination.save_with_unwind()
def make_neo4j_transformer(address, username, password):
    """
    Build a ``kgx.NeoTransformer`` from an address plus optional credentials.

    Explicit ``username`` / ``password`` arguments take precedence; when one
    is ``None`` the corresponding component parsed from ``address`` is used
    instead. If neither source supplies a value, ``error`` is called with a
    message asking for the argument (NOTE(review): ``error`` presumably
    aborts -- confirm; if it returns, the ``None`` value is passed on).
    """
    parsed = urlparse(address)

    if password is None:
        if parsed.password is None:
            error('Could not extract the password from the address, please set password argument')
        else:
            password = parsed.password

    if username is None:
        if parsed.username is None:
            error('Could not extract the username from the address, please set username argument')
        else:
            username = parsed.username

    connection = {
        'host': parsed.hostname,
        'port': parsed.port,
        'username': username,
        'password': password,
    }
    return kgx.NeoTransformer(**connection)
def _apply_target_filters(transformer, target):
    """Set the node/edge filters declared under ``target['filters']``, if any."""
    if 'filters' not in target:
        return
    filters = target['filters']
    node_filters = filters.get('node_filters', {})
    edge_filters = filters.get('edge_filters', {})
    for k, v in node_filters.items():
        transformer.set_node_filter(k, set(v))
    for k, v in edge_filters.items():
        transformer.set_edge_filter(k, set(v))
    logging.info(f"with node filters: {node_filters}")
    logging.info(f"with edge filters: {edge_filters}")


def load_and_merge(config: dict, load_config):
    """
    Load nodes and edges from files and KGs, as defined in a config YAML,
    and merge them into a single graph. The merge happens in-memory. This
    merged graph can then be written to a local/remote Neo4j instance OR be
    serialized into a file.
    \f

    .. note::
        Everything here is driven by the ``load_config`` YAML.

    Parameters
    ----------
    config: dict
        Not read by this function.
    load_config:
        Path of the YAML file to open. The file must provide a ``target``
        mapping (one entry per source, each with a ``type``, an optional
        ``filters`` section, and either ``filename`` or
        ``uri``/``username``/``password``) and a ``destination`` entry.
        ``filename`` may be a list of paths or a single path string.

    """
    with open(load_config, 'r') as YML:
        cfg = yaml.load(YML, Loader=yaml.FullLoader)

    transformers = []
    for key, target in cfg['target'].items():
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            transformer = get_transformer(target['type'])()
            if target['type'] in {'tsv', 'neo4j'}:
                # currently supporting filters only for TSV and Neo4j
                _apply_target_filters(transformer, target)
            filenames = target['filename']
            if isinstance(filenames, str):
                # A bare string would otherwise be iterated character by
                # character; treat it as a single file.
                filenames = [filenames]
            for f in filenames:
                transformer.parse(f, input_format=target['type'])
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = kgx.NeoTransformer(
                None, target['uri'], target['username'], target['password'])
            _apply_target_filters(transformer, target)
            transformer.load()
            transformers.append(transformer)
        else:
            # Unsupported sources are reported and skipped; the remaining
            # targets are still merged.
            logging.error(
                "type {} not yet supported for KGX load-and-merge operation.".
                format(target['type']))

    merged_graph = merge_all_graphs([x.graph for x in transformers])

    destination = cfg['destination']
    if destination['type'] in ['csv', 'tsv', 'ttl', 'json', 'tar']:
        destination_transformer = get_transformer(
            destination['type'])(merged_graph)
        destination_transformer.save(destination['filename'])
    elif destination['type'] == 'neo4j':
        destination_transformer = kgx.NeoTransformer(
            merged_graph,
            uri=destination['uri'],
            username=destination['username'],
            password=destination['password'])
        destination_transformer.save()
    else:
        logging.error(
            "type {} not yet supported for KGX load-and-merge operation.".
            format(destination['type']))