def load_and_merge(yaml_file: str) -> Transformer:
    """Build a single merged graph from the sources listed in a config YAML.

    Every entry under ``target`` is loaded into its own transformer (from
    files or from Neo4j), the resulting graphs are merged, a report is
    printed, and - when the config has a ``destination`` entry - the merged
    graph is written out.

    Args:
        yaml_file: A string pointing to a KGX compatible config YAML.

    Returns:
        kgx.Transformer: The merged transformer that contains the merged graph.

    """
    config = parse_load_config(yaml_file)
    loaded: List = []

    # Load each configured source into its own transformer.
    for source_name in config['target']:
        source = config['target'][source_name]
        logging.info("Loading {}".format(source_name))

        if source['type'] in get_file_types():
            # File-backed source: parse every listed file into one transformer.
            file_transformer = get_transformer(source['type'])()
            for path in source['filename']:
                file_transformer.parse(path, input_format='tsv')
            loaded.append(file_transformer)
            continue

        if source['type'] == 'neo4j':
            neo_source = NeoTransformer(
                None, source['uri'], source['username'], source['password'])
            neo_source.load()
            loaded.append(neo_source)
            continue

        # Unknown source types are reported and skipped.
        logging.error("type {} not yet supported".format(source['type']))

    # Combine all loaded subgraphs into a single graph.
    merged_transformer = Transformer()
    subgraphs = [item.graph for item in loaded]
    merged_transformer.merge_graphs(subgraphs)
    merged_transformer.report()

    # Nothing more to do unless an output destination was configured.
    if 'destination' not in config:
        return merged_transformer

    sink = config['destination']
    if sink['type'] in ('csv', 'tsv', 'ttl', 'json', 'tar'):
        writer_cls = get_transformer(sink['type'])
        writer = writer_cls(merged_transformer.graph)
        writer.save(sink['filename'], extension=sink['type'])
    elif sink['type'] == 'neo4j':
        neo_sink = NeoTransformer(
            merged_transformer.graph,
            uri=sink['uri'],
            username=sink['username'],
            password=sink['password'])
        neo_sink.save_with_unwind()
    else:
        logging.error(
            "type {} not yet supported for KGX load-and-merge operation.".format(
                sink['type']))

    return merged_transformer
def test_neo_to_graph_transform():
    """
    Load a graph from Neo4j, print its report, and save it via PandasTransformer.
    """
    # Connection settings for the Neo4j instance the test reads from.
    connection = {
        'host': 'localhost',
        'port': '7474',
        'username': '******',
        'password': '******',
    }
    neo = NeoTransformer(**connection)
    neo.load()
    neo.report()

    # Hand the loaded graph to a PandasTransformer and save it under target_dir.
    exporter = PandasTransformer(neo.graph)
    exporter.save(os.path.join(target_dir, "neo_graph.csv"))
def test_neo_to_graph_transform():
    """
    Load from Neo4j with default settings, report, and save via PandasTransformer.
    """
    neo = NeoTransformer()
    neo.load()
    neo.report()

    # The NeoTransformer itself (not its graph) is passed to PandasTransformer.
    PandasTransformer(neo).save("target/neo_graph.csv")
def test_neo_to_graph_transform():
    """
    Load a graph from Neo4j, print its report, and save it via PandasTransformer.
    """
    # Connect using the module-level default Neo4j settings.
    neo = NeoTransformer(
        uri=DEFAULT_NEO4J_URL,
        username=DEFAULT_NEO4J_USERNAME,
        password=DEFAULT_NEO4J_PASSWORD,
    )
    neo.load()
    neo.report()

    # Hand the loaded graph to a PandasTransformer and save it under target_dir.
    exporter = PandasTransformer(neo.graph)
    exporter.save(os.path.join(target_dir, "neo_graph.csv"))
def test_neo_to_graph_transform():
    """
    Load from Neo4j and save via PandasTransformer (currently disabled).
    """
    # The bare return below exits before any Neo4j access, so the statements
    # after it never run; they are kept so the test can be re-enabled by
    # deleting the return.
    return

    neo = NeoTransformer(host='localhost', port='7474', username='', password='')
    neo.load()
    neo.report()
    exporter = PandasTransformer(neo.graph)
    exporter.save("target/neo_graph.csv")
help='A filter that can be applied to nodes') parser.add_argument('--edge_filter', action='append', help='A filter that can be applied to edges') parser.add_argument('--uri', help='URI/URL for Neo4j (including port)', default='localhost:7474') parser.add_argument('--username', help='username', default='neo4j') parser.add_argument('--password', help='password', default='demo') args = parser.parse_args() # Initialize NeoTransformer nt = NeoTransformer(None, uri=args.uri, username=args.username, password=args.password) if args.node_filter: for f in args.node_filter: k, v = f.split('=') nt.set_node_filter(k, set(v)) if args.edge_filter: for f in args.edge_filter: k, v = f.split('=') nt.set_edge_filter(k, set(v)) # Read from Neo4j with the given filter constraints (if any) nt.load() nt.report()
action='append', help='A filter that can be applied to node and/or edges') parser.add_argument('--host', help='host to connect with Neo4j', default='localhost') parser.add_argument('--bolt_port', help='Bolt port to connect with Neo4j', default='7687') parser.add_argument('--username', help='username (default: neo4j)', default='neo4j') parser.add_argument('--password', help='password (default: demo)', default='demo') args = parser.parse_args() # Initialize NeoTransformer n = NeoTransformer(None, args.host, {'bolt': args.bolt_port}, args.username, args.password) if args.filter is not None: if len(args.filter) > 0: for filter in args.filter: k, v = filter.split('=') # Set filters n.set_filter(k, v) # Read from Neo4j with the given filter constraints (if any) n.load() n.report()
def load_and_merge(yaml_file: str) -> nx.MultiDiGraph:
    """Load and merge sources defined in the config YAML.

    Each entry under ``target`` is loaded into its own transformer, per-source
    graph statistics are written to ``<key>_stats.yaml``, all graphs are
    merged, statistics for the merged graph are written to
    ``merged_graph_stats.yaml``, and the merged graph is saved to every
    configured ``destination``.

    Args:
        yaml_file: A string pointing to a KGX compatible config YAML.

    Returns:
        networkx.MultiDiGraph: The merged graph.

    Raises:
        FileNotFoundError: If a file listed for a file-based target does not
            exist or is not a regular file.

    """
    gm = GraphMerge()
    config = parse_load_config(yaml_file)
    transformers: List = []

    # make sure all files exist before we start load
    for key in config['target']:
        target = config['target'][key]
        logging.info("Checking that file exist for {}".format(key))
        if target['type'] in get_file_types():
            for f in target['filename']:
                if not os.path.exists(f) or not os.path.isfile(f):
                    # Interpolate the message; passing the values as extra
                    # exception arguments leaves the "{}" placeholders raw.
                    raise FileNotFoundError(
                        "File {} for transform {} in yaml file {} "
                        "doesn't exist! Dying.".format(f, key, yaml_file))

    # read all the sources defined in the YAML
    for key in config['target']:
        target = config['target'][key]
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            transformer = get_transformer(target['type'])()
            for f in target['filename']:
                transformer.parse(f, input_format='tsv')
            transformer.graph.name = key
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = NeoTransformer(None, target['uri'], target['username'], target['password'])
            transformer.load()
            transformer.graph.name = key
            transformers.append(transformer)
        else:
            logging.error("type {} not yet supported".format(target['type']))
            # Nothing was loaded for this key: skip the stats step, which
            # would otherwise hit an undefined name or reuse the previous
            # target's transformer.
            continue

        stats_filename = f"{key}_stats.yaml"
        generate_graph_stats(transformer.graph, key, stats_filename)

    # merge all subgraphs into a single graph
    merged_graph = gm.merge_all_graphs([x.graph for x in transformers])
    merged_graph.name = 'merged_graph'
    generate_graph_stats(merged_graph, merged_graph.name, "merged_graph_stats.yaml")

    # write the merged graph
    if 'destination' in config:
        for destination in config['destination'].values():
            if destination['type'] == 'neo4j':
                destination_transformer = NeoTransformer(
                    merged_graph,
                    uri=destination['uri'],
                    username=destination['username'],
                    password=destination['password'])
                destination_transformer.save_with_unwind()
            elif destination['type'] in get_file_types():
                destination_transformer = get_transformer(destination['type'])(merged_graph)
                destination_transformer.save(
                    destination['filename'], extension=destination['type'])
            else:
                logging.error(
                    "type {} not yet supported for KGX load-and-merge operation."
                    .format(destination['type']))

    return merged_graph
def load_and_merge(yaml_file: str) -> nx.MultiDiGraph:
    """Load and merge sources defined in the config YAML.

    Each entry under ``target`` is loaded into its own transformer, with
    optional ``filters`` applied before loading and optional ``operations``
    applied after. Per-source statistics are written to ``<key>_stats.yaml``,
    all graphs are merged, statistics for the merged graph are written to
    ``merged_graph_stats.yaml``, and the merged graph is saved to every
    configured ``destination``.

    A file-based source that fails to load is logged and skipped; it does not
    abort the remaining sources.

    Args:
        yaml_file: A string pointing to a KGX compatible config YAML.

    Returns:
        networkx.MultiDiGraph: The merged graph.

    Raises:
        FileNotFoundError: If a file listed for a file-based target does not
            exist or is not a regular file.

    """
    config = parse_load_config(yaml_file)
    transformers: List = []

    # make sure all files exist before we start load
    for key in config['target']:
        target = config['target'][key]
        logging.info("Checking that file exist for {}".format(key))
        if target['type'] in get_file_types():
            for f in target['filename']:
                if not os.path.exists(f) or not os.path.isfile(f):
                    # Interpolate the message; passing the values as extra
                    # exception arguments leaves the "{}" placeholders raw.
                    raise FileNotFoundError(
                        "File {} for transform {} in yaml file {} "
                        "doesn't exist! Dying.".format(f, key, yaml_file))

    # read all the sources defined in the YAML
    for key in config['target']:
        target = config['target'][key]
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            # Track what is being loaded so the error log names the file
            # that failed, or the target key if no file was reached yet.
            loading = key
            try:
                transformer = get_transformer(target['type'])()
                if target['type'] in {'tsv', 'neo4j'} and 'filters' in target:
                    apply_filters(target, transformer)
                for f in target['filename']:
                    loading = f
                    transformer.parse(f, input_format='tsv')
                transformer.graph.name = key
                if 'operations' in target:
                    apply_operations(target, transformer)
                transformers.append(transformer)
            except Exception:
                # Best-effort load: record the failure with its traceback and
                # move on, without generating stats for a partial graph.
                logging.exception("Failed loading {}".format(loading))
                continue
        elif target['type'] == 'neo4j':
            transformer = NeoTransformer(None, target['uri'], target['username'], target['password'])
            if 'filters' in target:
                apply_filters(target, transformer)
            transformer.load()
            if 'operations' in target:
                apply_operations(target, transformer)
            transformer.graph.name = key
            transformers.append(transformer)
        else:
            logging.error("type {} not yet supported".format(target['type']))
            # Nothing was loaded for this key: skip the stats step, which
            # would otherwise hit an undefined name or reuse the previous
            # target's transformer.
            continue

        stats_filename = f"{key}_stats.yaml"
        generate_graph_stats(transformer.graph, key, stats_filename)

    # merge all subgraphs into a single graph
    merged_graph = merge_all_graphs([x.graph for x in transformers])
    merged_graph.name = 'merged_graph'
    generate_graph_stats(
        merged_graph, merged_graph.name, "merged_graph_stats.yaml",
        ['provided_by'], ['provided_by'])

    # write the merged graph
    if 'destination' in config:
        for destination in config['destination'].values():
            if destination['type'] == 'neo4j':
                destination_transformer = NeoTransformer(
                    merged_graph,
                    uri=destination['uri'],
                    username=destination['username'],
                    password=destination['password'])
                destination_transformer.save()
            elif destination['type'] in get_file_types():
                destination_transformer = get_transformer(destination['type'])(merged_graph)
                mode = 'w:gz' if destination['type'] in {'tsv'} else None
                if destination['type'] in {'nt', 'nt.gz', 'ttl'}:
                    destination_transformer.set_property_types(PROPERTY_TYPES)
                destination_transformer.save(
                    destination['filename'],
                    output_format=destination['type'],
                    mode=mode)
            else:
                logging.error(
                    "type {} not yet supported for KGX load-and-merge operation."
                    .format(destination['type']))

    return merged_graph