Esempio n. 1
0
def test_graph_to_neo_load():
    """
    Parse a node CSV and an edge CSV, then save the result to Neo4j.

    The PandasTransformer itself (not its ``graph`` attribute) is handed to
    NeoTransformer, which is constructed without explicit connection
    arguments.
    """
    csv_transformer = PandasTransformer()
    for csv_path in ("tests/resources/x1n.csv", "tests/resources/x1e.csv"):
        csv_transformer.parse(csv_path)
    csv_transformer.report()

    neo_transformer = NeoTransformer(csv_transformer)
    neo_transformer.save()
    neo_transformer.neo4j_report()
Esempio n. 2
0
def test_neo_to_graph_upload():
    """Parse a JSON graph file and save the parsed graph to Neo4j."""
    json_transformer = JsonTransformer()
    json_transformer.parse('resources/robodb2.json')
    parsed_graph = json_transformer.graph

    # Connection settings come from the module-level defaults.
    connection = {
        'uri': DEFAULT_NEO4J_URL,
        'username': DEFAULT_NEO4J_USERNAME,
        'password': DEFAULT_NEO4J_PASSWORD,
    }
    neo_transformer = NeoTransformer(parsed_graph, **connection)
    neo_transformer.save()
    neo_transformer.neo4j_report()
Esempio n. 3
0
def test_csv_to_neo_load():
    """
    Parse the ``cm`` node and edge CSV files and save the graph to Neo4j.
    """
    csv_transformer = PandasTransformer()
    # Nodes are parsed before edges, in the same transformer.
    for file_name in ("cm_nodes.csv", "cm_edges.csv"):
        csv_transformer.parse(os.path.join(resource_dir, file_name))
    parsed_graph = csv_transformer.graph

    connection = {
        'uri': DEFAULT_NEO4J_URL,
        'username': DEFAULT_NEO4J_USERNAME,
        'password': DEFAULT_NEO4J_PASSWORD,
    }
    neo_transformer = NeoTransformer(parsed_graph, **connection)
    neo_transformer.save()
    neo_transformer.neo4j_report()
Esempio n. 4
0
def test_get_edges(clean_slate, query):
    """
    Save a test graph to Neo4j and check the number of edges returned
    after applying edge filters.

    Args:
        clean_slate: Fixture requested for its side effect only; it is not
            referenced in the body (presumably resets the database --
            confirm against the fixture definition).
        query: Indexable fixture value; ``query[1]`` is a mapping of edge
            filter keys to values and ``query[2]`` is the expected number
            of edges.
    """
    g = get_graph('kgx-unit-test')[3]
    t = NeoTransformer(g,
                       uri=DEFAULT_NEO4J_URL,
                       username=DEFAULT_NEO4J_USERNAME,
                       password=DEFAULT_NEO4J_PASSWORD)
    t.save()

    for k, v in query[1].items():
        t.set_edge_filter(k, v)

    # Only the count is checked; the previously built, never-used
    # ``edge_list`` local has been dropped.
    edges = t.get_edges()
    assert len(edges) == query[2]
Esempio n. 5
0
def test_load(clean_slate, query):
    """
    Save ``query[0]`` to Neo4j and compare the stored node and edge counts
    with ``query[1]`` and ``query[2]`` respectively.
    """
    neo = NeoTransformer(query[0],
                         uri=DEFAULT_NEO4J_URL,
                         username=DEFAULT_NEO4J_USERNAME,
                         password=DEFAULT_NEO4J_PASSWORD)
    neo.save()

    # The first result row is expected to hold exactly one value: the count.
    node_rows = list(neo.http_driver.query("MATCH (n) RETURN count(n)"))
    (node_total,) = node_rows[0]
    assert node_total == query[1]

    edge_rows = list(neo.http_driver.query("MATCH ()-[p]->() RETURN count(p)"))
    (edge_total,) = edge_rows[0]
    assert edge_total == query[2]
Esempio n. 6
0
def test_get_nodes(clean_slate, query):
    """
    Save ``query[0]`` to Neo4j, apply the node filters in ``query[1]`` and
    verify the nodes that come back.

    ``query[2]`` is the expected number of nodes and ``query[3]`` lists ids
    that must be among them.
    """
    neo = NeoTransformer(query[0],
                         uri=DEFAULT_NEO4J_URL,
                         username=DEFAULT_NEO4J_USERNAME,
                         password=DEFAULT_NEO4J_PASSWORD)
    neo.save()

    for filter_key, filter_value in query[1].items():
        neo.set_node_filter(filter_key, filter_value)
    fetched = neo.get_nodes()

    assert len(fetched) == query[2]
    fetched_ids = [record['id'] for record in fetched]
    missing = [wanted for wanted in query[3] if wanted not in fetched_ids]
    assert not missing
Esempio n. 7
0
def test_save_merge(clean_slate):
    """
    Save a graph, add nodes and an edge to it, save again, and check that
    the added properties can be read back from Neo4j.
    """
    neo = NeoTransformer(get_graph('kgx-unit-test')[2],
                         uri=DEFAULT_NEO4J_URL,
                         username=DEFAULT_NEO4J_USERNAME,
                         password=DEFAULT_NEO4J_PASSWORD)
    neo.save()

    # Extend the in-memory graph before the second save.
    node_b_attrs = {
        'id': 'B',
        'publications': ['PMID:1', 'PMID:2'],
        'category': ['biolink:NamedThing'],
    }
    node_c_attrs = {'id': 'C', 'source': 'kgx-unit-test'}
    edge_attrs = {
        'subject': 'A',
        'object': 'B',
        'edge_label': 'biolink:related_to',
        'test_prop': 'VAL123',
    }
    neo.graph.add_node('B', **node_b_attrs)
    neo.graph.add_node('C', **node_c_attrs)
    neo.graph.add_edge('A', 'B', **edge_attrs)
    assert neo.graph.number_of_nodes() == 3
    neo.save()

    # Only node 'B' is inspected; other nodes are skipped.
    for row in neo.http_driver.query("MATCH (n) RETURN n"):
        node_data = row[0]['data']
        if node_data['id'] != 'B':
            continue
        assert 'category' in node_data and node_data['category'] == [
            'biolink:NamedThing'
        ]
        assert 'publications' in node_data and node_data['publications'] == [
            'PMID:1', 'PMID:2'
        ]

    edge_rows = neo.http_driver.query("MATCH ()-[p]-() RETURN p",
                                      data_contents=True,
                                      returns=(Node, Relationship, Node))
    # The edge 'id' ('A-biolink:related_to-B') is deliberately not asserted.
    expected_edge_props = (
        ('subject', 'A'),
        ('object', 'B'),
        ('edge_label', 'biolink:related_to'),
        ('test_prop', 'VAL123'),
    )
    for row in edge_rows:
        edge_props = row[0].properties
        for prop_name, prop_value in expected_edge_props:
            assert edge_props[prop_name] == prop_value
Esempio n. 8
0
def load_and_merge(yaml_file: str) -> nx.MultiDiGraph:
    """Load and merge sources defined in the config YAML.

    Every entry under ``target`` is loaded (from files or from Neo4j), a
    stats file ``<key>_stats.yaml`` is generated per source, all graphs are
    merged, and the merged graph is written to each entry under
    ``destination`` (if that section is present).

    Args:
        yaml_file: A string pointing to a KGX compatible config YAML.

    Returns:
        networkx.MultiDiGraph: The merged graph.

    Raises:
        FileNotFoundError: If a file listed for a file-based target does
            not exist or is not a regular file.

    """
    gm = GraphMerge()
    config = parse_load_config(yaml_file)
    transformers: List = []

    # make sure all files exist before we start load
    for key in config['target']:
        target = config['target'][key]
        logging.info("Checking that file exist for {}".format(key))
        if target['type'] in get_file_types():
            for f in target['filename']:
                # isfile() is already False for paths that do not exist.
                if not os.path.isfile(f):
                    # Interpolate the message; passing the values as extra
                    # exception args left the placeholders unformatted.
                    raise FileNotFoundError(
                        "File {} for transform {}  in yaml file {} "
                        "doesn't exist! Dying.".format(f, key, yaml_file))

    # read all the sources defined in the YAML
    for key in config['target']:
        target = config['target'][key]
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            transformer = get_transformer(target['type'])()
            for f in target['filename']:
                transformer.parse(f, input_format='tsv')
                transformer.graph.name = key
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = NeoTransformer(None, target['uri'],
                                         target['username'],
                                         target['password'])
            transformer.load()
            transformers.append(transformer)
            transformer.graph.name = key
        else:
            logging.error("type {} not yet supported".format(target['type']))
            # No transformer was created for this target, so there is no
            # graph to report on; falling through would hit an unbound name
            # or silently reuse the previous target's transformer.
            continue
        stats_filename = f"{key}_stats.yaml"
        generate_graph_stats(transformer.graph, key, stats_filename)

    # merge all subgraphs into a single graph
    merged_graph = gm.merge_all_graphs([x.graph for x in transformers])
    merged_graph.name = 'merged_graph'
    generate_graph_stats(merged_graph, merged_graph.name,
                         "merged_graph_stats.yaml")

    # write the merged graph
    if 'destination' in config:
        for destination in config['destination'].values():
            if destination['type'] == 'neo4j':
                destination_transformer = NeoTransformer(
                    merged_graph,
                    uri=destination['uri'],
                    username=destination['username'],
                    password=destination['password'])
                destination_transformer.save_with_unwind()
            elif destination['type'] in get_file_types():
                destination_transformer = get_transformer(
                    destination['type'])(merged_graph)
                destination_transformer.save(destination['filename'],
                                             extension=destination['type'])
            else:
                logging.error(
                    "type {} not yet supported for KGX load-and-merge operation."
                    .format(destination['type']))

    return merged_graph
Esempio n. 9
0
parser.add_argument('--edges', help='file with edges in TSV format')
parser.add_argument(
    '--uri',
    default='localhost:7474',
    help='URI/URL for Neo4j (including port)',
)
parser.add_argument('--username', default='neo4j', help='username')
parser.add_argument('--password', default='demo', help='password')
args = parser.parse_args()

# Nothing to load when neither input file was given: show usage and stop.
# NOTE(review): ``args.nodes`` is presumably declared by a ``--nodes``
# argument added earlier in the script -- confirm.
if args.nodes is None and args.edges is None:
    usage()
    exit()

# Collect nodes first, then edges, into one PandasTransformer graph.
t = PandasTransformer()
for tsv_path in (args.nodes, args.edges):
    if tsv_path:
        t.parse(tsv_path, input_format='tsv')

# Hand the parsed graph to NeoTransformer using the CLI connection settings,
# then save it into Neo4j and print the report.
n = NeoTransformer(
    t.graph,
    uri=args.uri,
    username=args.username,
    password=args.password,
)
n.save()
n.neo4j_report()
Esempio n. 10
0
def load_and_merge(yaml_file: str) -> nx.MultiDiGraph:
    """Load and merge sources defined in the config YAML.

    Every entry under ``target`` is loaded (from files or from Neo4j),
    optionally filtered and post-processed via ``filters`` / ``operations``,
    and summarised in ``<key>_stats.yaml``. The resulting graphs are merged
    and the merged graph is written to each entry under ``destination``
    (if that section is present).

    A file-based source that fails to load is logged (with traceback) and
    skipped; it is neither merged nor included in the per-source stats.

    Args:
        yaml_file: A string pointing to a KGX compatible config YAML.

    Returns:
        networkx.MultiDiGraph: The merged graph.

    Raises:
        FileNotFoundError: If a file listed for a file-based target does
            not exist or is not a regular file.

    """
    config = parse_load_config(yaml_file)
    transformers: List = []

    # make sure all files exist before we start load
    for key in config['target']:
        target = config['target'][key]
        logging.info("Checking that file exist for {}".format(key))
        if target['type'] in get_file_types():
            for f in target['filename']:
                # isfile() is already False for paths that do not exist.
                if not os.path.isfile(f):
                    # Interpolate the message; passing the values as extra
                    # exception args left the placeholders unformatted.
                    raise FileNotFoundError(
                        "File {} for transform {}  in yaml file {} "
                        "doesn't exist! Dying.".format(f, key, yaml_file))

    # read all the sources defined in the YAML
    for key in config['target']:
        target = config['target'][key]
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            # Tracks the file being parsed so the error log names the right
            # one (or the target key if nothing was parsed yet).
            current_file = None
            try:
                transformer = get_transformer(target['type'])()
                if target['type'] in {'tsv', 'neo4j'} and 'filters' in target:
                    apply_filters(target, transformer)
                for current_file in target['filename']:
                    transformer.parse(current_file, input_format='tsv')
                    transformer.graph.name = key
                if 'operations' in target:
                    apply_operations(target, transformer)
            except Exception:
                # Best-effort load: keep going with the remaining sources,
                # but let KeyboardInterrupt/SystemExit propagate and keep
                # the traceback in the log.
                logging.exception(
                    "Failed loading {}".format(current_file or key))
                # The transformer may be unbound or only partially loaded;
                # do not generate stats for it.
                continue
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = NeoTransformer(None, target['uri'],
                                         target['username'],
                                         target['password'])
            if 'filters' in target:
                apply_filters(target, transformer)
            transformer.load()
            if 'operations' in target:
                apply_operations(target, transformer)
            transformers.append(transformer)
            transformer.graph.name = key
        else:
            logging.error("type {} not yet supported".format(target['type']))
            # No transformer exists for this target; skip the stats step
            # instead of reusing a stale (or unbound) one.
            continue
        stats_filename = f"{key}_stats.yaml"
        generate_graph_stats(transformer.graph, key, stats_filename)

    # merge all subgraphs into a single graph
    merged_graph = merge_all_graphs([x.graph for x in transformers])
    merged_graph.name = 'merged_graph'
    generate_graph_stats(merged_graph, merged_graph.name,
                         "merged_graph_stats.yaml", ['provided_by'],
                         ['provided_by'])

    # write the merged graph
    if 'destination' in config:
        for destination in config['destination'].values():
            if destination['type'] == 'neo4j':
                destination_transformer = NeoTransformer(
                    merged_graph,
                    uri=destination['uri'],
                    username=destination['username'],
                    password=destination['password'])
                destination_transformer.save()
            elif destination['type'] in get_file_types():
                destination_transformer = get_transformer(
                    destination['type'])(merged_graph)
                # 'tsv' output is written with mode 'w:gz'; other formats
                # pass mode=None.
                mode = 'w:gz' if destination['type'] in {'tsv'} else None
                if destination['type'] in {'nt', 'nt.gz', 'ttl'}:
                    destination_transformer.set_property_types(PROPERTY_TYPES)
                destination_transformer.save(destination['filename'],
                                             output_format=destination['type'],
                                             mode=mode)
            else:
                logging.error(
                    "type {} not yet supported for KGX load-and-merge operation."
                    .format(destination['type']))

    return merged_graph