예제 #1
0
 def upsert_edge(self, edge: Edge, node_type: str = None,
                 edge_predicate: str = None):
     """
     Insert ``edge`` unless ``exists_edge`` reports it is already stored.

     When the edge is new, both endpoint nodes are upserted and a blank
     edge node is written between them: source -> edge node -> target, both
     hops using ``edge_predicate``. The edge node carries the edge's type
     and value.

     :param edge: the edge to store
     :param node_type: node predicate, defaults to DEFAULT_NODE_TYPE
     :param edge_predicate: edge predicate, defaults to
         DEFAULT_EDGE_PREDICATE
     :return: None
     """
     log.debug("Upserting edge...")
     node_type = DEFAULT_NODE_TYPE if node_type is None else node_type
     edge_predicate = (DEFAULT_EDGE_PREDICATE if edge_predicate is None
                       else edge_predicate)

     already_there, _ = self.exists_edge(edge, node_type=node_type,
                                         edge_predicate=edge_predicate)
     if already_there:
         log.warning("upsert_edge() was called with existing edge:")
         log.warning(str(edge))
         return

     # Make sure both endpoints exist and grab their database ids
     src_node = self.upsert_node(
         Node(node_type=node_type, value=edge.src), echo=True)
     src_uid = src_node.db_id
     tgt_node = self.upsert_node(
         Node(node_type=node_type, value=edge.tgt), echo=True)
     tgt_uid = tgt_node.db_id

     # The template's leading whitespace is part of the emitted string
     nquads = """
         <{a}> <{ep}> _:e .
         _:e <{ep}> <{b}> .
         _:e <type> "{edge_type}" .
         _:e <value> "{edge_value}" .
         """.format(a=src_uid, b=tgt_uid,
                    ep=edge_predicate, edge_type=edge.edge_type,
                    edge_value=edge.edge_value)
     log.debug("Edge not found, adding nquad \n{}".format(nquads))
     self.mutate(nquads)
예제 #2
0
def _setup_connected(g: Graph):
    """Fill ``g`` with nodes "ABC" and "BCD" joined by one edge, then save."""
    nodes = [Node(value=v) for v in ("ABC", "BCD")]
    for node in nodes:
        g.upsert_node(node)
    g.add_edge(Edge(src="ABC", tgt="BCD", edge_value=0))
    g.save()
예제 #3
0
 def _parse_node(d: dict) -> Node:
     """
     Build a Node from a result dictionary.

     The entry keyed by DEFAULT_NODE_TYPE becomes the node value and the
     "uid" entry becomes its db_id; every other key is ignored. A missing
     value entry leaves the value as the empty string.
     """
     parsed = Node(value="")
     for key in d:
         if key == DEFAULT_NODE_TYPE:
             parsed.value = d[key]
             continue
         if key == "uid":
             parsed.db_id = d[key]
     return parsed
예제 #4
0
 def _parse_path(d: dict, node_type: str, edge_predicate: str) -> tuple:
     """
     Flatten a nested path result into a tuple of Nodes.

     Starting at ``d``, each step descends two ``edge_predicate`` levels
     (taking the first entry of each) and records the ``node_type`` value
     found there. The walk stops at the first level without
     ``edge_predicate``.
     """
     nodes = [Node(value=d[node_type])]
     cursor = d
     while edge_predicate in cursor:
         cursor = cursor[edge_predicate][0][edge_predicate][0]
         nodes.append(Node(value=cursor[node_type]))
     return tuple(nodes)
예제 #5
0
def test_dgraph_find_value_reverse(dg: Dgraph):
    """find_value_reverse on the target node's uid yields the edge value."""
    src_node = Node(value="ATCG")
    tgt_node = Node(value="ATCC")
    for node in (src_node, tgt_node):
        dg.upsert_node(node)
    dg.upsert_edge(
        Edge(src="ATCG", tgt="ATCC", edge_type="genome_a", edge_value=0))

    _found, tgt_uid = dg.exists_node(tgt_node)
    found_value = dg.find_value_reverse(tgt_uid, "genome_a")
    assert found_value == 0
예제 #6
0
def _setup_connected_distant(g: Graph):
    """Fill ``g`` with the two-hop chain ABC -> BCD -> CDE, then save."""
    nodes = [Node(value=v) for v in ("ABC", "BCD", "CDE")]
    for node in nodes:
        g.upsert_node(node)
    # (source, target, edge_value) for each hop, in insertion order
    hops = (("ABC", "BCD", 0), ("BCD", "CDE", 1))
    for src, tgt, position in hops:
        g.add_edge(Edge(src=src, tgt=tgt, edge_value=position))
    g.save()
예제 #7
0
def test_dgraph_find_value(dg: Dgraph):
    """find_value on the source node's uid yields the edge value."""
    src_node = Node(value="ATCG")
    tgt_node = Node(value="ATCC")
    for node in (src_node, tgt_node):
        dg.upsert_node(node)
    dg.upsert_edge(
        Edge(src="ATCG", tgt="ATCC", edge_type="genome_a", edge_value=0))

    _found, src_uid = dg.exists_node(src_node)
    found_value = dg.find_value(src_uid, "genome_a")
    log.info("Found value for {} as {}".format(src_uid, found_value))
    assert found_value == 0
예제 #8
0
    def path(self, node_a: str, node_b: str) -> typing.Tuple[tuple, tuple]:
        """
        Find the paths leading from ``node_a`` to ``node_b``.

        For every edge reported by ``connected`` as leaving ``node_a``, each
        edge arriving at ``node_b`` is considered. A pair is queried only
        when both edges share an edge type, the target edge value is not
        smaller than the source edge value, and the distance between the
        two values does not exceed the interpreter's recursion limit.

        :param node_a: value of the start node
        :param node_b: value of the end node
        :return: ``(paths, paths_meta)``; ``paths`` holds one tuple of Nodes
            per path found and ``paths_meta`` holds a matching dict with the
            path's ``edge_type``. Both are empty tuples when either node is
            missing or the nodes are not connected.
        """
        log.info("Checking all paths between {} and {}".format(node_a, node_b))
        # Only existence matters here; the uids are not needed below
        exists, _ = self.exists_node(Node(value=node_a))
        if not exists:
            return tuple(), tuple()
        exists, _ = self.exists_node(Node(value=node_b))
        if not exists:
            return tuple(), tuple()
        log.debug("Both nodes exist, checking connectivity...")

        connected, src_edges = self.connected(node_a, node_b)
        if not connected:
            return tuple(), tuple()

        log.debug("Nodes are connected")
        paths = []
        paths_meta = []
        max_length = sys.getrecursionlimit()
        # The edges arriving at node_b do not depend on the source edge, so
        # they are fetched once (lazily, on the first source edge) instead of
        # being re-queried on every iteration.
        tgt_edges = None

        for src_edge in src_edges:
            log.info("Finding path between {} and {} with source edge {}"
                     "".format(node_a, node_b, src_edge))
            if tgt_edges is None:
                tgt_edges = self.find_edges_reverse(node_b)
            for tgt_edge in tgt_edges:
                not_matching_edge = tgt_edge.edge_type != src_edge.edge_type
                not_directional = tgt_edge.edge_value < src_edge.edge_value
                ln = tgt_edge.edge_value - src_edge.edge_value
                pass_recursion = ln > max_length
                if not_matching_edge or not_directional or pass_recursion:
                    log.info("Skipping tgt edge {} for src edge {}".format(
                        tgt_edge, src_edge
                    ))
                    continue
                log.debug("Checking path for type: {}".format(
                    tgt_edge.edge_type))
                log.debug("Found start value of {} and end value of {}".format(
                    src_edge.edge_value, tgt_edge.edge_value))
                query = self._path_query(
                    node_type=DEFAULT_NODE_TYPE, node_value=node_a,
                    edge_predicate=DEFAULT_EDGE_PREDICATE,
                    edge_type=src_edge.edge_type,
                    start_int=src_edge.edge_value, end_int=tgt_edge.edge_value)
                r = self.query(query)
                log.info(r)
                # Exactly one root result is expected for a valid path
                if len(r['q']) != 1:
                    log.warning("Path not found for type: {}".format(
                        src_edge.edge_type
                    ))
                    continue
                p = self._parse_path(
                    r['q'][0], DEFAULT_NODE_TYPE, DEFAULT_EDGE_PREDICATE)
                paths.append(p)
                paths_meta.append({'edge_type': tgt_edge.edge_type})
        return tuple(paths), tuple(paths_meta)
예제 #9
0
def _setup_connected_shortcut(g: Graph):
    """
    Fill ``g`` with ABC -> BCD -> CDE on "path1" plus a direct ABC -> CDE
    edge on "path2", then save.
    """
    nodes = [Node(value=v) for v in ("ABC", "BCD", "CDE")]
    for node in nodes:
        g.upsert_node(node)
    # (source, target, edge_type, edge_value), in insertion order
    edge_specs = (
        ("ABC", "BCD", "path1", 0),
        ("BCD", "CDE", "path1", 1),
        ("ABC", "CDE", "path2", 0),
    )
    for src, tgt, kind, position in edge_specs:
        g.add_edge(
            Edge(src=src, tgt=tgt, edge_type=kind, edge_value=position))
    g.save()
예제 #10
0
def test_dgraph_find_edges(dg: Dgraph):
    """find_edges returns the single stored edge with all fields intact."""
    src_node = Node(value="ATCG")
    tgt_node = Node(value="ATCC")
    for node in (src_node, tgt_node):
        dg.upsert_node(node)
    dg.upsert_edge(
        Edge(src="ATCG", tgt="ATCC", edge_type="genome_a", edge_value=0))

    found_edges = dg.find_edges("ATCG")
    assert len(found_edges) == 1
    only_edge = found_edges[0]
    observed = (only_edge.src, only_edge.tgt,
                only_edge.edge_value, only_edge.edge_type)
    assert observed == ("ATCG", "ATCC", 0, "genome_a")
예제 #11
0
def test_dgraph_exists_node(dg: Dgraph):
    """exists_node is false before the upsert and true after it."""
    node = Node(value='a')
    found_before, _ = dg.exists_node(node)
    assert not found_before

    dg.upsert_node(node, echo=False)
    found_after, uid = dg.exists_node(node)
    assert found_after
    log.info("uid: {}".format(uid))
예제 #12
0
def test_graph_basics_nodes(g: Graph):
    """Every upserted node value can be read back through ``g.nodes``."""
    wanted_nodes = {Node(value=v) for v in ["ABC", "BCE", "CEF"]}
    for wanted in wanted_nodes:
        g.upsert_node(wanted)
    # The backing store may reorder nodes, so only the values are compared
    stored_values = {n.value for n in g.nodes}
    wanted_values = {w.value for w in wanted_nodes}
    assert stored_values == wanted_values
예제 #13
0
def test_graph_connected_repeats_one_middle_path(g: Graph):
    """A self-loop on the middle node still yields a single ABC -> CDE path."""
    nodes = [Node(value=v) for v in ("ABC", "BCD", "CDE")]
    for node in nodes:
        g.upsert_node(node)
    # (source, target, edge_value); the second hop loops BCD onto itself
    hops = (("ABC", "BCD", 0), ("BCD", "BCD", 1), ("BCD", "CDE", 2))
    for src, tgt, position in hops:
        g.add_edge(Edge(src=src, tgt=tgt, edge_value=position))
    g.save()

    found_paths, _ = g.path('ABC', 'CDE')
    assert len(found_paths) == 1
예제 #14
0
def _setup_connected_multiple(g: Graph):
    """
    Fill ``g`` with two distinct routes from ABC to CDE, then save:
    "path1" goes through BCD and "path2" goes through XYZ.
    """
    start, middle, middle_alt, end = (
        Node(value=v) for v in ("ABC", "BCD", "XYZ", "CDE"))
    # The alternative middle node is stored last
    for node in (start, middle, end, middle_alt):
        g.upsert_node(node)
    # (source, target, edge_type, edge_value), in insertion order
    edge_specs = (
        ("ABC", "BCD", "path1", 0),
        ("BCD", "CDE", "path1", 1),
        ("ABC", "XYZ", "path2", 0),
        ("XYZ", "CDE", "path2", 1),
    )
    for src, tgt, kind, position in edge_specs:
        g.add_edge(
            Edge(src=src, tgt=tgt, edge_type=kind, edge_value=position))
    g.save()
예제 #15
0
def test_graph_connected_repeats_full_path(g: Graph):
    """
    A route that repeats itself (ABC -> BCD -> CDE -> ABC -> BCD -> CDE)
    must yield three ABC -> CDE paths: two of length 3 and one of length 6.
    """
    n1 = Node(value="ABC")
    n2 = Node(value="BCD")
    n3 = Node(value="CDE")
    g.upsert_node(n1)
    g.upsert_node(n2)
    g.upsert_node(n3)
    e1 = Edge(src="ABC", tgt="BCD", edge_value=0)
    g.add_edge(e1)
    e2 = Edge(src="BCD", tgt="CDE", edge_value=1)
    g.add_edge(e2)
    e3 = Edge(src="CDE", tgt="ABC", edge_value=2)
    g.add_edge(e3)
    e4 = Edge(src="ABC", tgt="BCD", edge_value=3)
    g.add_edge(e4)
    e5 = Edge(src="BCD", tgt="CDE", edge_value=4)
    g.add_edge(e5)
    g.save()

    try:
        paths, _ = g.path('ABC', 'CDE')
    except Exception as exc:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are
        # not swallowed; the original failure is kept as the cause.
        raise GraphException(g) from exc

    assert len(paths) == 3

    c = 0
    for path in paths:
        assert path[0].value == "ABC"
        assert path[-1].value == "CDE"
        if len(path) == 3:
            assert path[1].value == "BCD"
            # There are 2 copies of this
            c += 1
        elif len(path) == 6:
            assert path[1].value == "BCD"
            assert path[2].value == "CDE"
            assert path[3].value == "ABC"
            assert path[4].value == "BCD"
            assert path[5].value == "CDE"

    assert c == 2
예제 #16
0
def test_dgraph_bulk_basics(dgraph_bundled_helper: DgraphBundledHelper):
    """
    Write a two-node RDF file into a temporary directory, load that
    directory through the helper, and check both nodes exist afterwards.
    """
    dg = dgraph_bundled_helper
    # NOTE(review): this directory is never removed; confirm whether the
    # helper still needs it after load() before adding cleanup.
    tmp_dir = tempfile.mkdtemp()
    r1 = pathlib.Path(tmp_dir, 'r1.rdf')
    with open(r1, 'a') as f:
        f.write('_:a <{n}> "ATCG" . \n_:b <{n}> "ATGC" . \n'.format(
            n=DEFAULT_NODE_TYPE))
    try:
        dg.load(tmp_dir)
        exists, _ = dg.g.exists_node(Node(value="ATCG"))
        assert exists
        log.info("ATCG exists")
        exists, _ = dg.g.exists_node(Node(value="ATGC"))
        assert exists
        log.info("ATGC exists")
    except Exception:
        # Dump the contents of the output directory to help diagnose the
        # failure, then re-raise with the original traceback.
        log.critical("Couldn't find one of the expected nodes")
        files = glob.glob('{}/**'.format(dg.g.out_dir), recursive=True)
        log.critical("Files in out_dir are:\n{}".format(files))
        raise
예제 #17
0
def test_dgraph_find_depth(dg: Dgraph):
    """
    find_depth reports 1 for directly linked nodes and 2 once a further
    hop is appended to the same edge type.
    """
    na = Node(value="ATCG")
    nb = Node(value="ATCC")
    dg.upsert_node(na)
    dg.upsert_node(nb)
    e = Edge(src="ATCG", tgt="ATCC", edge_type="genome_a", edge_value=0)
    dg.upsert_edge(e)
    _, uid_a = dg.exists_node(na)
    _, uid_b = dg.exists_node(nb)
    d = dg.find_depth(uid_a, uid_b, "genome_a")
    log.info("Found depth for {} to {} as {}".format(uid_a, uid_b, d))
    assert d == 1

    # Extend the chain by one node and measure from the start again
    nc = Node(value="ATGG")
    dg.upsert_node(nc)
    e2 = Edge(src="ATCC", tgt="ATGG", edge_type="genome_a", edge_value=1)
    dg.upsert_edge(e2)
    _, uid_c = dg.exists_node(nc)
    d2 = dg.find_depth(uid_a, uid_c, "genome_a")
    # Log the depth just measured (d2), not the earlier one
    log.info("Found depth for {} to {} as {}".format(uid_a, uid_c, d2))
    assert d2 == 2
예제 #18
0
def test_graph_node_labels(g: Graph):
    """Labels given to upsert_node are returned unchanged by ``g.nodes``."""
    # TODO: we dont support node labels yet for our Dgraph backend
    if isinstance(g, Dgraph):
        return
    dog = Node(value="ABC", labels={"species": "dog"})
    cat = Node(value="BCE", labels={"species": "cat"})
    for node in (dog, cat):
        g.upsert_node(node)

    assert len(g.nodes) == 2

    labels_by_value = {"ABC": dog.labels, "BCE": cat.labels}
    for stored in g.nodes:
        # Any value other than the two inserted means something went wrong
        assert stored.value in labels_by_value
        assert stored.labels == labels_by_value[stored.value]
예제 #19
0
    def find_edges_reverse(self, node_value: str) -> tuple:
        """
        Find the edges that point at the node holding ``node_value``.

        Three queries are issued: one for the uid of an edge node linked to
        the target, one for the value of the node linking to that edge node
        (used as ``src``), and one that expands the matching edge nodes for
        parsing.

        :param node_value: value of the target node
        :return: whatever ``_parse_edges_reverse`` builds from the expanded
            result, or an empty tuple when the node does not exist or any
            query comes back empty
        """
        exists, uid = self.exists_node(Node(value=node_value))
        if not exists:
            log.warning("node_value {} doesn't exist".format(node_value))
            return tuple()
        # Find the uid of the edge
        query = """
        {{
            q(func: has({et})) @filter(uid_in({et}, {tgt})) {{
                uid
            }}
        }}
        """.format(et=DEFAULT_EDGE_PREDICATE, tgt=uid)
        r = self.query(query)
        if not r['q']:
            return tuple()
        # NOTE(review): only the first matching edge is used to resolve the
        # source; confirm this is intended when several edges reach the node.
        edge_uid = r['q'][0]['uid']

        # Find the source edge
        query_2 = """
        {{
            q(func: has({nt})) @filter(uid_in({et}, {uid})) {{
                {nt}
            }}
        }}
        """.format(nt=DEFAULT_NODE_TYPE, et=DEFAULT_EDGE_PREDICATE,
                   uid=edge_uid)
        r_2 = self.query(query_2)
        if not r_2['q']:
            # Without a source node there is nothing to parse; bail out
            # rather than fail with an IndexError below.
            log.warning("No source node found for edge {}".format(edge_uid))
            return tuple()
        src = r_2['q'][0][DEFAULT_NODE_TYPE]

        # Finally, query the edge
        query_3 = """
        {{
            q(func: has({et})) @filter(uid_in({et}, {tgt})) {{
                expand(_all_) {{
                    uid
                    expand(_all_) {{
                        {nt}
                    }}
                }}
            }}
        }}
        """.format(nt=DEFAULT_NODE_TYPE,
                   et=DEFAULT_EDGE_PREDICATE, tgt=uid)
        r_3 = self.query(query_3)
        if not r_3['q']:
            return tuple()

        return self._parse_edges_reverse(
            list_edges=r_3['q'], node_type=DEFAULT_NODE_TYPE,
            edge_predicate=DEFAULT_EDGE_PREDICATE, src=src)
예제 #20
0
def test_graph_basics_edges_dgraph(dg: Dgraph):
    """Edges read back from Dgraph expose the node values as src and tgt."""
    g = dg
    expected = ["ABC", "BCE", "CEF"]
    expected = [Node(value=v) for v in expected]

    g.add_edge(Edge(src=expected[0].value, tgt=expected[1].value))
    g.add_edge(Edge(src=expected[1].value, tgt=expected[2].value))
    try:
        # In Dgraph we can retrieve the actual kmer value
        assert {(e.src, e.tgt) for e in g.edges} == {
            (expected[0].value, expected[1].value),
            (expected[1].value, expected[2].value)}
    except Exception as exc:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are
        # not swallowed; the original failure is kept as the cause.
        raise GraphException(g) from exc
예제 #21
0
 def _parse_node(node: dict) -> Node:
     """
     Convert a node dictionary returned by LemonGraph into our Node class.

     :param node: mapping holding the 'value', 'type' and 'ID' keys; every
         other key is carried over as a label
     :return: the Node, with labels set to None when there are no extra keys
     """
     reserved = ('value', 'type', 'ID')
     # Everything that is not a reserved field counts as a label
     extra = {key: val for key, val in node.items() if key not in reserved}
     return Node(value=node['value'],
                 node_type=node['type'],
                 db_id=node['ID'],
                 labels=extra or None)
예제 #22
0
 def upsert_node(self, node: Node, echo: bool = True) -> typing.Optional[
         Node]:
     """
     Insert ``node`` if ``exists_node`` does not find it.

     :param node: the node to store
     :param echo: when True, return a Node carrying the database id;
         when False, return None
     :return: the stored Node (with db_id) or None, depending on ``echo``
     """
     found, db_uid = self.exists_node(node)
     if not found:
         statement = '_:{value} <{type}> "{value}" .'.format(
             value=node.value, type=node.node_type)
         log.debug("Node not found, adding nquads \n{}".format(statement))
         self.mutate(statement)
         # Re-enter to pick up the uid assigned to the new node
         return self.upsert_node(node) if echo else None
     if not echo:
         return None
     return Node(node_type=node.node_type, value=node.value,
                 db_id=db_uid)
예제 #23
0
def test_graph_basics_edges_lemongraph(lgr: LGGraph):
    """Edges read back from LemonGraph expose node db_ids as src and tgt."""
    g = lgr
    expected = ["ABC", "BCE", "CEF"]
    expected = [Node(value=v) for v in expected]

    nodes_with_ids = []
    for node in expected:
        # Returned the node with db_id set, these are required to check edges
        n = g.upsert_node(node)
        nodes_with_ids.append(n)

    g.add_edge(Edge(src=expected[0].value, tgt=expected[1].value))
    g.add_edge(Edge(src=expected[1].value, tgt=expected[2].value))
    try:
        # In lemongraph these are stored as numerical IDs
        assert {(e.src, e.tgt) for e in g.edges} == {
            (nodes_with_ids[0].db_id, nodes_with_ids[1].db_id),
            (nodes_with_ids[1].db_id, nodes_with_ids[2].db_id)}
    except Exception as exc:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are
        # not swallowed; the original failure is kept as the cause.
        raise GraphException(g) from exc
예제 #24
0
def _setup_connected_no_node(g: Graph):
    """Fill ``g`` with the single node "ABC" and no edges, then save."""
    g.upsert_node(Node(value="ABC"))
    g.save()
예제 #25
0
    def update_graph(self,
                     km: Kmers,
                     gr: GraphRef,
                     encode: bool = False,
                     buffer: int = 333) -> int:
        """
        Add every kmer of ``km`` to the graph, linking consecutive kmers of
        the same contig with an edge.

        Edge values count up from 0 within each contig, and the edge type is
        the contig header joined to ``str(km)`` with ET_DELIMITER. Nodes are
        upserted explicitly except when the graph is an LGGraph.

        :param km: kmer source, consumed through ``next()`` while
            ``has_next`` / ``contig_has_next`` hold
        :param gr: used to turn kmers into node labels when ``encode`` is set
        :param encode: when True, store ``gr.node_label(kmer)`` instead of
            the kmer itself
        :param buffer: the graph is saved to ``self.out_file`` each time the
            kmer count reaches a multiple of this number
        :return: the number of kmers processed
        """
        self.out_file = os.path.join('outputs/', km.filepath + '.rdf')
        log.debug("Will write to {}".format(self.out_file))
        log.debug("Starting to graph {} in pid {}".format(km, os.getpid()))
        st = time.time()
        c = 0
        while km.has_next:
            _, kmer1 = km.next()
            # Create the first node
            node1_label = gr.node_label(kmer1) if encode else kmer1
            if not isinstance(self.graph, LGGraph):
                self.graph.upsert_node(Node(value=node1_label))
            c += 1
            # Used to incrementally encode the edges
            edge_c = 0
            # The same contig still has a kmer
            while km.contig_has_next:
                header2, kmer2 = km.next()
                # Create the second node
                node2_label = gr.node_label(kmer2) if encode else kmer2
                if not isinstance(self.graph, LGGraph):
                    # Keyword form, consistent with the first-node upsert
                    self.graph.upsert_node(Node(value=node2_label))
                # Create an edge
                try:
                    self.graph.add_edge(
                        Edge(
                            src=node1_label,
                            tgt=node2_label,
                            edge_type='{}{}{}'.format(header2, ET_DELIMITER,
                                                      str(km)),
                            edge_value=edge_c,
                        ),
                        echo=False)
                except Exception:
                    log.fatal("Failed to add edge between {} and {}".format(
                        node1_label, node2_label))
                    raise
                # The second kmer becomes the source of the next edge
                node1_label = node2_label
                c += 1
                edge_c += 1
                if c % buffer == 0:
                    self.graph.save(self.out_file)
                if c % 100000 == 0:
                    log.debug("{}/{}, {}%".format(c, len(km),
                                                  int(c / len(km) * 100)))

            # At this point, we're out of kmers on that contig
            # The loop will check if there's still kmers, and reset kmer1
        # NOTE(review): kmers added after the last multiple of ``buffer`` are
        # not saved here; confirm the caller performs a final save.
        en = time.time()
        log.debug("Done graphing {}, covering {} kmers in {} s".format(
            km, c, en - st))
        return c
예제 #26
0
def _setup_not_connected(g: Graph):
    """Fill ``g`` with nodes "ABC" and "BCD" and no edges, then save."""
    nodes = [Node(value=v) for v in ("ABC", "BCD")]
    for node in nodes:
        g.upsert_node(node)
    g.save()