Ejemplo n.º 1
0
    def test_node_iterate_using_gsl(self):
        """Iterate 'user' nodes through batched queries with eager mode on.

        Loads the generated attributed node file into a graph, then:

        1. Runs an ordered batch query repeatedly, collecting node ids until
           ``gl.OutOfRangeError`` is raised (or 100 runs have happened), and
           checks the collected ids equal ``self.value_range_`` once sorted.
        2. Runs a shuffled batch query 10 times and checks each batch of ids
           is a subset of that same range.

        The graph is closed in a ``finally`` clause so a failing check does
        not leave the initialized graph open.
        """
        gl.set_eager_mode(True)
        file_path = self.gen_test_data([utils.ATTRIBUTED])
        decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph().node(source=file_path,
                            node_type=self.node_type_,
                            decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        try:
            batch_size = 4
            ids = range(self.value_range_[0], self.value_range_[1])

            # Ordered traversal: 100 is only an upper bound, the loop stops
            # as soon as the query reports OutOfRangeError.
            query = g.V('user').batch(batch_size).values()
            res_ids = []
            for _ in range(100):
                try:
                    nodes = g.run(query)
                    utils.check_node_attrs(nodes)
                    res_ids.extend(nodes.ids)
                except gl.OutOfRangeError:
                    break
            utils.check_sorted_equal(res_ids, ids)

            # Shuffled traversal: only membership in the id range is checked.
            query = g.V('user').batch(batch_size).shuffle().values()
            for _ in range(10):
                nodes = g.run(query)
                utils.check_node_attrs(nodes)
                utils.check_subset(nodes.ids, ids)
        finally:
            g.close()
Ejemplo n.º 2
0
    def test_edge_iterate_using_gremlin(self):
        """Iterate 'first' edges through batched queries.

        Loads the generated weighted edge file into a graph, then:

        1. Runs an ordered batch query repeatedly, collecting source and
           destination ids until ``gl.OutOfRangeError`` is raised (or 100
           runs have happened), and checks them against ``self.src_range_``
           and ``self.dst_range_`` once sorted.
        2. Runs a shuffled batch query 10 times and checks each batch's
           endpoint ids are subsets of those ranges.

        The graph is closed in a ``finally`` clause, matching the sibling
        iterate tests, so it is released whether or not the checks pass.
        """
        file_path = self.gen_test_data([utils.WEIGHTED], False)
        decoder = gl.Decoder(weighted=True)
        g = gl.Graph().edge(source=file_path,
                            edge_type=self.edge_tuple_,
                            decoder=decoder)
        g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

        try:
            batch_size = 4
            src_ids = range(self.src_range_[0], self.src_range_[1])
            dst_ids = range(self.dst_range_[0], self.dst_range_[1])

            # Ordered traversal: 100 is only an upper bound, the loop stops
            # as soon as the query reports OutOfRangeError.
            query = g.E('first').batch(batch_size).values()
            res_src = []
            res_dst = []
            for _ in range(100):
                try:
                    edges = g.run(query)
                    utils.check_edge_weights(edges)
                    res_src.extend(edges.src_ids)
                    res_dst.extend(edges.dst_ids)
                except gl.OutOfRangeError:
                    break
            utils.check_sorted_equal(res_src, src_ids)
            utils.check_sorted_equal(res_dst, dst_ids)

            # Shuffled traversal: only membership in the id ranges is checked.
            query = g.E('first').batch(batch_size).shuffle().values()
            for _ in range(10):
                edges = g.run(query)
                utils.check_edge_weights(edges)
                utils.check_subset(edges.src_ids, src_ids)
                utils.check_subset(edges.dst_ids, dst_ids)
        finally:
            g.close()
Ejemplo n.º 3
0
    def test_node_iterate(self):
        """Iterate 'user' nodes with node samplers.

        Loads the generated attributed node file into a graph, then:

        1. Pulls batches from a ``"by_order"`` sampler, collecting node ids
           until ``gl.OutOfRangeError`` is raised (or 100 batches have been
           read), and checks the collected ids equal the range described by
           ``self.value_range_[0]`` once sorted.
        2. Pulls 10 batches from a ``"random"`` sampler and checks each batch
           of ids is a subset of that same range.

        The graph is closed in a ``finally`` clause so a failing check does
        not leave the initialized graph open.
        """
        file_path = self.gen_test_data([utils.ATTRIBUTED])
        decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph().node(source=file_path,
                            node_type=self.node_type_,
                            decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        try:
            batch_size = 4
            lower, upper = self.value_range_[0][0], self.value_range_[0][1]
            ids = range(lower, upper)

            # Ordered sampling: 100 is only an upper bound, the loop stops
            # as soon as the sampler reports OutOfRangeError.
            sampler = g.node_sampler('user',
                                     batch_size=batch_size,
                                     strategy="by_order")
            res_ids = []
            for _ in range(100):
                try:
                    nodes = sampler.get()
                    utils.check_node_attrs(nodes)
                    res_ids.extend(nodes.ids)
                except gl.OutOfRangeError:
                    break
            utils.check_sorted_equal(res_ids, ids)

            # Random sampling: only membership in the id range is checked.
            sampler = g.node_sampler('user',
                                     batch_size=batch_size,
                                     strategy="random")
            for _ in range(10):
                nodes = sampler.get()
                utils.check_node_attrs(nodes)
                utils.check_subset(nodes.ids, ids)
        finally:
            g.close()
    def test_node_iterate_from_graph(self):
        """Sample nodes from both endpoints of the 'first' edges.

        Loads the generated attributed edge file into a graph and, for the
        source endpoint (``gl.EDGE_SRC``, expected node type ``"user"``,
        ids in ``self.src_range_``) and then the destination endpoint
        (``gl.EDGE_DST``, expected node type ``"item"``, ids in
        ``self.dst_range_``):

        1. Pulls batches from a ``"by_order"`` node sampler, collecting ids
           until ``gl.OutOfRangeError`` is raised (or 100 batches have been
           read), and checks them against the expected range once sorted.
        2. Pulls 10 batches from a ``"random"`` node sampler and checks each
           batch of ids is a subset of the expected range.

        The graph is closed in a ``finally`` clause so a failing check does
        not leave the initialized graph open.
        """
        file_path = self.gen_test_data([utils.ATTRIBUTED], False)
        decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph().edge(source=file_path,
                            edge_type=self.edge_tuple_,
                            decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        try:
            batch_size = 4
            # (endpoint to sample from, expected node type, expected id range)
            cases = (
                (gl.EDGE_SRC, "user", self.src_range_),
                (gl.EDGE_DST, "item", self.dst_range_),
            )
            for node_from, node_type, id_range in cases:
                ids = range(id_range[0], id_range[1])

                # Ordered sampling: 100 is only an upper bound, the loop
                # stops as soon as the sampler reports OutOfRangeError.
                sampler = g.node_sampler('first',
                                         batch_size=batch_size,
                                         strategy="by_order",
                                         node_from=node_from)
                res_ids = []
                for _ in range(100):
                    try:
                        nodes = sampler.get()
                        utils.check_node_type(nodes, node_type)
                        res_ids.extend(nodes.ids)
                    except gl.OutOfRangeError:
                        break
                utils.check_sorted_equal(res_ids, ids)

                # Random sampling: only membership in the range is checked.
                sampler = g.node_sampler('first',
                                         batch_size=batch_size,
                                         strategy="random",
                                         node_from=node_from)
                for _ in range(10):
                    nodes = sampler.get()
                    utils.check_subset(nodes.ids, ids)
        finally:
            g.close()
Ejemplo n.º 5
0
 def test_full_sample(self):
   """Check neighbor batches produced by the "full" sampling strategy.

   Builds a query over ``self._node2_type`` nodes in batches of 4, follows
   ``self._edge2_type`` with ``sample(3).by("full")``, and reads
   ``(source ids, neighbor ids, offsets)`` triples from a ``gl.Dataset``
   until ``gl.OutOfRangeError`` is raised. For every source node, the
   offset must equal ``min(number of expected neighbors, 3)`` and the
   matching slice of neighbor ids must be a subset of the expected ones.
   """
   query = self.g.V(self._node2_type).batch(4).alias('a')
   query = query.outV(self._edge2_type).sample(3).by("full").alias('b')
   query = query.values(
       lambda x: (x['a'].ids, x['b'].ids, x['b'].offsets))
   dataset = gl.Dataset(query)
   while True:
     try:
       src, nbrs, offsets = dataset.next()
       # `offsets` holds per-source neighbor counts; walk the flat
       # neighbor array one source at a time.
       begin = 0
       for pos, count in enumerate(offsets):
         expected = utils.fixed_dst_ids(src[pos], self._node1_range)
         assert count == min(len(expected), 3)
         end = begin + count
         utils.check_subset(nbrs[begin:end], expected)
         begin = end
     except gl.OutOfRangeError:
       break
Ejemplo n.º 6
0
    def test_edge_iterate(self):
        """Iterate 'first' edges with edge samplers.

        Loads the generated weighted edge file into a graph, then:

        1. Pulls batches from a ``"by_order"`` edge sampler, collecting
           source and destination ids until ``gl.OutOfRangeError`` is raised
           (or 100 batches have been read), and checks them against
           ``self.src_range_`` and ``self.dst_range_`` once sorted.
        2. Pulls 10 batches from a ``"random"`` edge sampler and checks each
           batch's endpoint ids are subsets of those ranges.

        The graph is closed in a ``finally`` clause so a failing check does
        not leave the initialized graph open.
        """
        file_path = self.gen_test_data([utils.WEIGHTED], False)
        decoder = gl.Decoder(weighted=True)
        g = gl.Graph().edge(source=file_path,
                            edge_type=self.edge_tuple_,
                            decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        try:
            batch_size = 4
            src_ids = range(self.src_range_[0], self.src_range_[1])
            dst_ids = range(self.dst_range_[0], self.dst_range_[1])

            # Ordered sampling: 100 is only an upper bound, the loop stops
            # as soon as the sampler reports OutOfRangeError.
            sampler = g.edge_sampler('first',
                                     batch_size=batch_size,
                                     strategy="by_order")
            res_src = []
            res_dst = []
            for _ in range(100):
                try:
                    edges = sampler.get()
                    utils.check_edge_weights(edges)
                    res_src.extend(edges.src_ids)
                    res_dst.extend(edges.dst_ids)
                except gl.OutOfRangeError:
                    break
            utils.check_sorted_equal(res_src, src_ids)
            utils.check_sorted_equal(res_dst, dst_ids)

            # Random sampling: only membership in the id ranges is checked.
            sampler = g.edge_sampler('first',
                                     batch_size=batch_size,
                                     strategy="random")
            for _ in range(10):
                edges = sampler.get()
                utils.check_edge_weights(edges)
                utils.check_subset(edges.src_ids, src_ids)
                utils.check_subset(edges.dst_ids, dst_ids)
        finally:
            g.close()