Ejemplo n.º 1
0
    def test_edge_iterate_using_gremlin(self):
        """Iterate weighted edges with batched queries executed by ``g.run``.

        Phase one repeatedly runs an un-shuffled batched edge query until
        ``gl.OutOfRangeError`` is raised (or ``max_iter`` runs have happened)
        and hands the collected source/destination ids, together with the
        configured id ranges, to ``utils.check_sorted_equal``.

        Phase two runs a shuffled query a fixed number of times and passes
        each batch's ids to ``utils.check_subset`` with the same ranges.
        """
        file_path = self.gen_test_data([utils.WEIGHTED], False)
        decoder = gl.Decoder(weighted=True)
        g = gl.Graph() \
          .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
        g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

        # Always release the graph, even when one of the checks below fails.
        try:
            batch_size = 4
            # Expected id ranges are shared by both phases.
            src_ids = range(self.src_range_[0], self.src_range_[1])
            dst_ids = range(self.dst_range_[0], self.dst_range_[1])

            query = g.E('first').batch(batch_size).values()
            res_src = []
            res_dst = []
            max_iter = 100
            for _ in range(max_iter):
                try:
                    edges = g.run(query)
                    utils.check_edge_weights(edges)
                    res_src.extend(edges.src_ids)
                    res_dst.extend(edges.dst_ids)
                except gl.OutOfRangeError:
                    # The query has no more batches to hand out.
                    break
            utils.check_sorted_equal(res_src, src_ids)
            utils.check_sorted_equal(res_dst, dst_ids)

            query = g.E('first').batch(batch_size).shuffle().values()
            max_iter = 10
            for _ in range(max_iter):
                edges = g.run(query)
                utils.check_edge_weights(edges)
                utils.check_subset(edges.src_ids, src_ids)
                utils.check_subset(edges.dst_ids, dst_ids)
        finally:
            g.close()
    def test_edge_shuffle(self):
        """Drain a shuffled, traversing edge query and compare the ids seen.

        Batches are pulled with ``sampler.next()`` until
        ``gl.OutOfRangeError`` is raised or 100 batches have been read; the
        accumulated source/destination ids are then passed to
        ``utils.check_sorted_equal`` along with the configured id ranges.
        """
        data_path = self.gen_test_data([utils.WEIGHTED], False)
        edge_decoder = gl.Decoder(weighted=True)
        g = gl.Graph().edge(source=data_path,
                            edge_type=self.edge_tuple_,
                            decoder=edge_decoder)
        g.init(tracker=utils.TRACKER_PATH)

        sampler = g.E('first').batch(4).shuffle(traverse=True).values()

        seen_src, seen_dst = [], []
        for _ in range(100):
            try:
                batch = sampler.next()
                utils.check_edge_weights(batch)
                seen_src += list(batch.src_ids)
                seen_dst += list(batch.dst_ids)
            except gl.OutOfRangeError:
                # Nothing left to read from the sampler.
                break

        expected_src = range(self.src_range_[0], self.src_range_[1])
        expected_dst = range(self.dst_range_[0], self.dst_range_[1])
        utils.check_sorted_equal(seen_src, expected_src)
        utils.check_sorted_equal(seen_dst, expected_dst)

        g.close()
Ejemplo n.º 3
0
    def test_weighted(self):
        """Emit one batch of weighted edges and check its ids and weights.

        The batch's source/destination ids are passed to ``utils.check_ids``
        with the configured id ranges, and the batch itself to
        ``utils.check_edge_weights``.
        """
        file_path = self.gen_test_data([utils.WEIGHTED], False)
        decoder = gl.Decoder(weighted=True)
        g = gl.Graph() \
          .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
        g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

        # Always release the graph, even when one of the checks below fails.
        try:
            edges = g.E("first").batch(self.batch_size_).emit()
            utils.check_ids(edges.src_ids,
                            range(self.src_range_[0], self.src_range_[1]))
            utils.check_ids(edges.dst_ids,
                            range(self.dst_range_[0], self.dst_range_[1]))
            utils.check_edge_weights(edges)
        finally:
            g.close()
    def test_2hop(self):
        """Sample two hops of neighbors and check each hop's edges and nodes.

        Hop 1 follows ``self._edge1_type`` from the seed ids; hop 2 follows
        ``self._edge2_type`` from the flattened hop-1 node ids.
        """
        gl.set_eager_mode(True)
        fanouts = [3, 2]
        seeds = self._seed_node1_ids
        sampler = self.g.neighbor_sampler(
            [self._edge1_type, self._edge2_type],
            expand_factor=fanouts,
            strategy="in_degree")
        sampled = sampler.get(seeds)

        # ---- hop 1: seeds -> node2 via edge1 ----
        hop_edges = sampled.layer_edges(1)
        hop_nodes = sampled.layer_nodes(1)

        utils.check_fixed_edge_dst_ids(
            hop_edges, dst_range=self._node2_range, expected_src_ids=seeds)
        utils.check_edge_type(
            hop_edges,
            src_type=self._node1_type,
            dst_type=self._node2_type,
            edge_type=self._edge1_type)
        utils.check_edge_shape(hop_edges, seeds.size * fanouts[0])
        utils.check_edge_attrs(hop_edges)
        utils.check_edge_labels(hop_edges)

        utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
        utils.check_node_ids(hop_nodes, self._node2_ids)
        utils.check_node_type(hop_nodes, node_type=self._node2_type)
        utils.check_node_shape(hop_nodes, seeds.size * fanouts[0])
        utils.check_node_weights(hop_nodes)
        utils.check_node_labels(hop_nodes)

        # ---- hop 2: hop-1 nodes -> node1 via edge2 ----
        # The flattened hop-1 node ids are the expected sources of hop 2.
        seeds = hop_nodes.ids.reshape(-1)
        hop_edges = sampled.layer_edges(2)
        hop_nodes = sampled.layer_nodes(2)

        utils.check_fixed_edge_dst_ids(
            hop_edges, dst_range=self._node1_range, expected_src_ids=seeds)
        utils.check_edge_type(
            hop_edges,
            src_type=self._node2_type,
            dst_type=self._node1_type,
            edge_type=self._edge2_type)
        utils.check_edge_shape(hop_edges, seeds.size * fanouts[1])
        utils.check_edge_attrs(hop_edges)
        utils.check_edge_weights(hop_edges)

        utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
        utils.check_node_ids(hop_nodes, self._node1_ids)
        utils.check_node_type(hop_nodes, node_type=self._node1_type)
        utils.check_node_shape(hop_nodes, seeds.size * fanouts[1])
Ejemplo n.º 5
0
    def test_weighted_labeled(self):
        """Emit one batch of weighted, labeled edges and run the edge checks.

        Source/destination ids go to ``utils.check_ids`` with the configured
        id ranges; the batch then goes through the label and weight checks.
        """
        gl.set_eager_mode(True)
        data_path = self.gen_test_data([utils.WEIGHTED, utils.LABELED], False)
        edge_decoder = gl.Decoder(weighted=True, labeled=True)
        g = gl.Graph().edge(source=data_path,
                            edge_type=self.edge_tuple_,
                            decoder=edge_decoder)
        g.init(tracker=utils.TRACKER_PATH)

        batch = g.E("first").batch(self.batch_size_).emit()

        expected_src = range(self.src_range_[0], self.src_range_[1])
        utils.check_ids(batch.src_ids, expected_src)
        expected_dst = range(self.dst_range_[0], self.dst_range_[1])
        utils.check_ids(batch.dst_ids, expected_dst)

        utils.check_edge_labels(batch)
        utils.check_edge_weights(batch)

        g.close()
Ejemplo n.º 6
0
    def test_weighted(self):
        """Read one batch of weighted edges through ``gl.Dataset``.

        The edge query is aliased as ``'e'``; the first dataset result is
        looked up under that alias and passed to the id and weight checks.
        """
        data_path = self.gen_test_data([utils.WEIGHTED], False)
        edge_decoder = gl.Decoder(weighted=True)
        g = gl.Graph().edge(source=data_path,
                            edge_type=self.edge_tuple_,
                            decoder=edge_decoder)
        g.init(tracker=utils.TRACKER_PATH)

        edge_query = g.E("first").batch(self.batch_size_).alias('e').values()
        dataset = gl.Dataset(edge_query, window=1)
        first_result = dataset.next()
        batch = first_result['e']

        expected_src = range(self.src_range_[0], self.src_range_[1])
        utils.check_ids(batch.src_ids, expected_src)
        expected_dst = range(self.dst_range_[0], self.dst_range_[1])
        utils.check_ids(batch.dst_ids, expected_dst)
        utils.check_edge_weights(batch)

        g.close()
Ejemplo n.º 7
0
    def test_edge_iterate(self):
        """Exercise ``edge_sampler`` with "by_order" and "random" strategies.

        The "by_order" sampler is read until ``gl.OutOfRangeError`` is raised
        (or 100 batches have been read) and the accumulated ids are passed to
        ``utils.check_sorted_equal`` with the configured id ranges. The
        "random" sampler is read 10 times and each batch's ids are passed to
        ``utils.check_subset`` with the same ranges.
        """
        data_path = self.gen_test_data([utils.WEIGHTED], False)
        edge_decoder = gl.Decoder(weighted=True)
        g = gl.Graph().edge(source=data_path,
                            edge_type=self.edge_tuple_,
                            decoder=edge_decoder)
        g.init(tracker=utils.TRACKER_PATH)

        batch_size = 4

        # ---- "by_order": read batches until the sampler is exhausted ----
        ordered_sampler = g.edge_sampler(
            'first', batch_size=batch_size, strategy="by_order")
        seen_src, seen_dst = [], []
        for _ in range(100):
            try:
                batch = ordered_sampler.get()
                utils.check_edge_weights(batch)
                seen_src += list(batch.src_ids)
                seen_dst += list(batch.dst_ids)
            except gl.OutOfRangeError:
                # Nothing left to read from the sampler.
                break

        expected_src = range(self.src_range_[0], self.src_range_[1])
        expected_dst = range(self.dst_range_[0], self.dst_range_[1])
        utils.check_sorted_equal(seen_src, expected_src)
        utils.check_sorted_equal(seen_dst, expected_dst)

        # ---- "random": every batch is checked against the same ranges ----
        random_sampler = g.edge_sampler(
            'first', batch_size=batch_size, strategy="random")
        for _ in range(10):
            batch = random_sampler.get()
            utils.check_edge_weights(batch)
            utils.check_subset(batch.src_ids, expected_src)
            utils.check_subset(batch.dst_ids, expected_dst)

        g.close()