def test_node_iterate_using_gsl(self):
    """Iterate over 'user' nodes with GSL queries run in eager mode.

    An unshuffled batch query is run until gl.OutOfRangeError is raised (or
    at most 100 times) and the collected ids are compared with the range
    described by self.value_range_. A shuffled query is then run 10 times
    and every batch of ids is checked to be drawn from that same range.
    """
    gl.set_eager_mode(True)
    file_path = self.gen_test_data([utils.ATTRIBUTED])
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .node(source=file_path, node_type=self.node_type_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)
    # try/finally guarantees the graph is closed even when one of the
    # checks below fails, so a failing test does not leak an open graph.
    try:
        batch_size = 4

        query = g.V('user').batch(batch_size).values()
        res_ids = []
        max_iter = 100  # upper bound on the number of batches pulled
        for _ in range(max_iter):
            try:
                nodes = g.run(query)
                utils.check_node_attrs(nodes)
                res_ids.extend(list(nodes.ids))
            except gl.OutOfRangeError:
                # Stop collecting as soon as gl.OutOfRangeError is raised.
                break
        ids = range(self.value_range_[0], self.value_range_[1])
        utils.check_sorted_equal(res_ids, ids)

        query = g.V('user').batch(batch_size).shuffle().values()
        max_iter = 10
        for _ in range(max_iter):
            nodes = g.run(query)
            utils.check_node_attrs(nodes)
            utils.check_subset(nodes.ids, ids)
    finally:
        g.close()
def test_edge_shuffle(self):
    """Traverse 'first' edges with a shuffled, traversing GSL query.

    Batches are pulled with sampler.next() until gl.OutOfRangeError is
    raised (or at most 100 times). The collected source and destination ids
    are then compared with the ranges described by self.src_range_ and
    self.dst_range_.
    """
    file_path = self.gen_test_data([utils.WEIGHTED], False)
    decoder = gl.Decoder(weighted=True)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)
    # try/finally guarantees the graph is closed even when a check fails.
    try:
        batch_size = 4
        sampler = g.E('first').batch(batch_size).shuffle(
            traverse=True).values()
        res_src = []
        res_dst = []
        max_iter = 100  # upper bound on the number of batches pulled
        for _ in range(max_iter):
            try:
                edges = sampler.next()
                utils.check_edge_weights(edges)
                res_src.extend(list(edges.src_ids))
                res_dst.extend(list(edges.dst_ids))
            except gl.OutOfRangeError:
                # Stop collecting as soon as gl.OutOfRangeError is raised.
                break
        src_ids = range(self.src_range_[0], self.src_range_[1])
        dst_ids = range(self.dst_range_[0], self.dst_range_[1])
        utils.check_sorted_equal(res_src, src_ids)
        utils.check_sorted_equal(res_dst, dst_ids)
    finally:
        g.close()
def test_edge_iterate_using_gremlin(self):
    """Iterate over 'first' edges with gremlin-like queries via g.run().

    An unshuffled batch query is run until gl.OutOfRangeError is raised (or
    at most 100 times) and the collected source/destination ids are
    compared with the ranges described by self.src_range_ and
    self.dst_range_. A shuffled query is then run 10 times and each batch
    is checked to be drawn from those ranges.
    """
    file_path = self.gen_test_data([utils.WEIGHTED], False)
    decoder = gl.Decoder(weighted=True)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)
    # Release the graph when the test ends, pass or fail, as the sibling
    # edge tests do with g.close().
    # NOTE(review): assumes g.close() is valid after a server_id /
    # server_count style init -- confirm against the Graph API.
    try:
        batch_size = 4
        src_ids = range(self.src_range_[0], self.src_range_[1])
        dst_ids = range(self.dst_range_[0], self.dst_range_[1])

        query = g.E('first').batch(batch_size).values()
        res_src = []
        res_dst = []
        max_iter = 100  # upper bound on the number of batches pulled
        for _ in range(max_iter):
            try:
                edges = g.run(query)
                utils.check_edge_weights(edges)
                res_src.extend(list(edges.src_ids))
                res_dst.extend(list(edges.dst_ids))
            except gl.OutOfRangeError:
                # Stop collecting as soon as gl.OutOfRangeError is raised.
                break
        utils.check_sorted_equal(res_src, src_ids)
        utils.check_sorted_equal(res_dst, dst_ids)

        query = g.E('first').batch(batch_size).shuffle().values()
        max_iter = 10
        for _ in range(max_iter):
            edges = g.run(query)
            utils.check_edge_weights(edges)
            utils.check_subset(edges.src_ids, src_ids)
            utils.check_subset(edges.dst_ids, dst_ids)
    finally:
        g.close()
def test_sampling_with_mask_eager_mode(self):
    """Sample from VAL-masked edges in eager mode, over two full passes.

    The query batches edges of self._edge1_type with mask gl.Mask.VAL,
    takes each edge's source ('src') and destination ('dst') vertices and
    samples 3 'topk' neighbors of 'dst' along self._edge2_type ('nbr').
    For every batch the destination ids are checked against their weights
    and labels. After each pass the collected source ids are compared with
    the expected multiset built from self._val_node_range.
    """
    gl.set_eager_mode(True)
    bs = 8
    q = self.g.E(self._edge1_type, mask=gl.Mask.VAL).batch(bs).alias('val') \
        .each(
            lambda e: (
                e.outV().alias('src'),
                e.inV().alias('dst')
                .outV(self._edge2_type).sample(3).by('topk').alias('nbr'))
        ).values(
            lambda x: (
                x['src'].ids,
                x['val'].labels,
                x['dst'].ids, x['dst'].weights, x['dst'].labels,
                x['nbr'].ids, x['nbr'].int_attrs))

    # The expectation does not depend on the pass, so build it once: every
    # id in the validation range is repeated once per fixed destination id.
    expected = []
    for elem in range(self._val_node_range[0], self._val_node_range[1]):
        expected += [elem] * len(
            utils.fixed_dst_ids(elem, self._node2_range))

    for _ in range(2):
        res = []
        while True:
            try:
                # Only src ids and the dst ids/weights/labels are checked;
                # the remaining outputs are unpacked solely so that the
                # 7-element result shape is still enforced.
                sid, _, did, dwei, dlb, _, _ = q.next()
                utils.check_id_weights(did, dwei)
                utils.check_equal(dlb, did)
                res += list(sid)
            except gl.OutOfRangeError:
                # Stop the pass as soon as gl.OutOfRangeError is raised.
                break
        utils.check_sorted_equal(res, expected)
def test_node_iterate(self):
    """Iterate over 'user' nodes with g.node_sampler().

    A "by_order" sampler is drained until gl.OutOfRangeError is raised (or
    at most 100 times) and the collected ids are compared with the range
    described by self.value_range_[0]. A "random" sampler is then polled 10
    times and each batch of ids is checked to be drawn from that range.
    """
    file_path = self.gen_test_data([utils.ATTRIBUTED])
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .node(source=file_path, node_type=self.node_type_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)
    # try/finally guarantees the graph is closed even when a check fails.
    try:
        batch_size = 4

        sampler = g.node_sampler('user', batch_size=batch_size,
                                 strategy="by_order")
        res_ids = []
        max_iter = 100  # upper bound on the number of batches pulled
        for _ in range(max_iter):
            try:
                nodes = sampler.get()
                utils.check_node_attrs(nodes)
                res_ids.extend(list(nodes.ids))
            except gl.OutOfRangeError:
                # Stop collecting as soon as gl.OutOfRangeError is raised.
                break
        ids = range(self.value_range_[0][0], self.value_range_[0][1])
        utils.check_sorted_equal(res_ids, ids)

        sampler = g.node_sampler('user', batch_size=batch_size,
                                 strategy="random")
        max_iter = 10
        for _ in range(max_iter):
            nodes = sampler.get()
            utils.check_node_attrs(nodes)
            utils.check_subset(nodes.ids, ids)
    finally:
        g.close()
def test_node_iterate_from_graph(self):
    """Iterate over the end-point nodes of 'first' edges via node_sampler.

    For the source side (gl.EDGE_SRC, expected node type "user", ids from
    self.src_range_) and then the destination side (gl.EDGE_DST, expected
    node type "item", ids from self.dst_range_):

    1. a "by_order" sampler is drained until gl.OutOfRangeError is raised
       (or at most 100 times) and the collected ids are compared with the
       expected range;
    2. a "random" sampler is polled 10 times and each batch of ids is
       checked to be drawn from the expected range.
    """
    file_path = self.gen_test_data([utils.ATTRIBUTED], False)
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)
    # try/finally guarantees the graph is closed even when a check fails.
    try:
        batch_size = 4
        # (edge end to read nodes from, expected node type, id bounds)
        cases = (
            (gl.EDGE_SRC, "user", self.src_range_),
            (gl.EDGE_DST, "item", self.dst_range_),
        )
        for node_from, node_type, bounds in cases:
            ids = range(bounds[0], bounds[1])

            sampler = g.node_sampler('first', batch_size=batch_size,
                                     strategy="by_order",
                                     node_from=node_from)
            res_ids = []
            max_iter = 100  # upper bound on the number of batches pulled
            for _ in range(max_iter):
                try:
                    nodes = sampler.get()
                    utils.check_node_type(nodes, node_type)
                    res_ids.extend(list(nodes.ids))
                except gl.OutOfRangeError:
                    # Stop collecting once gl.OutOfRangeError is raised.
                    break
            utils.check_sorted_equal(res_ids, ids)

            sampler = g.node_sampler('first', batch_size=batch_size,
                                     strategy="random",
                                     node_from=node_from)
            max_iter = 10
            for _ in range(max_iter):
                nodes = sampler.get()
                utils.check_subset(nodes.ids, ids)
    finally:
        g.close()
def test_1hop(self):
    """ Sample full neighbors.

    For every seed id, the ids of the nodes sampled at hop 1 are compared
    with utils.fixed_dst_ids(seed, self._node2_range).
    """
    seeds = self._seed_node1_ids
    sampler = self.g.neighbor_sampler(self._edge1_type, 1, strategy="full")
    sampled = sampler.get(seeds)
    # The hop-1 edges are fetched but not inspected; the call is kept so
    # the accessor is still exercised.
    sampled.layer_edges(1)
    hop_nodes = sampled.layer_nodes(1)
    for pos, per_seed in enumerate(hop_nodes):
        utils.check_sorted_equal(
            utils.fixed_dst_ids(seeds[pos], self._node2_range),
            per_seed.ids)
def test_1hop_using_gremlin(self):
    """ Full neighbor sample with gremlin-like api.

    Feeds the seed ids into a V -> outE -> inV query using the "full"
    sampling strategy, then checks the per-seed neighbor ids against
    utils.fixed_dst_ids() and validates ids, type, weights and labels of
    the resulting nodes.
    """
    ids = self._seed_node1_ids
    nbrs = self.g.V(self._node1_type, feed=ids) \
        .outE(self._edge1_type).sample().by("full") \
        .inV().emit()

    # Third element of the emitted result -- presumably the nodes reached
    # by inV(); confirm against the emit() contract.
    nodes = nbrs[2]
    for index, node in enumerate(nodes):
        utils.check_sorted_equal(
            utils.fixed_dst_ids(ids[index], self._node2_range),
            node.ids)

    utils.check_node_ids(nodes, self._node2_ids)
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_weights(nodes)
    utils.check_node_labels(nodes)
def test_traverse_with_mask(self):
    """Traverse TEST-masked nodes through gl.Dataset for two full passes.

    Each batch's int, float and string attributes are checked against the
    batch ids. After each pass the collected ids are compared with the
    range described by self._test_node_range.
    """
    bs = 8
    q = self.g.V(self._node1_type, mask=gl.Mask.TEST).batch(bs).alias('test') \
        .values(lambda x: (x['test'].ids,
                           x['test'].int_attrs,
                           x['test'].float_attrs,
                           x['test'].string_attrs))
    dataset = gl.Dataset(q)
    expected = range(self._test_node_range[0], self._test_node_range[1])
    for _ in range(2):
        res = []
        while True:
            try:
                # Distinct names keep the unpacked attributes from
                # rebinding the enclosing loop's variable.
                ids, int_attrs, float_attrs, string_attrs = dataset.next()
                utils.check_i_attrs(int_attrs, ids)
                utils.check_f_attrs(float_attrs, ids)
                utils.check_s_attrs(string_attrs, ids)
                res += list(ids)
            except gl.OutOfRangeError:
                # Stop the pass as soon as gl.OutOfRangeError is raised.
                break
        utils.check_sorted_equal(res, expected)
def test_node_iterate(self):
    """Traverse 'user' nodes with a shuffled, traversing GSL query.

    Batches are pulled with sampler.next() until gl.OutOfRangeError is
    raised (or at most 100 times). The collected ids are then compared
    with the range described by self.value_range_[0].
    """
    file_path = self.gen_test_data([utils.ATTRIBUTED])
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .node(source=file_path, node_type=self.node_type_, decoder=decoder)
    g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)
    # Release the graph when the test ends, pass or fail.
    # NOTE(review): assumes g.close() is valid after a server_id /
    # server_count style init -- confirm against the Graph API.
    try:
        batch_size = 4
        sampler = g.V('user').batch(batch_size).shuffle(
            traverse=True).values()
        res_ids = []
        max_iter = 100  # upper bound on the number of batches pulled
        for _ in range(max_iter):
            try:
                nodes = sampler.next()
                utils.check_node_attrs(nodes)
                res_ids.extend(list(nodes.ids))
            except gl.OutOfRangeError:
                # Stop collecting as soon as gl.OutOfRangeError is raised.
                break
        ids = range(self.value_range_[0][0], self.value_range_[0][1])
        utils.check_sorted_equal(res_ids, ids)
    finally:
        g.close()
def test_traverse_with_mask_eager_mode(self):
    """Traverse TRAIN-masked nodes once in eager mode.

    Each batch's int, float and string attributes are checked against the
    batch ids. When gl.OutOfRangeError ends the traversal, the collected
    ids are compared with the range described by self._train_node_range.
    """
    gl.set_eager_mode(True)
    bs = 8
    q = self.g.V(self._node1_type, mask=gl.Mask.TRAIN).batch(bs).alias('train') \
        .values(lambda x: (x['train'].ids,
                           x['train'].int_attrs,
                           x['train'].float_attrs,
                           x['train'].string_attrs))
    res = []
    while True:
        try:
            ids, int_attrs, float_attrs, string_attrs = q.next()
            utils.check_i_attrs(int_attrs, ids)
            utils.check_f_attrs(float_attrs, ids)
            utils.check_s_attrs(string_attrs, ids)
            res += list(ids)
        except gl.OutOfRangeError:
            # Stop as soon as gl.OutOfRangeError is raised.
            break
    utils.check_sorted_equal(
        res, range(self._train_node_range[0], self._train_node_range[1]))
def test_edge_iterate(self):
    """Iterate over 'first' edges with g.edge_sampler().

    A "by_order" sampler is drained until gl.OutOfRangeError is raised (or
    at most 100 times) and the collected source/destination ids are
    compared with the ranges described by self.src_range_ and
    self.dst_range_. A "random" sampler is then polled 10 times and each
    batch is checked to be drawn from those ranges.
    """
    file_path = self.gen_test_data([utils.WEIGHTED], False)
    decoder = gl.Decoder(weighted=True)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)
    # try/finally guarantees the graph is closed even when a check fails.
    try:
        batch_size = 4
        # The expected id ranges are shared by both phases below.
        src_ids = range(self.src_range_[0], self.src_range_[1])
        dst_ids = range(self.dst_range_[0], self.dst_range_[1])

        sampler = g.edge_sampler('first', batch_size=batch_size,
                                 strategy="by_order")
        res_src = []
        res_dst = []
        max_iter = 100  # upper bound on the number of batches pulled
        for _ in range(max_iter):
            try:
                edges = sampler.get()
                utils.check_edge_weights(edges)
                res_src.extend(list(edges.src_ids))
                res_dst.extend(list(edges.dst_ids))
            except gl.OutOfRangeError:
                # Stop collecting as soon as gl.OutOfRangeError is raised.
                break
        utils.check_sorted_equal(res_src, src_ids)
        utils.check_sorted_equal(res_dst, dst_ids)

        sampler = g.edge_sampler('first', batch_size=batch_size,
                                 strategy="random")
        max_iter = 10
        for _ in range(max_iter):
            edges = sampler.get()
            utils.check_edge_weights(edges)
            utils.check_subset(edges.src_ids, src_ids)
            utils.check_subset(edges.dst_ids, dst_ids)
    finally:
        g.close()