def test_edge_shuffle(self):
    """Traverse the 'first' edges through a shuffled, batched query.

    A graph is built from a generated weighted edge file.  Batches are
    pulled from ``g.E('first').batch(4).shuffle(traverse=True).values()``
    until ``gl.OutOfRangeError`` is raised (at most 100 batches).  Each
    batch is passed to ``utils.check_edge_weights`` and the collected
    source/destination ids are compared with the configured id ranges via
    ``utils.check_sorted_equal``.
    """
    file_path = self.gen_test_data([utils.WEIGHTED], False)
    decoder = gl.Decoder(weighted=True)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check below fails, so a failing test
    # does not leave the initialised graph open.
    try:
        batch_size = 4
        sampler = g.E('first').batch(batch_size).shuffle(
            traverse=True).values()

        res_src = []
        res_dst = []
        max_iter = 100
        for _ in range(max_iter):
            # Only the fetch is expected to signal the end of traversal.
            try:
                edges = sampler.next()
            except gl.OutOfRangeError:
                break
            utils.check_edge_weights(edges)
            res_src.extend(edges.src_ids)
            res_dst.extend(edges.dst_ids)

        src_ids = range(self.src_range_[0], self.src_range_[1])
        dst_ids = range(self.dst_range_[0], self.dst_range_[1])
        utils.check_sorted_equal(res_src, src_ids)
        utils.check_sorted_equal(res_dst, dst_ids)
    finally:
        g.close()
def initialize(self):
    """Generate the test tables and build the class-wide ``gl.Graph``.

    Two node tables and three edge tables are generated through ``utils``,
    ``needs_initial`` is cleared on the class, and the graph is stored on
    the class as ``g`` and initialised as a single server.
    """
    cls = self.__class__
    type1, type2 = self._node1_type, self._node2_type
    range1, range2 = self._node1_range, self._node2_range

    # Node tables: gen_node_data returns a sequence; only its first entry
    # is used here.
    node1_path = utils.gen_node_data(
        [type1], [range1], [utils.ATTRIBUTED])[0]
    node2_path = utils.gen_node_data(
        [type2], [range2], [utils.WEIGHTED, utils.LABELED])[0]

    # Edge tables: type1 -> type2, type2 -> type1 and type2 -> type2.
    edge1_path = utils.gen_edge_data(
        type1, type2, range1, range2,
        schema=[utils.ATTRIBUTED, utils.LABELED])
    edge2_path = utils.gen_edge_data(
        type2, type1, range2, range1,
        schema=[utils.ATTRIBUTED, utils.WEIGHTED])
    edge3_path = utils.gen_edge_data(
        type2, type2, range2, range2,
        schema=[utils.WEIGHTED])

    cls.needs_initial = False

    graph = gl.Graph()
    graph = graph.node(source=node1_path,
                       node_type=type1,
                       decoder=self._node1_decoder)
    graph = graph.node(source=node2_path,
                       node_type=type2,
                       decoder=self._node2_decoder)
    graph = graph.edge(source=edge1_path,
                       edge_type=(type1, type2, self._edge1_type),
                       decoder=self._edge1_decoder,
                       directed=False)
    graph = graph.edge(source=edge2_path,
                       edge_type=(type2, type1, self._edge2_type),
                       decoder=self._edge2_decoder)
    graph = graph.edge(source=edge3_path,
                       edge_type=(type2, type2, self._edge3_type),
                       decoder=self._edge3_decoder,
                       directed=False)
    cls.g = graph
    cls.g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)
def test_edge_iterate_using_gremlin(self):
    """Iterate the 'first' edges by running batched queries with ``g.run``.

    The plain query is run until ``gl.OutOfRangeError`` (at most 100
    times) and the collected ids are compared with the configured ranges.
    The shuffled query is then run ten times and every batch must be a
    subset of those ranges.
    """
    edge_file = self.gen_test_data([utils.WEIGHTED], False)
    weight_decoder = gl.Decoder(weighted=True)
    g = gl.Graph().edge(source=edge_file,
                        edge_type=self.edge_tuple_,
                        decoder=weight_decoder)
    g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

    batch_size = 4

    # Pass 1: plain batched traversal until the query is exhausted.
    query = g.E('first').batch(batch_size).values()
    seen_src, seen_dst = [], []
    for _ in range(100):
        try:
            batch = g.run(query)
            utils.check_edge_weights(batch)
            seen_src.extend(list(batch.src_ids))
            seen_dst.extend(list(batch.dst_ids))
        except gl.OutOfRangeError:
            break

    expected_src = range(self.src_range_[0], self.src_range_[1])
    expected_dst = range(self.dst_range_[0], self.dst_range_[1])
    utils.check_sorted_equal(seen_src, expected_src)
    utils.check_sorted_equal(seen_dst, expected_dst)

    # Pass 2: shuffled batches only need to stay inside the id ranges.
    query = g.E('first').batch(batch_size).shuffle().values()
    for _ in range(10):
        batch = g.run(query)
        utils.check_edge_weights(batch)
        utils.check_subset(batch.src_ids, expected_src)
        utils.check_subset(batch.dst_ids, expected_dst)
def load_graph(args):
    """Describe the graph stored under ``args.dataset_folder``.

    Declares 'item' nodes (labeled, ``args.features_num`` float attributes
    delimited by ':'), 'relation' edges between items (weighted,
    ``directed=False``) and three weighted 'item' node tables registered
    with the TRAIN, VAL and TEST masks.

    Returns:
      The ``gl.Graph`` description; ``init`` is not called here.
    """
    folder = args.dataset_folder
    node_type = 'item'
    edge_type = 'relation'

    def table(name):
        # Plain string concatenation: no path separator is inserted.
        return folder + name

    # Tables should be split when training is distributed.
    node_path = table("node_table")
    edge_path = table("edge_table")
    masked_tables = (
        (table("train_table"), gl.Mask.TRAIN),
        (table("val_table"), gl.Mask.VAL),
        (table("test_table"), gl.Mask.TEST),
    )

    graph = gl.Graph()
    graph = graph.node(
        node_path,
        node_type=node_type,
        decoder=gl.Decoder(labeled=True,
                           attr_types=["float"] * args.features_num,
                           attr_delimiter=":"))
    graph = graph.edge(
        edge_path,
        edge_type=(node_type, node_type, edge_type),
        decoder=gl.Decoder(weighted=True),
        directed=False)
    for path, mask in masked_tables:
        graph = graph.node(path,
                           node_type=node_type,
                           decoder=gl.Decoder(weighted=True),
                           mask=mask)
    return graph
def test_node_iterate(self):
    """Iterate 'user' nodes with the ``by_order`` and ``random`` samplers.

    The ``by_order`` sampler is read until ``gl.OutOfRangeError`` (at most
    100 batches); the collected ids are compared with the configured range
    via ``utils.check_sorted_equal``.  The ``random`` sampler is then read
    ten times and each batch must be a subset of that range.
    """
    file_path = self.gen_test_data([utils.ATTRIBUTED])
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .node(source=file_path, node_type=self.node_type_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check below fails.
    try:
        batch_size = 4
        sampler = g.node_sampler('user', batch_size=batch_size,
                                 strategy="by_order")
        res_ids = []
        max_iter = 100
        for _ in range(max_iter):
            # Only the fetch is expected to signal the end of traversal.
            try:
                nodes = sampler.get()
            except gl.OutOfRangeError:
                break
            utils.check_node_attrs(nodes)
            res_ids.extend(nodes.ids)

        ids = range(self.value_range_[0][0], self.value_range_[0][1])
        utils.check_sorted_equal(res_ids, ids)

        sampler = g.node_sampler('user', batch_size=batch_size,
                                 strategy="random")
        max_iter = 10
        for _ in range(max_iter):
            nodes = sampler.get()
            utils.check_node_attrs(nodes)
            utils.check_subset(nodes.ids, ids)
    finally:
        g.close()
def test_node_iterate_using_gsl(self):
    """Iterate 'user' nodes by running batched queries in eager mode.

    The plain query is run until ``gl.OutOfRangeError`` (at most 100
    times) and the collected ids are compared with the configured range.
    The shuffled query is then run ten times and each batch must be a
    subset of that range.
    """
    gl.set_eager_mode(True)
    file_path = self.gen_test_data([utils.ATTRIBUTED])
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .node(source=file_path, node_type=self.node_type_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check below fails.
    try:
        batch_size = 4
        query = g.V('user').batch(batch_size).values()
        res_ids = []
        max_iter = 100
        for _ in range(max_iter):
            # Only running the query is expected to signal exhaustion.
            try:
                nodes = g.run(query)
            except gl.OutOfRangeError:
                break
            utils.check_node_attrs(nodes)
            res_ids.extend(nodes.ids)

        ids = range(self.value_range_[0], self.value_range_[1])
        utils.check_sorted_equal(res_ids, ids)

        query = g.V('user').batch(batch_size).shuffle().values()
        max_iter = 10
        for _ in range(max_iter):
            nodes = g.run(query)
            utils.check_node_attrs(nodes)
            utils.check_subset(nodes.ids, ids)
    finally:
        g.close()
def main(argv):
    """Build the demo graph from the files under ``data/`` and run queries.

    ``argv`` is accepted but not read by this function.
    """
    base_dir = sys.path[0]

    def data_file(name):
        # Every table lives under "<script dir>/data/".
        return os.path.join(base_dir, "data/" + name)

    # Step 1: Construct graph with data source.
    # Edges:
    #   user<--(buy)-->item
    #   entity<--(relation)-->entity
    #   cond_node--(cond_edge)-->cond_node
    g = gl.Graph()
    (g.node(data_file("user"),
            node_type="user",
            decoder=gl.Decoder(weighted=True))
      .node(data_file("item"),
            node_type="item",
            decoder=gl.Decoder(
                attr_types=['string', 'int', 'float', 'float', 'string']))
      .edge(data_file("u-i"),
            edge_type=("user", "item", "buy"),
            decoder=gl.Decoder(weighted=True),
            directed=False)
      .node(data_file("entity"),
            node_type="entity",
            decoder=gl.Decoder(
                attr_types=['float', 'float', 'float', 'float'],
                labeled=True))
      .edge(data_file("relation"),
            edge_type=("entity", "entity", "relation"),
            decoder=gl.Decoder(weighted=True),
            directed=False)
      # The cond_edge table reuses the "relation" file.
      .edge(data_file("relation"),
            edge_type=("cond_node", "cond_node", "cond_edge"),
            decoder=gl.Decoder(weighted=True),
            directed=True)
      .node(data_file("cond_node"),
            node_type="cond_node",
            decoder=gl.Decoder(
                attr_types=['int', 'int', 'float', 'string'],
                weighted=True)))
    g.init()

    # Step 2: Describe the queries on graph.
    test_node_iterate(g, local=True)
    test_edge_iterate(g, local=True)
    test_truncated_full_edge_sample(g)
    test_conditional_negtaive_sample(g)

    g.close()
def test_node_iterate_from_graph(self):
    """Sample nodes from both endpoints of the 'first' edges.

    For ``gl.EDGE_SRC`` (expected node type "user") and ``gl.EDGE_DST``
    (expected node type "item"), the ``by_order`` sampler is read until
    ``gl.OutOfRangeError`` (at most 100 batches) and the collected ids are
    compared with the matching id range.  The ``random`` sampler is then
    read ten times and each batch must be a subset of that range.
    """
    file_path = self.gen_test_data([utils.ATTRIBUTED], False)
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check below fails.
    try:
        batch_size = 4
        # (edge endpoint, expected node type, expected id range)
        endpoints = (
            (gl.EDGE_SRC, "user", self.src_range_),
            (gl.EDGE_DST, "item", self.dst_range_),
        )
        for node_from, node_type, id_range in endpoints:
            sampler = g.node_sampler('first', batch_size=batch_size,
                                     strategy="by_order",
                                     node_from=node_from)
            res_ids = []
            for _ in range(100):
                # Only the fetch is expected to signal exhaustion.
                try:
                    nodes = sampler.get()
                except gl.OutOfRangeError:
                    break
                utils.check_node_type(nodes, node_type)
                res_ids.extend(nodes.ids)

            ids = range(id_range[0], id_range[1])
            utils.check_sorted_equal(res_ids, ids)

            sampler = g.node_sampler('first', batch_size=batch_size,
                                     strategy="random",
                                     node_from=node_from)
            for _ in range(10):
                nodes = sampler.get()
                utils.check_subset(nodes.ids, ids)
    finally:
        g.close()
def main(argv):
    """Start a graph server or client according to command-line options.

    Recognised options, each taking a value: ``-c/--cluster``,
    ``-j/--job_name``, ``-t/--task_index`` and ``-m/--mode``.  The mode is
    passed to ``gl.set_tracker_mode`` before the graph is described.  A
    "server" job waits for close; a "client" job prints the 'user' node
    ids and then the 'buy' edge destination ids, three passes each.

    Raises:
      getopt.GetoptError: on an unknown option or a missing option value.
      ValueError: when the task index or mode is not an integer.
    """
    cur_path = sys.path[0]

    cluster = ""
    job_name = ""
    task_index = 0
    mode = 0

    # 'm:' must be part of the short-option string; without it getopt
    # rejects "-m" with GetoptError and the '-m' branch below is dead.
    opts, _ = getopt.getopt(
        argv, 'c:j:t:m:',
        ['cluster=', 'job_name=', 'task_index=', 'mode='])
    for opt, arg in opts:
        if opt in ('-c', '--cluster'):
            cluster = arg
        elif opt in ('-j', '--job_name'):
            job_name = arg
        elif opt in ('-t', '--task_index'):
            task_index = int(arg)
        elif opt in ('-m', '--mode'):
            mode = int(arg)

    gl.set_tracker_mode(mode)

    g = gl.Graph()
    g.node(os.path.join(cur_path, "data/user"),
           node_type="user", decoder=gl.Decoder(weighted=True)) \
     .node(os.path.join(cur_path, "data/item"),
           node_type="item",
           decoder=gl.Decoder(
               attr_types=['string', 'int', 'float', 'float', 'string'])) \
     .edge(os.path.join(cur_path, "data/u-i"),
           edge_type=("user", "item", "buy"),
           decoder=gl.Decoder(weighted=True))

    g.init(cluster=cluster, job_name=job_name, task_index=task_index)

    def print_passes(query, field, passes=3):
        # Each pass runs the query until it reports OutOfRangeError.
        for _ in range(passes):
            while True:
                try:
                    print(getattr(g.run(query), field))
                except gl.OutOfRangeError:
                    print("Out of range......")
                    break

    if job_name == "server":
        print("Server {} started.".format(task_index))
        g.wait_for_close()

    if job_name == "client":
        print("Client {} started.".format(task_index))
        print_passes(g.V("user").batch(10).values(), "ids")
        print_passes(g.E("buy").batch(10).values(), "dst_ids")
        # NOTE(review): the original indentation was lost; close() is kept
        # inside the client branch -- confirm against the upstream script.
        g.close()
def load_graph(config):
    """Describe the u2i graph.

    Declares 'i' and 'u' nodes (both read from the same
    ``u2i_node_attrs`` file, decoded with the attribute types/dims found
    in ``config``) and weighted 'u-i' edges with ``directed=False``.

    Returns:
      The ``gl.Graph`` description; ``init`` is not called here.
    """
    node_attrs_path = "../../data/u2i/u2i_node_attrs"
    train_edge_path = "../../data/u2i/u2i_20200222_train"

    graph = gl.Graph()

    item_decoder = gl.Decoder(attr_types=config['i_attr_types'],
                              attr_dims=config['i_attr_dims'])
    graph = graph.node(node_attrs_path, node_type="i", decoder=item_decoder)

    user_decoder = gl.Decoder(attr_types=config['u_attr_types'],
                              attr_dims=config['u_attr_dims'])
    graph = graph.node(node_attrs_path, node_type="u", decoder=user_decoder)

    graph = graph.edge(train_edge_path,
                       edge_type=("u", "i", "u-i"),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)
    return graph
def load_graph(config):
    """Describe the ogbl_collab graph found under ``config['dataset_folder']``.

    Declares 'i' nodes with ``config['features_num']`` float attributes
    (every attribute dim set to 0) and weighted 'train' edges between 'i'
    nodes with ``directed=False``.

    Returns:
      The ``gl.Graph`` description; ``init`` is not called here.
    """
    folder = config['dataset_folder']

    graph = gl.Graph()

    # The folder and file name are concatenated without a separator.
    node_path = folder + 'ogbl_collab_node'
    feature_count = config['features_num']
    node_decoder = gl.Decoder(attr_types=['float'] * feature_count,
                              attr_dims=[0] * config['features_num'])
    graph = graph.node(node_path, node_type='i', decoder=node_decoder)

    edge_path = folder + 'ogbl_collab_train_edge'
    graph = graph.edge(edge_path,
                       edge_type=('i', 'i', 'train'),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)
    return graph
def load_graph(config):
    """Describe the graph stored in the ``blogcatelog`` data folder.

    The edge table is registered first (weighted, ``directed=False``),
    then the weighted node table; type names come from ``config``.

    Returns:
      The ``gl.Graph`` description; ``init`` is not called here.
    """
    node_type = config['node_type']
    edge_type = config['edge_type']

    graph = gl.Graph()
    graph = graph.edge("../../data/blogcatelog/edge_table",
                       edge_type=(node_type, node_type, edge_type),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)
    graph = graph.node("../../data/blogcatelog/node_table",
                       node_type=node_type,
                       decoder=gl.Decoder(weighted=True))
    return graph
def test_weighted(self):
    """Load weighted nodes and validate them with ``check_weights``."""
    data_file = self.gen_test_data([utils.WEIGHTED])
    weight_decoder = gl.Decoder(weighted=True)

    graph = gl.Graph()
    graph = graph.node(source=data_file,
                       node_type=self.node_type_,
                       decoder=weight_decoder)
    graph.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

    # Look up the nodes by the ids configured on the test case.
    fetched = graph.get_nodes(node_type=self.node_type_, ids=self.ids_)
    self.check_weights(fetched)
def initialize(self):
    """Build and initialise the class-wide graph of attributed nodes.

    Clears ``needs_initial`` on the class, generates an attributed node
    table and stores the initialised ``gl.Graph`` on the class as ``g``.
    """
    cls = self.__class__
    cls.needs_initial = False

    data_file = self.gen_test_data([utils.ATTRIBUTED])
    attr_decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)

    graph = gl.Graph()
    graph = graph.node(source=data_file,
                       node_type=self.node_type_,
                       decoder=attr_decoder)
    cls.g = graph
    cls.g.init(tracker=utils.TRACKER_PATH)
def test_attributed(self):
    """Load attributed nodes and validate them with ``check_attrs``."""
    data_file = self.gen_test_data([utils.ATTRIBUTED])
    attr_decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)

    graph = gl.Graph()
    graph = graph.node(source=data_file,
                       node_type=self.node_type_,
                       decoder=attr_decoder)
    graph.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

    # Look up the nodes by the ids configured on the test case.
    fetched = graph.get_nodes(node_type=self.node_type_, ids=self.ids_)
    self.check_attrs(fetched)
def test_attributed(self):
    """Load attributed 'first' edges and fetch them by endpoint ids."""
    data_file = self.gen_test_data([utils.ATTRIBUTED], False)
    attr_decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)

    graph = gl.Graph()
    graph = graph.edge(source=data_file,
                       edge_type=self.edge_tuple_,
                       decoder=attr_decoder)
    graph.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

    # NOTE(review): the fetched edges are never inspected in the visible
    # code; an attribute check may be missing here -- confirm.
    graph.get_edges(edge_type="first",
                    src_ids=self.src_ids_,
                    dst_ids=self.dst_ids_)
def load_graph():
    """Describe the FB15k-237 graph.

    Declares 'entity' and 'relation' nodes, each with one int attribute,
    and 'hrt' edges between entities with one int attribute and
    ``weighted=False``.

    Returns:
      The ``gl.Graph`` description; ``init`` is not called here.
    """
    graph = gl.Graph()

    # Both node tables use the same single-int attribute layout.
    for table, node_type in (
            ("../../data/FB15k-237/entity_node_table", "entity"),
            ("../../data/FB15k-237/relation_node_table", "relation")):
        graph = graph.node(table,
                           node_type=node_type,
                           decoder=gl.Decoder(attr_types=["int"]))

    graph = graph.edge("../../data/FB15k-237/train_tuple_table",
                       edge_type=("entity", "entity", "hrt"),
                       decoder=gl.Decoder(attr_types=["int"],
                                          weighted=False))
    return graph
def load_graph(task_index):
    """Describe the graph shard that belongs to ``task_index``.

    The first two comma-separated entries of ``FLAGS.tables`` are taken as
    the node and edge table prefixes; ``str(task_index)`` is appended to
    each.  Attribute types and dims are parsed as JSON from ``FLAGS``.

    Returns:
      The ``gl.Graph`` description; ``init`` is not called here.
    """
    table_names = FLAGS.tables.split(',')
    node_table, edge_table = table_names[0:2]
    attr_types = json.loads(FLAGS.attr_types)
    attr_dims = json.loads(FLAGS.attr_dims)

    graph = gl.Graph()

    shard = str(task_index)
    node_decoder = gl.Decoder(attr_types=attr_types, attr_dims=attr_dims)
    graph = graph.node(node_table + shard,
                       node_type='i',
                       decoder=node_decoder)

    graph = graph.edge(edge_table + str(task_index),
                       edge_type=('i', 'i', 'train'),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)
    return graph
def test_labeled(self):
    """Load labeled nodes and validate them with ``check_labels``."""
    file_path = self.gen_test_data([utils.LABELED])
    decoder = gl.Decoder(labeled=True)
    g = gl.Graph() \
        .node(source=file_path, node_type=self.node_type_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when the check fails.
    try:
        nodes = g.get_nodes(node_type=self.node_type_, ids=self.ids_)
        self.check_labels(nodes)
    finally:
        g.close()
def test_weighted_attributed(self):
    """Load weighted, attributed nodes and validate both properties."""
    file_path = self.gen_test_data([utils.WEIGHTED, utils.ATTRIBUTED])
    decoder = gl.Decoder(weighted=True, attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .node(source=file_path, node_type=self.node_type_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check fails.
    try:
        nodes = g.get_nodes(node_type=self.node_type_, ids=self.ids_)
        self.check_weights(nodes)
        self.check_attrs(nodes)
    finally:
        g.close()
def test_basic(self):
    """Emit one batch of four 'first' edges and check the endpoint ids."""
    data_file = self.gen_test_data([], False)
    plain_decoder = gl.Decoder()

    graph = gl.Graph()
    graph = graph.edge(source=data_file,
                       edge_type=self.edge_tuple_,
                       decoder=plain_decoder)
    graph.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

    batch = graph.E("first").batch(4).emit()

    src_lo, src_hi = self.src_range_[0], self.src_range_[1]
    utils.check_ids(batch.src_ids, range(src_lo, src_hi))
    dst_lo, dst_hi = self.dst_range_[0], self.dst_range_[1]
    utils.check_ids(batch.dst_ids, range(dst_lo, dst_hi))
def load_graph(config):
    """Describe the arxiv link graph.

    Declares nodes with one int attribute and weighted edges between them
    with ``directed=False``; type names come from ``config``.

    Returns:
      The ``gl.Graph`` description; ``init`` is not called here.
    """
    node_type = config['node_type']
    edge_type = config['edge_type']

    graph = gl.Graph()
    graph = graph.node("../../data/arxiv/arxiv-links-train-node-attrs",
                       node_type=node_type,
                       decoder=gl.Decoder(attr_types=["int"]))
    graph = graph.edge("../../data/arxiv/arxiv-links-train-edge",
                       edge_type=(node_type, node_type, edge_type),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)
    return graph
def test_labeled(self):
    """Emit one batch of labeled 'first' edges and check ids and labels."""
    data_file = self.gen_test_data([utils.LABELED], False)
    label_decoder = gl.Decoder(labeled=True)

    graph = gl.Graph()
    graph = graph.edge(source=data_file,
                       edge_type=self.edge_tuple_,
                       decoder=label_decoder)
    graph.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

    batch = graph.E("first").batch(self.batch_size_).emit()

    src_lo, src_hi = self.src_range_[0], self.src_range_[1]
    utils.check_ids(batch.src_ids, range(src_lo, src_hi))
    dst_lo, dst_hi = self.dst_range_[0], self.dst_range_[1]
    utils.check_ids(batch.dst_ids, range(dst_lo, dst_hi))
    utils.check_edge_labels(batch)
def load_graph(config):
    """Describe the graph found under ``config['dataset_folder']``.

    The node table (50 float attributes) is registered twice: once under
    ``config['node_type']`` and once under the node type "train".  Edges
    are weighted with ``directed=False``.

    Returns:
      The ``gl.Graph`` description; ``init`` is not called here.
    """
    folder = config['dataset_folder']
    node_type = config['node_type']
    edge_type = config['edge_type']

    graph = gl.Graph()
    graph = graph.node(folder + "node_table",
                       node_type=node_type,
                       decoder=gl.Decoder(attr_types=["float"] * 50))
    graph = graph.edge(folder + "edge_table",
                       edge_type=(node_type, node_type, edge_type),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)
    # Same node table again, exposed as node type "train".
    graph = graph.node(folder + "node_table",
                       node_type="train",
                       decoder=gl.Decoder(attr_types=["float"] * 50))
    return graph
def test_weighted(self):
    """Emit one batch of weighted 'first' edges; check ids and weights."""
    file_path = self.gen_test_data([utils.WEIGHTED], False)
    decoder = gl.Decoder(weighted=True)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check fails.
    try:
        edges = g.E("first").batch(self.batch_size_).emit()
        utils.check_ids(edges.src_ids,
                        range(self.src_range_[0], self.src_range_[1]))
        utils.check_ids(edges.dst_ids,
                        range(self.dst_range_[0], self.dst_range_[1]))
        utils.check_edge_weights(edges)
    finally:
        g.close()
def init_graph(self):
    """Build and initialise the user/item graph used by the tests.

    Declares 'u' nodes (four float attributes), 'i' nodes (one float and
    two string attributes whose dims come from ``self.dim1`` and
    ``self.dim2``) and the 'u-i' and 'i-i' edge tables.

    Returns:
      Whatever the chained ``init()`` call returns.
    """
    user_decoder_args = {
        'attr_types': ['float'] * 4,
        'attr_dims': [None] * 4,
    }
    item_decoder_args = {
        'attr_types': ['float', ('string', 100), ('string', 50)],
        'attr_dims': [None, self.dim1, self.dim2],
    }

    graph = gl.Graph()
    graph = graph.node(self.user_path, 'u',
                       decoder=gl.Decoder(**user_decoder_args))
    graph = graph.node(self.item_path, 'i',
                       decoder=gl.Decoder(**item_decoder_args))
    graph = graph.edge(self.u2i_path, ('u', 'i', 'u-i'),
                       decoder=gl.Decoder())
    graph = graph.edge(self.i2i_path, ('i', 'i', 'i-i'),
                       decoder=gl.Decoder())
    return graph.init()
def test_homo_sage_supervised(self):
    """Train a supervised ``HomoEgoGraphSAGE`` on a generated 'i-i' graph.

    A two-hop query (5 'topk' then 5 'random' neighbours, batch size 10)
    feeds the model; logits, loss, target ids and out-degrees are checked
    by the ``trace`` callback on every step.
    """
    item_path = self.gen_node_labeled('item')
    i2i_path = utils.gen_edge_data('item', 'item', (0, 100), (0, 100),
                                   schema=[])

    graph = gl.Graph()
    graph = graph.node(item_path, 'i',
                       decoder=gl.Decoder(attr_types=['float'] * 4,
                                          attr_dims=[None] * 4,
                                          labeled=True))
    graph = graph.edge(i2i_path, ('i', 'i', 'i-i'),
                       decoder=gl.Decoder(), directed=False)
    g = graph.init()

    seeds = g.V('i').batch(10).alias('i')
    hop1 = seeds.outV('i-i').sample(5).by('topk').alias('hop1')
    hop2 = hop1.outV('i-i').sample(5).by('random').alias('hop2')
    query = hop2.values()
    df = tfg.DataFlow(query)

    dims = np.array([4, 16, 8])
    # 'droput' is the keyword exactly as spelled at this call site.
    model = tfg.HomoEgoGraphSAGE(dims,
                                 bn_fn=None,
                                 active_fn=tf.nn.relu,
                                 droput=0.1)

    ego = df.get_ego_graph('i')
    embeddings = model.forward(ego)
    classifier = tfg.NodeClassifier(dims=[8, 4], class_num=2)
    logits, loss = classifier.forward(embeddings, ego.nodes.labels)
    target_ids = ego.nodes.ids
    out_degrees = ego.nodes.out_degrees

    trainer = tfg.Trainer()
    trainer.minimize(loss)

    def trace(ret):
        # ret follows the fetch order: logits, loss, ids, out-degrees.
        self.assertEqual(len(ret), 4)
        self.assertEqual(list(ret[0].shape), [10, 2])
        self.assertEqual(list(ret[2].shape), [10])  # ids
        self.assertEqual(list(ret[3].shape), [10])
        for degree in ret[3]:
            assert degree in (0, 2, 4, 6, 8)

    fetches = [logits, loss, target_ids, out_degrees]
    trainer.step_to_epochs(df, 10, fetches, trace)
    trainer.close()
    g.close()
def test_basic(self):
    """Emit one batch of four 'first' edges in eager mode; check the ids."""
    # Eager mode is enabled once; the original repeated this call with
    # the same argument a few lines later.
    gl.set_eager_mode(True)
    file_path = self.gen_test_data([], False)
    decoder = gl.Decoder()
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check fails.
    try:
        edges = g.E("first").batch(4).emit()
        utils.check_ids(edges.src_ids,
                        range(self.src_range_[0], self.src_range_[1]))
        utils.check_ids(edges.dst_ids,
                        range(self.dst_range_[0], self.dst_range_[1]))
    finally:
        g.close()
def load_graph(config):
    """Describe the graph found under ``config['dataset_folder']``.

    Declares labeled nodes with ``config['features_num']`` float
    attributes delimited by ':', weighted edges with ``directed=False``,
    and three weighted node tables registered under the node types
    "train", "val" and "test".

    Returns:
      The ``gl.Graph`` description; ``init`` is not called here.
    """
    folder = config['dataset_folder']
    node_type = config['node_type']
    edge_type = config['edge_type']

    graph = gl.Graph()
    graph = graph.node(
        folder + "node_table",
        node_type=node_type,
        decoder=gl.Decoder(labeled=True,
                           attr_types=["float"] * (config['features_num']),
                           attr_delimiter=":"))
    graph = graph.edge(
        folder + "edge_table",
        edge_type=(node_type, node_type, edge_type),
        decoder=gl.Decoder(weighted=True),
        directed=False)

    # Each split table is registered under a node type named after it.
    for split in ("train", "val", "test"):
        graph = graph.node(folder + split + "_table",
                           node_type=split,
                           decoder=gl.Decoder(weighted=True))
    return graph
def test_labeled(self):
    """Read one batch of labeled 'first' edges through ``gl.Dataset``.

    The query aliases the edges as 'e'; the first element produced by the
    dataset is checked for endpoint ids and edge labels.
    """
    file_path = self.gen_test_data([utils.LABELED], False)
    decoder = gl.Decoder(labeled=True)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check fails.
    try:
        query = g.E("first").batch(self.batch_size_).alias('e').values()
        ds = gl.Dataset(query, window=1)
        edges = ds.next()['e']
        utils.check_ids(edges.src_ids,
                        range(self.src_range_[0], self.src_range_[1]))
        utils.check_ids(edges.dst_ids,
                        range(self.dst_range_[0], self.dst_range_[1]))
        utils.check_edge_labels(edges)
    finally:
        g.close()