Beispiel #1
0
def load_graph(args):
    """Describe the item/relation graph stored under ``args.dataset_folder``.

    The folder string is used as a raw prefix (plain ``+`` concatenation with
    the table names), so it presumably has to end with a path separator.

    Args:
        args: Object providing ``dataset_folder`` and ``features_num``.

    Returns:
        The object produced by chaining ``node``/``edge`` on ``gl.Graph()``;
        ``init`` is not called here.
    """
    item, relation = 'item', 'relation'
    prefix = args.dataset_folder
    # Should be split when distributed training.
    node_path, edge_path, train_path, val_path, test_path = [
        prefix + table
        for table in ("node_table", "edge_table", "train_table",
                      "val_table", "test_table")
    ]

    graph = gl.Graph()
    graph = graph.node(
        node_path,
        node_type=item,
        decoder=gl.Decoder(labeled=True,
                           attr_types=["float"] * args.features_num,
                           attr_delimiter=":"))
    graph = graph.edge(
        edge_path,
        edge_type=(item, item, relation),
        decoder=gl.Decoder(weighted=True),
        directed=False)

    # The split tables reuse the item node type and differ only by mask.
    splits = ((train_path, gl.Mask.TRAIN),
              (val_path, gl.Mask.VAL),
              (test_path, gl.Mask.TEST))
    for split_path, split_mask in splits:
        graph = graph.node(split_path, node_type=item,
                           decoder=gl.Decoder(weighted=True),
                           mask=split_mask)
    return graph
Beispiel #2
0
def main(argv):
    """Run a graph-learn server or client, chosen by command-line options.

    Recognized options (each takes a value):
      -c / --cluster     forwarded to ``g.init`` as ``cluster``
      -j / --job_name    "server" or "client"
      -t / --task_index  parsed with ``int``
      -m / --mode        parsed with ``int`` and given to
                         ``gl.set_tracker_mode``

    Args:
        argv: Option list in the form ``getopt.getopt`` expects (without the
            program name).

    Raises:
        getopt.GetoptError: On an unknown option or a missing option value.
        ValueError: If ``task_index`` or ``mode`` is not an integer literal.
    """
    cur_path = sys.path[0]

    cluster = ""
    job_name = ""
    task_index = 0
    mode = 0

    # 'm:' has to be listed in the short options; without it the '-m' branch
    # below can never be reached and '-m' raises GetoptError.
    opts, _ = getopt.getopt(
        argv, 'c:j:t:m:', ['cluster=', 'job_name=', 'task_index=', 'mode='])
    for opt, arg in opts:
        if opt in ('-c', '--cluster'):
            cluster = arg
        elif opt in ('-j', '--job_name'):
            job_name = arg
        elif opt in ('-t', '--task_index'):
            task_index = int(arg)
        elif opt in ('-m', '--mode'):
            mode = int(arg)

    gl.set_tracker_mode(mode)

    g = gl.Graph()

    g.node(os.path.join(cur_path, "data/user"),
           node_type="user", decoder=gl.Decoder(weighted=True)) \
      .node(os.path.join(cur_path, "data/item"),
            node_type="item", decoder=gl.Decoder(attr_types=['string', 'int', 'float', 'float', 'string'])) \
      .edge(os.path.join(cur_path, "data/u-i"),
            edge_type=("user", "item", "buy"), decoder=gl.Decoder(weighted=True))

    g.init(cluster=cluster, job_name=job_name, task_index=task_index)

    def _print_epochs(query, field, epochs=3):
        # Run `query` until it raises OutOfRangeError, `epochs` times over,
        # printing the named field of every result.
        for _ in range(epochs):
            while True:
                try:
                    print(getattr(g.run(query), field))
                except gl.OutOfRangeError:
                    print("Out of range......")
                    break

    if job_name == "server":
        print("Server {} started.".format(task_index))
        g.wait_for_close()

    if job_name == "client":
        print("Client {} started.".format(task_index))
        _print_epochs(g.V("user").batch(10).values(), "ids")
        _print_epochs(g.E("buy").batch(10).values(), "dst_ids")
        g.close()
Beispiel #3
0
def load_graph(config):
  """Describe the blogcatelog graph: one undirected edge table, one node table.

  Args:
    config: Mapping with ``'node_type'`` and ``'edge_type'`` entries.

  Returns:
    The object produced by chaining ``edge`` then ``node`` on ``gl.Graph()``;
    ``init`` is not called here.
  """
  n_type = config['node_type']
  e_type = config['edge_type']

  graph = gl.Graph()
  graph = graph.edge(
      "../../data/blogcatelog/edge_table",
      edge_type=(n_type, n_type, e_type),
      decoder=gl.Decoder(weighted=True),
      directed=False)
  return graph.node(
      "../../data/blogcatelog/node_table",
      node_type=n_type,
      decoder=gl.Decoder(weighted=True))
Beispiel #4
0
def load_graph(config):
    """Describe the ogbl_collab graph (node type ``'i'``, edge type ``'train'``).

    Args:
        config: Mapping with ``'dataset_folder'`` (used as a raw string
            prefix) and ``'features_num'`` (count of float attributes).

    Returns:
        The object produced by chaining ``node`` then ``edge`` on
        ``gl.Graph()``; ``init`` is not called here.
    """
    data_dir = config['dataset_folder']

    graph = gl.Graph()
    # One 'float' attribute type and one 0 dim entry per feature.
    node_decoder = gl.Decoder(
        attr_types=['float'] * config['features_num'],
        attr_dims=[0] * config['features_num'])
    graph = graph.node(data_dir + 'ogbl_collab_node',
                       node_type='i',
                       decoder=node_decoder)
    graph = graph.edge(data_dir + 'ogbl_collab_train_edge',
                       edge_type=('i', 'i', 'train'),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)
    return graph
Beispiel #5
0
def load_graph(config):
    """Describe the u2i graph with ``'i'`` and ``'u'`` nodes and ``'u-i'`` edges.

    Both node types are read from the same ``u2i_node_attrs`` file, each with
    its own decoder settings.

    Args:
        config: Mapping with ``'i_attr_types'``, ``'i_attr_dims'``,
            ``'u_attr_types'`` and ``'u_attr_dims'`` entries.

    Returns:
        The object produced by chaining the ``node``/``edge`` calls on
        ``gl.Graph()``; ``init`` is not called here.
    """
    node_source = "../../data/u2i/u2i_node_attrs"

    graph = gl.Graph()
    item_decoder = gl.Decoder(attr_types=config['i_attr_types'],
                              attr_dims=config['i_attr_dims'])
    graph = graph.node(node_source, node_type="i", decoder=item_decoder)

    user_decoder = gl.Decoder(attr_types=config['u_attr_types'],
                              attr_dims=config['u_attr_dims'])
    graph = graph.node(node_source, node_type="u", decoder=user_decoder)

    graph = graph.edge("../../data/u2i/u2i_20200222_train",
                       edge_type=("u", "i", "u-i"),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)
    return graph
Beispiel #6
0
def load_graph():
    """Describe the FB15k-237 graph: entity/relation nodes and ``hrt`` edges.

    Every table is decoded with a single ``"int"`` attribute; the edge table
    is explicitly unweighted.

    Returns:
        The object produced by chaining the ``node``/``edge`` calls on
        ``gl.Graph()``; ``init`` is not called here.
    """
    graph = gl.Graph()
    graph = graph.node("../../data/FB15k-237/entity_node_table",
                       node_type="entity",
                       decoder=gl.Decoder(attr_types=["int"]))
    graph = graph.node("../../data/FB15k-237/relation_node_table",
                       node_type="relation",
                       decoder=gl.Decoder(attr_types=["int"]))
    graph = graph.edge("../../data/FB15k-237/train_tuple_table",
                       edge_type=("entity", "entity", "hrt"),
                       decoder=gl.Decoder(attr_types=["int"], weighted=False))
    return graph
Beispiel #7
0
def load_graph(task_index):
  """Describe the graph partition that belongs to ``task_index``.

  The first two comma-separated entries of ``FLAGS.tables`` are the node and
  edge table prefixes; ``str(task_index)`` is appended to each. Attribute
  types and dims come from the JSON strings in ``FLAGS.attr_types`` and
  ``FLAGS.attr_dims``.

  Args:
    task_index: Value appended (via ``str``) to both table prefixes.

  Returns:
    The object produced by chaining ``node`` then ``edge`` on ``gl.Graph()``;
    ``init`` is not called here.
  """
  tables = FLAGS.tables.split(',')
  node_table, edge_table = tables[0:2]
  types = json.loads(FLAGS.attr_types)
  dims = json.loads(FLAGS.attr_dims)

  graph = gl.Graph()
  graph = graph.node(node_table + str(task_index),
                     node_type='i',
                     decoder=gl.Decoder(attr_types=types, attr_dims=dims))
  graph = graph.edge(edge_table + str(task_index),
                     edge_type=('i', 'i', 'train'),
                     decoder=gl.Decoder(weighted=True),
                     directed=False)
  return graph
Beispiel #8
0
def load_graph(config):
    """Describe the arxiv link graph: attributed nodes, undirected edges.

    Args:
        config: Mapping with ``'node_type'`` and ``'edge_type'`` entries.

    Returns:
        The object produced by chaining ``node`` then ``edge`` on
        ``gl.Graph()``; ``init`` is not called here.
    """
    n_type = config['node_type']
    e_type = config['edge_type']

    graph = gl.Graph()
    graph = graph.node("../../data/arxiv/arxiv-links-train-node-attrs",
                       node_type=n_type,
                       decoder=gl.Decoder(attr_types=["int"]))
    graph = graph.edge("../../data/arxiv/arxiv-links-train-edge",
                       edge_type=(n_type, n_type, e_type),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)
    return graph
def load_graph(config):
  """Describe a graph whose node table is loaded twice.

  ``node_table`` is registered once under the configured node type and once
  more under the literal type ``"train"``; both use 50 float attributes.

  Args:
    config: Mapping with ``'dataset_folder'`` (raw string prefix),
      ``'node_type'`` and ``'edge_type'`` entries.

  Returns:
    The object produced by chaining the ``node``/``edge`` calls on
    ``gl.Graph()``; ``init`` is not called here.
  """
  folder = config['dataset_folder']
  n_type = config['node_type']
  e_type = config['edge_type']

  graph = gl.Graph()
  graph = graph.node(folder + "node_table",
                     node_type=n_type,
                     decoder=gl.Decoder(attr_types=["float"] * 50))
  graph = graph.edge(folder + "edge_table",
                     edge_type=(n_type, n_type, e_type),
                     decoder=gl.Decoder(weighted=True),
                     directed=False)
  graph = graph.node(folder + "node_table",
                     node_type="train",
                     decoder=gl.Decoder(attr_types=["float"] * 50))
  return graph
Beispiel #10
0
    def setUp(self):
        """Prepare the data and the decoders.

        num_int_attrs = num_float_attrs = num_string_attrs = 2.
        dst_ids = utils.fixed_dst_ids(src_ids, dst_range).
        With fixed_dst_ids, the src_ids with src_id % 5 == 0 have no edge.
        """
        # Type names of the generic node/edge fixtures.
        self._node1_type, self._node2_type = "node1", "node2"
        self._edge1_type, self._edge2_type, self._edge3_type = (
            "edge1", "edge2", "edge3")

        # For subgraph sampler.
        self._node3_type, self._edge4_type = "entity", "relation"

        # For conditional negative sampler.
        self._cond_node_type, self._cond_edge_type = "cond_item", "cond_sim"

        attr_types = utils.ATTR_TYPES
        self._node1_decoder = gl.Decoder(attr_types=attr_types)
        self._node2_decoder = gl.Decoder(labeled=True, weighted=True)
        self._edge1_decoder = gl.Decoder(labeled=True, attr_types=attr_types)
        self._edge2_decoder = gl.Decoder(weighted=True, attr_types=attr_types)
        self._edge3_decoder = gl.Decoder(weighted=True)
        self._node3_decoder = gl.Decoder(labeled=True,
                                         attr_types=utils.ENTITY_ATTR_TYPES)
        self._edge4_decoder = gl.Decoder(weighted=True)
        # Attribute name spelling ("deocder") is kept as-is; other code may
        # refer to it under this exact name.
        self._cond_node_deocder = gl.Decoder(weighted=True,
                                             attr_types=utils.COND_ATTR_TYPES)

        self._node1_range, self._node2_range = (0, 100), (100, 200)

        # Test for mask.
        self._train_node_range = (0, 50)
        self._test_node_range = (50, 70)
        self._val_node_range = (70, 100)

        first_id, end_id = self._node1_range[0], self._node1_range[1]
        self._node1_ids = range(first_id, end_id)
        self._node2_ids = utils.fixed_dst_ids(self._node1_ids,
                                              self._node2_range)
        self._seed_node1_ids = np.array([2, 7, 8])
        self._seed_node2_ids = np.array([102, 107, 108])

        # There is no edge whose src_id = 5 | 105.
        self._seed_node1_ids_with_nbr_missing = np.array([5, 10, 110])
        self._seed_node2_ids_with_nbr_missing = np.array([102, 105, 108])

        # Default values stored for the tests.
        self._default_dst_id = -1
        self._default_int_attr = 1000
        self._default_float_attr = 999.9
        self._default_string_attr = 'hehe'

        # Run initialize() only while the needs_initial flag is set.
        if self.needs_initial:
            self.initialize()
        # Pause for a second when no graph is available on self.g.
        if not self.g:
            time.sleep(1)
Beispiel #11
0
    def init_graph(self):
        """Build and initialize the user/item graph used by the tests.

        Users carry four float attributes; items carry one float and two
        string attributes, the latter with dims ``self.dim1``/``self.dim2``.

        Returns:
            Whatever ``init()`` returns at the end of the builder chain.
        """
        user_types = ['float'] * 4
        item_types = ['float', ('string', 100), ('string', 50)]
        user_dims = [None] * 4
        item_dims = [None, self.dim1, self.dim2]

        graph = gl.Graph()
        graph = graph.node(
            self.user_path, 'u',
            decoder=gl.Decoder(attr_types=user_types, attr_dims=user_dims))
        graph = graph.node(
            self.item_path, 'i',
            decoder=gl.Decoder(attr_types=item_types, attr_dims=item_dims))
        graph = graph.edge(self.u2i_path, ('u', 'i', 'u-i'),
                           decoder=gl.Decoder())
        graph = graph.edge(self.i2i_path, ('i', 'i', 'i-i'),
                           decoder=gl.Decoder())
        return graph.init()
    def test_homo_sage_supervised(self):
        """Train a supervised homogeneous GraphSAGE on generated item data.

        Builds a labeled item graph with undirected i-i edges, samples two
        hops, feeds the ego graph through the model and a node classifier,
        and checks the shapes of the fetched values on every step.
        """
        item_path = self.gen_node_labeled('item')
        i2i_path = utils.gen_edge_data(
            'item', 'item', (0, 100), (0, 100), schema=[])

        g = gl.Graph()
        item_decoder = gl.Decoder(attr_types=['float'] * 4,
                                  attr_dims=[None] * 4,
                                  labeled=True)
        g = g.node(item_path, 'i', decoder=item_decoder)
        g = g.edge(i2i_path, ('i', 'i', 'i-i'),
                   decoder=gl.Decoder(), directed=False)
        g = g.init()

        seeds = g.V('i').batch(10).alias('i')
        first_hop = seeds.outV('i-i').sample(5).by('topk').alias('hop1')
        second_hop = first_hop.outV('i-i').sample(5).by('random').alias('hop2')
        query = second_hop.values()
        df = tfg.DataFlow(query)

        dims = np.array([4, 16, 8])
        # NOTE(review): `droput` looks like a misspelling of `dropout`; the
        # callee's signature is not visible here, so it is passed unchanged.
        model = tfg.HomoEgoGraphSAGE(
            dims, bn_fn=None, active_fn=tf.nn.relu, droput=0.1)

        eg = df.get_ego_graph('i')
        embeddings = model.forward(eg)
        classifier = tfg.NodeClassifier(dims=[8, 4], class_num=2)
        logits, loss = classifier.forward(embeddings, eg.nodes.labels)

        target_ids = eg.nodes.ids
        out_degrees = eg.nodes.out_degrees

        trainer = tfg.Trainer()
        trainer.minimize(loss)

        def trace(ret):
            # ret holds one value per fetch: logits, loss, ids, out degrees.
            self.assertEqual(len(ret), 4)
            logits_val = ret[0]
            ids_val = ret[2]
            degrees_val = ret[3]
            self.assertEqual(list(logits_val.shape), [10, 2])
            self.assertEqual(list(ids_val.shape), [10])  # ids
            self.assertEqual(list(degrees_val.shape), [10])
            for degree in degrees_val:
                assert degree in (0, 2, 4, 6, 8)

        fetches = [logits, loss, target_ids, out_degrees]
        trainer.step_to_epochs(df, 10, fetches, trace)
        trainer.close()
        g.close()
Beispiel #13
0
    def test_node_iterate(self):
        """Iterate 'user' nodes with ``by_order`` and ``random`` samplers.

        The ordered pass collects ids until ``gl.OutOfRangeError`` (at most
        100 batches) and compares them with the expected id range; the random
        pass checks ten batches for attributes and subset membership.
        """
        data_path = self.gen_test_data([utils.ATTRIBUTED])
        attr_decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph()
        g = g.node(source=data_path,
                   node_type=self.node_type_,
                   decoder=attr_decoder)
        g.init(tracker=utils.TRACKER_PATH)

        batch_size = 4
        ordered = g.node_sampler(
            'user', batch_size=batch_size, strategy="by_order")
        seen_ids = []
        for _ in range(100):
            try:
                batch = ordered.get()
                utils.check_node_attrs(batch)
                seen_ids += list(batch.ids)
            except gl.OutOfRangeError:
                break
        ids = range(self.value_range_[0][0], self.value_range_[0][1])
        utils.check_sorted_equal(seen_ids, ids)

        shuffled = g.node_sampler(
            'user', batch_size=batch_size, strategy="random")
        for _ in range(10):
            batch = shuffled.get()
            utils.check_node_attrs(batch)
            utils.check_subset(batch.ids, ids)

        g.close()
    def test_node_iterate_using_gsl(self):
        """Iterate 'user' nodes through ``g.V(...)`` queries in eager mode.

        The plain query is run until ``gl.OutOfRangeError`` (at most 100
        batches) and the collected ids are compared with the expected range;
        the shuffled query is run ten times and checked for attributes and
        subset membership.
        """
        gl.set_eager_mode(True)
        data_path = self.gen_test_data([utils.ATTRIBUTED])
        attr_decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph()
        g = g.node(source=data_path,
                   node_type=self.node_type_,
                   decoder=attr_decoder)
        g.init(tracker=utils.TRACKER_PATH)

        batch_size = 4
        ordered_query = g.V('user').batch(batch_size).values()
        seen_ids = []
        for _ in range(100):
            try:
                batch = g.run(ordered_query)
                utils.check_node_attrs(batch)
                seen_ids += list(batch.ids)
            except gl.OutOfRangeError:
                break
        ids = range(self.value_range_[0], self.value_range_[1])
        utils.check_sorted_equal(seen_ids, ids)

        shuffled_query = g.V('user').batch(batch_size).shuffle().values()
        for _ in range(10):
            batch = g.run(shuffled_query)
            utils.check_node_attrs(batch)
            utils.check_subset(batch.ids, ids)

        g.close()
Beispiel #15
0
    def test_edge_iterate_using_gremlin(self):
        """Iterate 'first' edges through ``g.E(...)`` queries.

        The plain query is run until ``gl.OutOfRangeError`` (at most 100
        batches) and the collected src/dst ids are compared with the expected
        ranges; the shuffled query is run ten times and checked for weights
        and subset membership.
        """
        data_path = self.gen_test_data([utils.WEIGHTED], False)
        weight_decoder = gl.Decoder(weighted=True)
        g = gl.Graph()
        g = g.edge(source=data_path,
                   edge_type=self.edge_tuple_,
                   decoder=weight_decoder)
        g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

        batch_size = 4
        ordered_query = g.E('first').batch(batch_size).values()
        seen_src, seen_dst = [], []
        for _ in range(100):
            try:
                batch = g.run(ordered_query)
                utils.check_edge_weights(batch)
                seen_src += list(batch.src_ids)
                seen_dst += list(batch.dst_ids)
            except gl.OutOfRangeError:
                break
        src_ids = range(self.src_range_[0], self.src_range_[1])
        dst_ids = range(self.dst_range_[0], self.dst_range_[1])
        utils.check_sorted_equal(seen_src, src_ids)
        utils.check_sorted_equal(seen_dst, dst_ids)

        shuffled_query = g.E('first').batch(batch_size).shuffle().values()
        # The expected ranges are rebuilt for the shuffled pass.
        src_ids = range(self.src_range_[0], self.src_range_[1])
        dst_ids = range(self.dst_range_[0], self.dst_range_[1])
        for _ in range(10):
            batch = g.run(shuffled_query)
            utils.check_edge_weights(batch)
            utils.check_subset(batch.src_ids, src_ids)
            utils.check_subset(batch.dst_ids, dst_ids)
    def test_edge_shuffle(self):
        """Traverse 'first' edges once with ``shuffle(traverse=True)``.

        Batches are pulled with ``next()`` until ``gl.OutOfRangeError`` (at
        most 100 batches); the collected src/dst ids are then compared with
        the expected ranges.
        """
        data_path = self.gen_test_data([utils.WEIGHTED], False)
        weight_decoder = gl.Decoder(weighted=True)
        g = gl.Graph()
        g = g.edge(source=data_path,
                   edge_type=self.edge_tuple_,
                   decoder=weight_decoder)
        g.init(tracker=utils.TRACKER_PATH)

        batch_size = 4
        batched = g.E('first').batch(batch_size)
        sampler = batched.shuffle(traverse=True).values()
        seen_src, seen_dst = [], []
        for _ in range(100):
            try:
                batch = sampler.next()
                utils.check_edge_weights(batch)
                seen_src += list(batch.src_ids)
                seen_dst += list(batch.dst_ids)
            except gl.OutOfRangeError:
                break
        expected_src = range(self.src_range_[0], self.src_range_[1])
        expected_dst = range(self.dst_range_[0], self.dst_range_[1])
        utils.check_sorted_equal(seen_src, expected_src)
        utils.check_sorted_equal(seen_dst, expected_dst)

        g.close()
Beispiel #17
0
def load_graph(config):
    """Describe a graph whose train/val/test splits are separate node types.

    Args:
        config: Mapping with ``'dataset_folder'`` (raw string prefix),
            ``'node_type'``, ``'edge_type'`` and ``'features_num'`` entries.

    Returns:
        The object produced by chaining the ``node``/``edge`` calls on
        ``gl.Graph()``; ``init`` is not called here.
    """
    folder = config['dataset_folder']
    n_type = config['node_type']
    e_type = config['edge_type']

    graph = gl.Graph()
    feature_decoder = gl.Decoder(
        labeled=True,
        attr_types=["float"] * (config['features_num']),
        attr_delimiter=":")
    graph = graph.node(folder + "node_table",
                       node_type=n_type,
                       decoder=feature_decoder)
    graph = graph.edge(folder + "edge_table",
                       edge_type=(n_type, n_type, e_type),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)

    # Each split table becomes its own weighted node type.
    split_tables = (("train_table", "train"),
                    ("val_table", "val"),
                    ("test_table", "test"))
    for table_name, split_type in split_tables:
        graph = graph.node(folder + table_name,
                           node_type=split_type,
                           decoder=gl.Decoder(weighted=True))
    return graph
    def test_node_iterate_from_graph(self):
        """Sample nodes from the endpoints of 'first' edges.

        For the source side and then the destination side, an ordered sampler
        is drained (until ``gl.OutOfRangeError``, at most 100 batches) and
        compared with the expected id range, after which a random sampler is
        checked for subset membership over ten batches.
        """
        data_path = self.gen_test_data([utils.ATTRIBUTED], False)
        attr_decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph()
        g = g.edge(source=data_path,
                   edge_type=self.edge_tuple_,
                   decoder=attr_decoder)
        g.init(tracker=utils.TRACKER_PATH)

        batch_size = 4

        def make_sampler(strategy, node_from):
            # Node sampler over the endpoints of the 'first' edge type.
            return g.node_sampler('first',
                                  batch_size=batch_size,
                                  strategy=strategy,
                                  node_from=node_from)

        def drain_ordered(sampler, expected_type):
            # Collect ids until the sampler reports it is exhausted.
            collected = []
            for _ in range(100):
                try:
                    batch = sampler.get()
                    utils.check_node_type(batch, expected_type)
                    collected += list(batch.ids)
                except gl.OutOfRangeError:
                    break
            return collected

        def check_random(sampler, expected_ids):
            # Ten random batches must stay inside the expected id range.
            for _ in range(10):
                batch = sampler.get()
                utils.check_subset(batch.ids, expected_ids)

        # Source endpoints.
        seen_ids = drain_ordered(make_sampler("by_order", gl.EDGE_SRC), "user")
        ids = range(self.src_range_[0], self.src_range_[1])
        utils.check_sorted_equal(seen_ids, ids)
        check_random(make_sampler("random", gl.EDGE_SRC), ids)

        # Destination endpoints.
        seen_ids = drain_ordered(make_sampler("by_order", gl.EDGE_DST), "item")
        ids = range(self.dst_range_[0], self.dst_range_[1])
        utils.check_sorted_equal(seen_ids, ids)
        check_random(make_sampler("random", gl.EDGE_DST), ids)

        g.close()
Beispiel #19
0
def main(argv):
    """Build the local example graph and run the query checks on it.

    Data files are resolved relative to ``sys.path[0]``.

    Args:
        argv: Not used by this function.
    """
    data_root = sys.path[0]

    def data_file(relative):
        # Location of a data file below the script directory.
        return os.path.join(data_root, relative)

    # Step 1: Construct graph with data source.
    #   Edges:
    #     user<--(buy)-->item
    #     entity<--(relation)-->entity
    #     cond_node--(cond_edge)-->cond_node
    g = gl.Graph()
    builder = g.node(data_file("data/user"),
                     node_type="user",
                     decoder=gl.Decoder(weighted=True))
    builder = builder.node(
        data_file("data/item"),
        node_type="item",
        decoder=gl.Decoder(attr_types=['string', 'int', 'float', 'float', 'string']))
    builder = builder.edge(data_file("data/u-i"),
                           edge_type=("user", "item", "buy"),
                           decoder=gl.Decoder(weighted=True),
                           directed=False)
    builder = builder.node(
        data_file("data/entity"),
        node_type="entity",
        decoder=gl.Decoder(attr_types=['float', 'float', 'float', 'float'], labeled=True))
    builder = builder.edge(data_file("data/relation"),
                           edge_type=("entity", "entity", "relation"),
                           decoder=gl.Decoder(weighted=True),
                           directed=False)
    # The cond_edge type is loaded from the same "data/relation" file.
    builder = builder.edge(data_file("data/relation"),
                           edge_type=("cond_node", "cond_node", "cond_edge"),
                           decoder=gl.Decoder(weighted=True),
                           directed=True)
    builder.node(
        data_file("data/cond_node"),
        node_type="cond_node",
        decoder=gl.Decoder(attr_types=['int','int','float','string'], weighted=True))
    g.init()

    # Step 2: Describe the queries on graph.
    test_node_iterate(g, local=True)
    test_edge_iterate(g, local=True)
    test_truncated_full_edge_sample(g)
    test_conditional_negtaive_sample(g)

    g.close()
Beispiel #20
0
    def test_attributed(self):
        """Load attributed nodes and check the attributes of ``self.ids_``."""
        data_path = self.gen_test_data([utils.ATTRIBUTED])
        attr_decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph()
        g = g.node(source=data_path,
                   node_type=self.node_type_,
                   decoder=attr_decoder)
        g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

        fetched = g.get_nodes(node_type=self.node_type_, ids=self.ids_)
        self.check_attrs(fetched)
Beispiel #21
0
 def initialize(self):
     """Create the class-level graph once and clear ``needs_initial``.

     The graph is stored on the class (not the instance) as ``g`` and is
     initialized with the tracker path from ``utils``.
     """
     self.__class__.needs_initial = False
     data_path = self.gen_test_data([utils.ATTRIBUTED])
     attr_decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
     graph = gl.Graph()
     graph = graph.node(source=data_path,
                        node_type=self.node_type_,
                        decoder=attr_decoder)
     self.__class__.g = graph
     self.__class__.g.init(tracker=utils.TRACKER_PATH)
Beispiel #22
0
    def test_weighted(self):
        """Load weighted nodes and check the weights of ``self.ids_``."""
        data_path = self.gen_test_data([utils.WEIGHTED])
        weight_decoder = gl.Decoder(weighted=True)
        g = gl.Graph()
        g = g.node(source=data_path,
                   node_type=self.node_type_,
                   decoder=weight_decoder)
        g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

        fetched = g.get_nodes(node_type=self.node_type_, ids=self.ids_)
        self.check_weights(fetched)
Beispiel #23
0
    def setUp(self):
        """Prepare the data and the decoders.

        num_int_attrs = num_float_attrs = num_string_attrs = 2.
        dst_ids = utils.fixed_dst_ids(src_ids, dst_range).
        With fixed_dst_ids, the src_ids with src_id % 5 == 0 have no edge.
        """
        # Type names of the node/edge fixtures.
        self._node1_type, self._node2_type = "node1", "node2"
        self._edge1_type, self._edge2_type, self._edge3_type = (
            "edge1", "edge2", "edge3")

        # Id ranges as (start, stop) pairs.
        self._node1_range, self._node2_range = (0, 100), (100, 200)

        self._node1_ids = range(*self._node1_range)
        self._node2_ids = utils.fixed_dst_ids(self._node1_ids,
                                              self._node2_range)

        attr_types = utils.ATTR_TYPES
        self._node1_decoder = gl.Decoder(attr_types=attr_types)
        self._node2_decoder = gl.Decoder(labeled=True, weighted=True)
        self._edge1_decoder = gl.Decoder(labeled=True, attr_types=attr_types)
        self._edge2_decoder = gl.Decoder(weighted=True, attr_types=attr_types)
        self._edge3_decoder = gl.Decoder(weighted=True)

        self._seed_node1_ids = np.array([2, 7, 8])
        self._seed_node2_ids = np.array([102, 107, 108])

        # There is no edge whose src_id = 5 | 105.
        self._seed_node1_ids_with_nbr_missing = np.array([5, 10, 110])
        self._seed_node2_ids_with_nbr_missing = np.array([102, 105, 108])

        # Default values stored for the tests.
        self._default_dst_id = -1
        self._default_int_attr = 1000
        self._default_float_attr = 999.9
        self._default_string_attr = 'hehe'

        # Run initialize() only while the needs_initial flag is set.
        if self.needs_initial:
            self.initialize()
        # Pause for a second when no graph is available on self.g.
        if not self.g:
            time.sleep(1)
Beispiel #24
0
def load_graph(args):
    """Describe a labeled graph with mask-based train/val/test splits.

    All tables live under ``args.dataset_folder``, which is used as a raw
    string prefix for the table names.

    Args:
        args: Object providing ``dataset_folder``, ``node_type``,
            ``edge_type`` and ``features_num``.

    Returns:
        The object produced by chaining the ``node``/``edge`` calls on
        ``gl.Graph()``; ``init`` is not called here.
    """
    folder = args.dataset_folder
    n_type = args.node_type
    e_type = args.edge_type

    graph = gl.Graph()
    feature_decoder = gl.Decoder(labeled=True,
                                 attr_types=["float"] * args.features_num,
                                 attr_delimiter=":")
    graph = graph.node(folder + "node_table",
                       node_type=n_type,
                       decoder=feature_decoder)
    graph = graph.edge(folder + "edge_table",
                       edge_type=(n_type, n_type, e_type),
                       decoder=gl.Decoder(weighted=True),
                       directed=False)

    # Split tables share the node type and are distinguished by mask.
    graph = graph.node(folder + "train_table",
                       node_type=n_type,
                       decoder=gl.Decoder(weighted=True),
                       mask=gl.Mask.TRAIN)
    graph = graph.node(folder + "val_table",
                       node_type=n_type,
                       decoder=gl.Decoder(weighted=True),
                       mask=gl.Mask.VAL)
    graph = graph.node(folder + "test_table",
                       node_type=n_type,
                       decoder=gl.Decoder(weighted=True),
                       mask=gl.Mask.TEST)
    return graph
    def test_attributed(self):
        """Load attributed edges and fetch the 'first' edges by id pairs.

        NOTE(review): the fetched edges are not checked by anything in the
        visible code — confirm whether an assertion is missing.
        """
        data_path = self.gen_test_data([utils.ATTRIBUTED], False)
        attr_decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph()
        g = g.edge(source=data_path,
                   edge_type=self.edge_tuple_,
                   decoder=attr_decoder)
        g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

        edges = g.get_edges(edge_type="first",
                            src_ids=self.src_ids_,
                            dst_ids=self.dst_ids_)
Beispiel #26
0
    def test_labeled(self):
        """Load labeled nodes and check the labels of ``self.ids_``."""
        data_path = self.gen_test_data([utils.LABELED])
        label_decoder = gl.Decoder(labeled=True)
        g = gl.Graph()
        g = g.node(source=data_path,
                   node_type=self.node_type_,
                   decoder=label_decoder)
        g.init(tracker=utils.TRACKER_PATH)

        fetched = g.get_nodes(node_type=self.node_type_, ids=self.ids_)
        self.check_labels(fetched)

        g.close()
Beispiel #27
0
  def test_weighted_attributed(self):
    """Load weighted, attributed nodes and check both aspects of ``self.ids_``."""
    data_path = self.gen_test_data([utils.WEIGHTED, utils.ATTRIBUTED])
    node_decoder = gl.Decoder(weighted=True, attr_types=utils.ATTR_TYPES)
    g = gl.Graph()
    g = g.node(source=data_path,
               node_type=self.node_type_,
               decoder=node_decoder)
    g.init(tracker=utils.TRACKER_PATH)

    fetched = g.get_nodes(node_type=self.node_type_, ids=self.ids_)
    self.check_weights(fetched)
    self.check_attrs(fetched)

    g.close()
Beispiel #28
0
    def test_basic(self):
        """Load plain edges and check the ids of one emitted batch of four."""
        data_path = self.gen_test_data([], False)
        plain_decoder = gl.Decoder()
        g = gl.Graph()
        g = g.edge(source=data_path,
                   edge_type=self.edge_tuple_,
                   decoder=plain_decoder)
        g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

        batch = g.E("first").batch(4).emit()
        src_lo, src_hi = self.src_range_[0], self.src_range_[1]
        utils.check_ids(batch.src_ids, range(src_lo, src_hi))
        dst_lo, dst_hi = self.dst_range_[0], self.dst_range_[1]
        utils.check_ids(batch.dst_ids, range(dst_lo, dst_hi))
Beispiel #29
0
    def test_labeled(self):
        """Load labeled edges and check ids and labels of one emitted batch."""
        data_path = self.gen_test_data([utils.LABELED], False)
        label_decoder = gl.Decoder(labeled=True)
        g = gl.Graph()
        g = g.edge(source=data_path,
                   edge_type=self.edge_tuple_,
                   decoder=label_decoder)
        g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

        batch = g.E("first").batch(self.batch_size_).emit()
        src_lo, src_hi = self.src_range_[0], self.src_range_[1]
        utils.check_ids(batch.src_ids, range(src_lo, src_hi))
        dst_lo, dst_hi = self.dst_range_[0], self.dst_range_[1]
        utils.check_ids(batch.dst_ids, range(dst_lo, dst_hi))
        utils.check_edge_labels(batch)
Beispiel #30
0
def load_graph():
  """Load node and edge data to build the cora graph.

  Note that node_type must be "i", and edge_type must be "r_i";
  the number of edge tables must be the same as FLAGS.num_relations.

  NOTE(review): ``attr_types`` is not defined in this function; presumably it
  is a module-level name — confirm.

  Returns:
    The object produced by chaining the ``node``/``edge`` calls on
    ``gl.Graph()``; ``init`` is not called here.
  """
  script_dir = sys.path[0]
  folder = os.path.join(script_dir, '../../data/cora/')

  graph = gl.Graph()
  graph = graph.node(folder + "node_table",
                     node_type="i",
                     decoder=gl.Decoder(labeled=True,
                                        attr_types=attr_types,
                                        attr_delimiter=":"))

  # Two relations: the plain edge table and the one with self loops.
  graph = graph.edge(folder + "edge_table",
                     edge_type=("i", "i", "r_0"),
                     decoder=gl.Decoder(weighted=True),
                     directed=False)
  graph = graph.edge(folder + "edge_table_with_self_loop",
                     edge_type=("i", "i", "r_1"),
                     decoder=gl.Decoder(weighted=True),
                     directed=False)

  # Train/test tables reuse node type "i" and differ only by mask.
  graph = graph.node(folder + "train_table",
                     node_type="i",
                     decoder=gl.Decoder(weighted=True),
                     mask=gl.Mask.TRAIN)
  graph = graph.node(folder + "test_table",
                     node_type="i",
                     decoder=gl.Decoder(weighted=True),
                     mask=gl.Mask.TEST)
  return graph