コード例 #1
0
  def test_node_iterate_using_gsl(self):
    """Iterate attributed nodes with a GSL query, sequentially then shuffled.

    The sequential pass pulls batches until ``gl.OutOfRangeError`` (at most
    ``max_iter`` batches) and compares the collected ids against
    ``range(*self.value_range_)``. The shuffled pass draws a fixed number of
    batches and checks that each batch only holds ids from that range.
    """
    file_path = self.gen_test_data([utils.ATTRIBUTED])
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
      .node(source=file_path, node_type=self.node_type_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check below raises, so a failing test does
    # not leave an initialized graph behind.
    try:
      batch_size = 4
      query = g.V("user").batch(batch_size).alias('n').values()
      ds = gl.Dataset(query, window=1)

      res_ids = []
      max_iter = 100
      for _ in range(max_iter):
        try:
          nodes = ds.next()['n']
          utils.check_node_attrs(nodes)
          res_ids.extend(list(nodes.ids))
        except gl.OutOfRangeError:
          # The traversal has been exhausted.
          break
      ids = range(self.value_range_[0], self.value_range_[1])
      utils.check_sorted_equal(res_ids, ids)

      query = g.V('user').batch(batch_size).shuffle().alias('n').values()
      ds = gl.Dataset(query)

      max_iter = 10
      for _ in range(max_iter):
        nodes = ds.next()['n']
        utils.check_node_attrs(nodes)
        utils.check_subset(nodes.ids, ids)
    finally:
      g.close()
コード例 #2
0
 def test_conditional_negative_sample(self):
   """Check conditional negative sampling against the positive dst node.

   For each edge in the first batch, four negatives are sampled for the src
   node with a condition on "dst". The negatives must be disjoint from
   ``[src_id + 2, src_id + 3, src_id + 5]`` and must satisfy the modulo
   relations to the dst id asserted in ``_check_ids``.
   """
   def _check_ids(pos_id, neg_ids):
     # One expected modulo relation per sampled negative, by position.
     # NOTE(review): presumably these mirror how the test data derives the
     # conditioned attribute columns from the node id -- confirm against
     # the data generator.
     utils.check_val_equal(neg_ids[0] % 5, pos_id % 5)
     utils.check_val_equal(neg_ids[1] % 4, pos_id % 4)
     utils.check_val_equal(neg_ids[2] % 3, pos_id % 3)
     utils.check_val_equal(neg_ids[3] % 3, pos_id % 3)
   q = self.g.E(self._cond_edge_type).batch(4).alias("e") \
             .each(lambda e: (
               e.inV().alias('dst'),
               e.outV().alias('src') \
                .outNeg(self._cond_edge_type).sample(4).by('random').where(
                  "dst",
                  condition={
                    "int_cols": [0,1], "int_props": [0.25,0.25],
                    "str_cols": [0], "str_props": [0.5]}).alias('neg'))) \
             .values()
   dataset = gl.Dataset(q)
   res = dataset.next()
   src_ids = res["src"].ids
   dst_ids = res["dst"].ids
   neg_ids = res["neg"].ids
   # `src_id` rather than `id`, so the builtin is not shadowed.
   for idx, src_id in enumerate(src_ids):
     print('src_id:', src_id, 'dst_id:', dst_ids[idx], 'neg_ids:', neg_ids[idx])
     nbr_ids = [src_id+2,src_id+3,src_id+5]
     utils.check_disjoint(neg_ids[idx], nbr_ids)
     _check_ids(dst_ids[idx], neg_ids[idx])
コード例 #3
0
  def test_edge_shuffle(self):
    """Traverse weighted edges in shuffled order and check full coverage.

    Batches of the 'first' edge type are pulled until ``gl.OutOfRangeError``
    (at most ``max_iter`` batches). The collected src and dst ids are then
    compared against ``self.src_range_`` and ``self.dst_range_``.
    """
    file_path = self.gen_test_data([utils.WEIGHTED], False)
    decoder = gl.Decoder(weighted=True)
    g = gl.Graph() \
      .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check below raises.
    try:
      batch_size = 4
      sampler = g.E('first').batch(batch_size).shuffle(traverse=True).alias('seed').values()
      res_src = []
      res_dst = []
      max_iter = 100
      ds = gl.Dataset(sampler)
      for _ in range(max_iter):
        try:
          edges = ds.next()['seed']
          utils.check_edge_weights(edges)
          res_src.extend(list(edges.src_ids))
          res_dst.extend(list(edges.dst_ids))
        except gl.OutOfRangeError:
          # The traversal has been exhausted.
          break
      src_ids = range(self.src_range_[0], self.src_range_[1])
      dst_ids = range(self.dst_range_[0], self.dst_range_[1])
      utils.check_sorted_equal(res_src, src_ids)
      utils.check_sorted_equal(res_dst, dst_ids)
    finally:
      g.close()
コード例 #4
0
ファイル: test_gsl_mask.py プロジェクト: knut0815/graph-learn
 def test_sampling_with_mask(self):
     """Traverse TEST-masked edges and sample top-k neighbors of their dst.

     Every batch is checked with ``utils.check_id_weights`` and
     ``utils.check_equal`` on the dst nodes. After the dataset is drained,
     the collected src ids are compared with an expected list that repeats
     each id of ``self._test_node_range`` once per entry returned by
     ``utils.fixed_dst_ids``.
     """
     gl.set_eager_mode(False)
     bs = 8

     def _branches(e):
         # Expand each seed edge into its src node and its dst node + hop.
         src = e.outV().alias('src')
         nbr = e.inV().alias('dst') \
                .outV(self._edge2_type).sample(3).by('topk').alias('nbr')
         return (src, nbr)

     def _select(x):
         # Fields handed back by every `dataset.next()` call, in order.
         return (x['src'].ids,
                 x['test'].labels,
                 x['dst'].ids, x['dst'].weights, x['dst'].labels,
                 x['nbr'].ids, x['nbr'].int_attrs)

     seeds = self.g.E(self._edge1_type, mask=gl.Mask.TEST).batch(bs).alias('test')
     q = seeds.each(_branches).values(_select)
     dataset = gl.Dataset(q)

     batches_seen = 0
     collected_src = []
     while True:
         try:
             (src_ids, _edge_labels, dst_ids, dst_weights, dst_labels,
              _nbr_ids, _nbr_ints) = dataset.next()
             utils.check_id_weights(dst_ids, dst_weights)
             utils.check_equal(dst_labels, dst_ids)
             batches_seen += 1
             collected_src.extend(list(src_ids))
         except gl.OutOfRangeError:
             break

     # Each test node id is expected once per fixed dst id it has.
     expected = [
         node_id
         for node_id in range(self._test_node_range[0], self._test_node_range[1])
         for _ in range(len(utils.fixed_dst_ids(node_id, self._node2_range)))
     ]
     utils.check_sorted_equal(collected_src, expected)
コード例 #5
0
 def test_negative_sample(self):
   """Sample 5 random negatives for a batch of 2 nodes and check shapes."""
   seeds = self.g.V(self._node1_type).batch(2).alias('a')
   negatives = seeds.outNeg(self._edge1_type).sample(5).by("random").alias('b')
   q = negatives.values(lambda x: (x['a'].ids, x['b'].weights))
   first_batch = gl.Dataset(q).next()
   # Element 0 holds the seed ids, element 1 the negatives' weights.
   expected_shapes = ([2], [2, 5])
   for position, shape in enumerate(expected_shapes):
     utils.check_equal(list(first_batch[position].shape), shape)
コード例 #6
0
 def test_sample_edge(self):
   """Sample 3 random out-edges per node and check the result shapes."""
   seeds = self.g.V(self._node1_type).batch(8).alias('a')
   hop_edges = seeds.outE(self._edge1_type).sample(3).by("random").alias('b')
   q = hop_edges.inV().alias('c').values()
   first_batch = gl.Dataset(q).next()
   # Checked in alias order: seed nodes, sampled edges, edge dst nodes.
   for alias, shape in (('a', [8]), ('b', [8, 3]), ('c', [8, 3])):
     utils.check_equal(list(first_batch[alias].shape), shape)
コード例 #7
0
 def test_iterate_node_with_2hop(self):
   """Drain a 2-hop random-sampling query and check every batch's shapes."""
   seeds = self.g.V(self._node1_type).batch(2).alias('a')
   hop1 = seeds.outV(self._edge1_type).sample(3).by('random').alias('b')
   hop2 = hop1.outV(self._edge2_type).sample(4).by('random').alias('c')
   dataset = gl.Dataset(hop2.values(), 10)
   # The second hop is checked as [batch * hop1 samples, hop2 samples].
   expected_shapes = (('a', [2]), ('b', [2, 3]), ('c', [2 * 3, 4]))
   while True:
     try:
       batch = dataset.next()
       for alias, shape in expected_shapes:
         utils.check_equal(list(batch[alias].shape), shape)
     except gl.OutOfRangeError:
       break
コード例 #8
0
 def test_iterate_edge_with_1hop(self):
   """Drain an edge traversal with one sampled hop and check the shapes."""
   seed_edges = self.g.E(self._edge1_type).batch(4).alias("a")
   src_nodes = seed_edges.outV().alias("b")
   nbrs = src_nodes.outV(self._edge1_type).sample(2).by("random").alias("c")
   dataset = gl.Dataset(nbrs.values())
   while True:
     try:
       batch = dataset.next()
       src = batch['b']
       utils.check_equal(list(batch['a'].shape), [4])
       utils.check_equal(list(src.shape), [4])
       # int_attrs is checked as [batch_size, int_attr_num].
       utils.check_equal(list(src.int_attrs.shape), [4, 2])
       utils.check_equal(list(batch['c'].shape), [4, 2])
     except gl.OutOfRangeError:
       break
コード例 #9
0
 def test_iterate_edge_with_each(self):
   """Drain an `each`-branched edge query; only checks that it runs."""
   def _branches(x):
     # One sampled hop from the src side and one from the dst side.
     from_src = x.outV().alias('b') \
                 .outV(self._edge1_type).sample(2).by('random').alias('d')
     from_dst = x.inV().alias('c') \
                 .outV(self._edge2_type).sample(2).by('random').alias('e')
     return (from_src, from_dst)

   def _select(x):
     return (x['a'].int_attrs, x['d'].weights, x['e'].ids)

   seed_edges = self.g.E(self._edge1_type).batch(4).alias('a')
   dataset = gl.Dataset(seed_edges.each(_branches).values(_select))
   while True:
     try:
       dataset.next()
     except gl.OutOfRangeError:
       break
コード例 #10
0
 def test_full_sample(self):
   """Check "full" neighbor sampling capped at 3 neighbors per node.

   For each seed node, the reported offset must equal
   ``min(len(expected neighbors), 3)`` and the matching slice of the flat
   neighbor ids must be a subset of the expected neighbors.
   """
   seeds = self.g.V(self._node2_type).batch(4).alias('a')
   nbr_query = seeds.outV(self._edge2_type).sample(3).by("full").alias('b')
   q = nbr_query.values(lambda x: (x['a'].ids, x['b'].ids, x['b'].offsets))
   dataset = gl.Dataset(q)
   while True:
     try:
       src, nbrs, offsets = dataset.next()
       cursor = 0  # start of the current node's slice in the flat `nbrs`
       for row, count in enumerate(offsets):
         allowed = utils.fixed_dst_ids(src[row], self._node1_range)
         assert count == min(len(allowed), 3)
         end = cursor + count
         utils.check_subset(nbrs[cursor:end], allowed)
         cursor = end
     except gl.OutOfRangeError:
       break
コード例 #11
0
  def test_basic(self):
    """Fetch one batch of plain edges and check its src and dst ids.

    The edge source is generated with no optional fields and decoded with a
    default ``gl.Decoder``. The ids of the first batch are checked against
    ``self.src_range_`` and ``self.dst_range_``.
    """
    file_path = self.gen_test_data([], False)
    decoder = gl.Decoder()
    g = gl.Graph() \
      .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check below raises.
    try:
      query = g.E("first").batch(4).alias('e').values()
      ds = gl.Dataset(query)

      edges = ds.next()['e']
      utils.check_ids(edges.src_ids,
                      range(self.src_range_[0], self.src_range_[1]))
      utils.check_ids(edges.dst_ids,
                      range(self.dst_range_[0], self.dst_range_[1]))
    finally:
      g.close()
コード例 #12
0
    def test_labeled(self):
        """Fetch one batch of labeled edges and check its ids and labels.

        The edge source is generated with ``utils.LABELED`` and decoded with
        ``labeled=True``. The first batch's src and dst ids are checked
        against ``self.src_range_`` and ``self.dst_range_``, then its labels
        are checked with ``utils.check_edge_labels``.
        """
        file_path = self.gen_test_data([utils.LABELED], False)
        decoder = gl.Decoder(labeled=True)
        g = gl.Graph() \
          .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        # Close the graph even when a check below raises.
        try:
            query = g.E("first").batch(self.batch_size_).alias('e').values()
            ds = gl.Dataset(query, window=1)
            edges = ds.next()['e']
            utils.check_ids(edges.src_ids,
                            range(self.src_range_[0], self.src_range_[1]))
            utils.check_ids(edges.dst_ids,
                            range(self.dst_range_[0], self.dst_range_[1]))
            utils.check_edge_labels(edges)
        finally:
            g.close()
コード例 #13
0
 def test_sample_with_filter(self):
   """Check that filtered neighbor sampling never returns the dst node.

   For every seed edge, two neighbors of the src node are sampled with
   ``filter('dst')``. Each row of sampled ids must not contain the id of
   that edge's dst node.
   """
   def _branches(e):
     dst = e.inV().alias('dst')
     sampled = e.outV().alias('src') \
                .outV(self._edge1_type).sample(2).by("random").filter('dst').alias("b")
     return (dst, sampled)

   seed_edges = self.g.E(self._edge1_type).batch(4).alias("a")
   dataset = gl.Dataset(seed_edges.each(_branches).values())
   while True:
     try:
       batch = dataset.next()
       sampled_nodes = batch['b']
       utils.check_equal(list(sampled_nodes.shape), [4, 2])
       excluded_ids = batch['dst'].ids
       kept_rows = sampled_nodes.ids
       for excluded, kept in zip(excluded_ids, kept_rows):
         assert excluded not in kept
     except gl.OutOfRangeError:
       break
コード例 #14
0
 def test_traverse_with_mask(self):
   """Traverse TEST-masked nodes for two full passes and check coverage.

   Each pass pulls batches until ``gl.OutOfRangeError``, checks the int,
   float and string attributes of every batch against its ids, and then
   compares the collected ids against ``self._test_node_range``.
   """
   bs = 8
   q = self.g.V(self._node1_type, mask=gl.Mask.TEST).batch(bs).alias('test') \
           .values(lambda x:
              (x['test'].ids, x['test'].int_attrs, x['test'].float_attrs, x['test'].string_attrs))
   dataset = gl.Dataset(q)
   for _ in range(2):
     res = []
     while True:
       try:
         # Descriptive names keep the unpacked attributes from overwriting
         # the outer loop variable.
         ids, int_attrs, float_attrs, string_attrs = dataset.next()
         utils.check_i_attrs(int_attrs, ids)
         utils.check_f_attrs(float_attrs, ids)
         utils.check_s_attrs(string_attrs, ids)
         res += list(ids)
       except gl.OutOfRangeError:
         break
     utils.check_sorted_equal(res, range(self._test_node_range[0], self._test_node_range[1]))
コード例 #15
0
  def test_weighted_labeled_attributed(self):
    """Fetch one batch of edges carrying weights, labels and attributes.

    The first batch's src and dst ids are checked against
    ``self.src_range_`` and ``self.dst_range_``, then its labels, attributes
    and weights are checked with the matching ``utils`` helpers.
    """
    file_path = self.gen_test_data(
        [utils.WEIGHTED, utils.LABELED, utils.ATTRIBUTED], False)
    decoder = gl.Decoder(
        weighted=True, labeled=True, attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
      .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    # Close the graph even when a check below raises.
    try:
      query = g.E("first").batch(self.batch_size_).alias('e').values()
      ds = gl.Dataset(query)

      edges = ds.next()['e']
      utils.check_ids(edges.src_ids,
                      range(self.src_range_[0], self.src_range_[1]))
      utils.check_ids(edges.dst_ids,
                      range(self.dst_range_[0], self.dst_range_[1]))
      utils.check_edge_labels(edges)
      utils.check_edge_attrs(edges)
      utils.check_edge_weights(edges)
    finally:
      g.close()
コード例 #16
0
def test_truncated_full_edge_sample(graph):
    """Iterate "buy" edges and fully sample in-edges of their dst nodes.

    The sampled hop is requested with ``sample(200).by("full")``. For every
    sampled edge the weight must equal ``0.1 * (src_id + dst_id)`` within
    ``10**-6``. The offsets of the first batch are printed.
    """
    seed_edges = graph.E("buy").batch(3).shuffle(traverse=True).alias("edges")
    dst_nodes = seed_edges.inV().alias("dst")
    hop_edges = dst_nodes.inE("buy").sample(200).by("full") \
        .alias("dst_hop1_edges")
    # Result unused: the call attaches the hop's end nodes to the query.
    hop_edges.inV().alias("dst_hop1")

    ds = gl.Dataset(seed_edges.values())
    step = 0
    while True:
        try:
            batch = ds.next()
            step += 1
            sampled = batch["dst_hop1_edges"]
            if step == 1:
                print(sampled.offsets)
            flat_src = list(sampled.src_ids.flatten())
            flat_dst = list(sampled.dst_ids.flatten())
            flat_weights = list(sampled.weights.flatten())
            for s, d, w in zip(flat_src, flat_dst, flat_weights):
                assert abs(0.1 * (s + d) - w) < 10**-6
        except gl.OutOfRangeError:
            break
コード例 #17
0
def test_conditional_negtaive_sample(graph):
    """Run conditional negative sampling once and print the sampled rows.

    Negatives are sampled for the src node of each "cond_edge" edge with a
    condition on "dst". For every row of the first batch, the src/dst ids
    and the first four negative ids are printed together with one int or
    string attribute each. Nothing is asserted.
    """
    condition = {
        "unique": False,
        "batch_share": True,
        "int_cols": [0, 1],
        "int_props": [0.25, 0.25],
        "str_cols": [0],
        "str_props": [0.5]
    }

    seed_edges = graph.E("cond_edge").batch(4).shuffle(traverse=True) \
        .alias("edges")
    src = seed_edges.outV().alias("src")
    dst = seed_edges.inV().alias("dst")
    negatives = src.outNeg("cond_edge").sample(5).by("in_degree") \
        .where(target="dst", condition=condition).alias("neg")
    # The original discards this result too; the call is kept as-is.
    negatives.values()

    ds = gl.Dataset(seed_edges.values())
    try:
        batch = ds.next()
        src_ids = batch["src"].ids
        dst_ids = batch["dst"].ids
        neg_nodes = batch["neg"]
        for row in range(src_ids.size):
            print('src_id:%d\tdst_id:%d' % (src_ids[row], dst_ids[row]))
            print('neg_id_1:%d\tint_0_attr:%d' % (
                neg_nodes.ids[row][0], neg_nodes.int_attrs[row][0][0]))
            print('neg_id_2:%d\tint_1_attr:%d' % (
                neg_nodes.ids[row][1], neg_nodes.int_attrs[row][1][1]))
            print('neg_id_3:%d\tstr_0_attr:%s' % (
                neg_nodes.ids[row][2], neg_nodes.string_attrs[row][2][0]))
            print('neg_id_4:%d\tstr_0_attr:%s\n' % (
                neg_nodes.ids[row][3], neg_nodes.string_attrs[row][3][0]))
    except gl.OutOfRangeError:
        print("OutOfRange...")
コード例 #18
0
def test_edge_iterate(graph, local=False):
    """Iterate "buy" edges and sample hops around their src and dst nodes.

    user-(buy)-item   (1) iterate edges
      |         |
    (buy)  (buy_reverse)
      |         |
    item       user   (2) sample neighbors of src and dst nodes.

    Two epochs are drained. Every batch is checked for the expected
    node/edge types and for edge weights equal to
    ``0.1 * (src_id + dst_id)``. Result shapes are checked on every step
    when ``local`` is true, otherwise only on the first step of an epoch.
    """
    def _assert_weights(hop_edges):
        # Each weight must equal 0.1 * (src_id + dst_id) within 10**-6.
        flat_src = list(hop_edges.src_ids.flatten())
        flat_dst = list(hop_edges.dst_ids.flatten())
        flat_weights = list(hop_edges.weights.flatten())
        for s, d, w in zip(flat_src, flat_dst, flat_weights):
            assert abs(0.1 * (s + d) - w) < 10**-6

    seed_edges = graph.E("buy").batch(32).shuffle(traverse=True).alias("edges")
    src = seed_edges.outV().alias("src")
    dst = seed_edges.inV().alias("dst")
    neg = src.outNeg("buy").sample(2).by("in_degree").alias("neg")

    # The statements below extend the query; their results are not used.
    neg.inV("buy").sample(4).by("random").alias("neg_hop1")
    src.outE("buy").sample(5).by("random").alias("src_hop1_edges") \
       .inV().alias("src_hop1")
    dst.inE("buy").sample(3).by("edge_weight").alias("dst_hop1_edges") \
        .inV().alias("dst_hop1")

    ds = gl.Dataset(seed_edges.values())
    last_step = 1000 // 32 + 1  # total buy edges count is 1000
    for _ in range(2):
        step = 0
        while True:
            try:
                res = ds.next()
                step += 1

                batch_edges = res["edges"]
                src_nodes = res["src"]
                dst_nodes = res["dst"]
                neg_nodes = res["neg"]
                src_hop1_edges = res["src_hop1_edges"]
                src_hop1_nodes = res["src_hop1"]
                neg_hop1_nodes = res["neg_hop1"]
                dst_hop1_edges = res["dst_hop1_edges"]
                dst_hop1_nodes = res["dst_hop1"]

                expected_types = (
                    (batch_edges, ("user", "item", "buy")),
                    (src_nodes, "user"),
                    (dst_nodes, "item"),
                    (neg_nodes, "item"),
                    (src_hop1_edges, ("user", "item", "buy")),
                    (src_hop1_nodes, "item"),
                    (neg_hop1_nodes, "user"),
                    (dst_hop1_edges, ("item", "user", "buy_reverse")),
                    (dst_hop1_nodes, "user"),
                )
                for value, expected in expected_types:
                    assert value.type == expected

                # Pick the batch size to check shapes against, if any.
                if local and step == last_step:
                    rows = 1000 % 32  # the final, partial batch
                elif local or step == 1:
                    rows = 32
                else:
                    rows = None

                if rows is not None:
                    assert tuple(neg_nodes.float_attrs.shape) == (rows, 2, 2)
                    assert tuple(neg_hop1_nodes.weights.shape) == (rows * 2, 4)
                    assert tuple(src_hop1_edges.weights.shape) == (rows, 5)
                    assert tuple(
                        src_hop1_nodes.float_attrs.shape) == (rows, 5, 2)
                    assert tuple(dst_hop1_edges.weights.shape) == (rows, 3)
                    assert tuple(dst_hop1_nodes.weights.shape) == (rows, 3)

                _assert_weights(dst_hop1_edges)
                _assert_weights(src_hop1_edges)

            except gl.OutOfRangeError:
                break
コード例 #19
0
def test_node_iterate(graph, local=False):
    """Iterate users and sample two hops: user-(buy)-item-(buy_reverse)-user.

    Two epochs are drained. Every batch is checked for the expected result
    classes and node/edge types. Result shapes are checked on every step
    when ``local`` is true, otherwise only on the first step of an epoch.
    """
    seeds = graph.V("user").batch(32).shuffle(traverse=True).alias("src")
    hop1 = seeds.outV("buy").sample(5).by("edge_weight").alias("hop1")
    hop1_to_hop2 = hop1.inE("buy").sample(2).by("random").alias("hop1-hop2")
    query = hop1_to_hop2.inV().alias("hop2").values()

    ds = gl.Dataset(query)
    last_step = 100 // 32 + 1  # total user nodes count is 100
    for _ in range(2):
        step = 0
        while True:
            try:
                res = ds.next()
                step += 1

                src_nodes = res["src"]
                hop1_nodes = res["hop1"]
                hop1_hop2_edges = res["hop1-hop2"]
                hop2_nodes = res["hop2"]

                expected_classes = (
                    (src_nodes, gl.Nodes),
                    (hop1_nodes, gl.Nodes),
                    (hop1_hop2_edges, gl.Edges),
                    (hop2_nodes, gl.Nodes),
                )
                for value, cls in expected_classes:
                    assert isinstance(value, cls)

                assert src_nodes.type == "user"
                assert hop1_nodes.type == "item"
                assert hop1_hop2_edges.type == ("item", "user", "buy_reverse")
                assert hop1_hop2_edges.edge_type == "buy_reverse"
                assert hop2_nodes.type == "user"

                # Pick the batch size to check shapes against, if any.
                if local and step == last_step:
                    rows = 100 % 32  # the final, partial batch
                elif local or step == 1:
                    rows = 32
                else:
                    rows = None

                if rows is not None:
                    hop2_shape = (rows * 5, 2)
                    assert tuple(src_nodes.ids.shape) == (rows, )
                    assert tuple(hop1_nodes.ids.shape) == (rows, 5)
                    assert tuple(hop1_hop2_edges.src_ids.shape) == hop2_shape
                    assert tuple(hop1_hop2_edges.dst_ids.shape) == hop2_shape
                    assert tuple(hop2_nodes.ids.shape) == hop2_shape

                    assert tuple(hop1_nodes.float_attrs.shape) == (
                        rows, 5, 2)  # 2 float attrs
                    assert tuple(hop1_hop2_edges.weights.shape) == hop2_shape
                    assert tuple(hop2_nodes.weights.shape) == hop2_shape
            except gl.OutOfRangeError:
                break