Example #1
0
 def test_sampling_with_mask_eager_mode(self):
     """Traverse VAL-masked edges in eager mode and check the visited sources.

     Edges of ``self._edge1_type`` selected by ``gl.Mask.VAL`` are read in
     batches of 8. For every edge the query takes the source vertex, the
     destination vertex, and 3 "topk" neighbors of the destination over
     ``self._edge2_type``. The query is drained twice, and after each pass
     the collected source ids are compared with the expected list.
     """
     gl.set_eager_mode(True)
     batch_size = 8

     def expand(edge):
         # The source branch is built before the destination branch, and the
         # result is the ordered pair (src, nbr).
         src = edge.outV().alias('src')
         nbr = edge.inV().alias('dst') \
             .outV(self._edge2_type).sample(3).by('topk').alias('nbr')
         return (src, nbr)

     def select(x):
         return (x['src'].ids,
                 x['val'].labels,
                 x['dst'].ids, x['dst'].weights, x['dst'].labels,
                 x['nbr'].ids, x['nbr'].int_attrs)

     q = self.g.E(self._edge1_type, mask=gl.Mask.VAL) \
         .batch(batch_size).alias('val') \
         .each(expand) \
         .values(select)

     batches_seen = 0
     for _ in range(2):
         seen_src_ids = []
         while True:
             try:
                 (src_ids, _val_labels, dst_ids, dst_weights, dst_labels,
                  _nbr_ids, _nbr_int_attrs) = q.next()
                 utils.check_id_weights(dst_ids, dst_weights)
                 utils.check_equal(dst_labels, dst_ids)
                 batches_seen += 1
                 seen_src_ids.extend(src_ids)
             except gl.OutOfRangeError:
                 # The query is exhausted for this pass.
                 break

         # Each id in the validation range is expected once per id returned
         # by utils.fixed_dst_ids for it.
         first, last = self._val_node_range[0], self._val_node_range[1]
         expected = []
         for node_id in range(first, last):
             repeat = len(utils.fixed_dst_ids(node_id, self._node2_range))
             expected.extend([node_id] * repeat)
         utils.check_sorted_equal(seen_src_ids, expected)
  def test_1hop_using_gsl(self):
    """Sample one hop with the "edge_weight" strategy through the GSL query.

    Starting from ``self._seed_node1_ids``, 6 out-edges of
    ``self._edge1_type`` are sampled per seed; the emitted edges and their
    destination nodes are then validated with the ``utils`` checkers.
    """
    gl.set_eager_mode(True)
    fanout = 6
    seeds = self._seed_node1_ids

    query = self.g.V(self._node1_type, feed=seeds)
    query = query.outE(self._edge1_type).sample(fanout).by("edge_weight")
    result = query.inV().emit()

    # Index 0 holds the seeds; 1 and 2 are the sampled edges and nodes.
    sampled_edges = result[1]
    sampled_nodes = result[2]

    # Edge-side checks.
    utils.check_fixed_edge_dst_ids(
        sampled_edges,
        dst_range=self._node2_range,
        expected_src_ids=seeds)
    utils.check_edge_type(
        sampled_edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type)
    utils.check_edge_shape(sampled_edges, seeds.size * fanout)
    utils.check_edge_attrs(sampled_edges)
    utils.check_edge_labels(sampled_edges)

    # Node-side checks: the nodes must be exactly the edge destinations.
    utils.check_equal(sampled_nodes.ids, sampled_edges.dst_ids)
    utils.check_node_ids(sampled_nodes, self._node2_ids)
    utils.check_node_type(sampled_nodes, node_type=self._node2_type)
    utils.check_node_shape(sampled_nodes, seeds.size * fanout)
    utils.check_node_weights(sampled_nodes)
    utils.check_node_labels(sampled_nodes)
    def test_1hop_using_gsl(self):
        """Sample one hop with the "in_degree" strategy through the GSL query.

        Seeds come from ``self._seed_node1_ids``; 6 out-edges of
        ``self._edge1_type`` are sampled per seed and both the edges and the
        reached nodes are checked with the ``utils`` helpers.
        """
        gl.set_eager_mode(True)
        fanout = 6
        seeds = self._seed_node1_ids

        hop = (self.g.V(self._node1_type, feed=seeds)
               .outE(self._edge1_type).sample(fanout).by("in_degree")
               .inV()
               .emit())
        hop_edges, hop_nodes = hop[1], hop[2]

        # Validate the sampled edges.
        utils.check_fixed_edge_dst_ids(
            hop_edges, dst_range=self._node2_range, expected_src_ids=seeds)
        utils.check_edge_type(
            hop_edges,
            src_type=self._node1_type,
            dst_type=self._node2_type,
            edge_type=self._edge1_type)
        utils.check_edge_shape(hop_edges, seeds.size * fanout)
        utils.check_edge_attrs(hop_edges)
        utils.check_edge_labels(hop_edges)

        # Validate the nodes reached through those edges.
        utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
        utils.check_node_ids(hop_nodes, self._node2_ids)
        utils.check_node_type(hop_nodes, node_type=self._node2_type)
        utils.check_node_shape(hop_nodes, seeds.size * fanout)
        utils.check_node_weights(hop_nodes)
        utils.check_node_labels(hop_nodes)
    def test_2hop_using_gsl_with_undirected_edge_homo(self):
        """Sample two random hops over ``self._edge3_type`` with ``repeat``.

        The seeds are ``self._seed_node2_ids`` of ``self._node2_type``. The
        per-hop fanouts come from ``expand_factor`` ([3, 2]) and are used both
        to build the query and to index the flattened results.
        """
        gl.set_eager_mode(True)
        expand_factor = [3, 2]
        ids = self._seed_node2_ids

        def repeat_fn(q, params):
            # params is an (edge_type, fanout) pair for one hop.
            return q.outE(params[0]).sample(params[1]).by("random").inV()

        nbrs = self.g.V(self._node2_type, feed=ids) \
          .repeat(repeat_fn, 2,
                  params_list=[(self._edge3_type, expand_factor[0]),
                               (self._edge3_type, expand_factor[1])]) \
          .emit()

        nodes1 = nbrs[2]
        nodes2 = nbrs[4]

        # Flatten once instead of on every loop iteration.
        hop1_ids = nodes1.ids.flatten()
        hop2_ids = nodes2.ids.flatten()

        for i in range(ids.size * expand_factor[0]):
            src_id = hop1_ids[i]
            out_id = utils.fixed_dst_ids(src_id, self._node2_range)
            # NOTE(review): the window [i, i + fanout) is kept from the
            # original test; if hop2_ids is laid out as fanout entries per
            # first-hop node, the neighbors of hop1_ids[i] would presumably
            # start at i * fanout instead. Confirm before changing.
            for dst_id in hop2_ids[i:i + expand_factor[1]]:
                in_id = utils.fixed_dst_ids(dst_id, self._node2_range)
                utils.check_ids(src_id, out_id + in_id)
    def test_2hop_using_gsl_with_undirected_edge(self):
        """Sample out-edges then in-edges of ``self._edge1_type`` at random.

        The first hop follows ``outE`` from ``self._seed_node1_ids`` with
        fanout 3; the second hop follows ``inE`` on the same edge type with
        fanout 2. Edge and node types of both hops are then checked.
        """
        gl.set_eager_mode(True)
        first_fanout, second_fanout = 3, 2
        seeds = self._seed_node1_ids

        result = (self.g.V(self._node1_type, feed=seeds)
                  .outE(self._edge1_type).sample(first_fanout).by("random")
                  .inV()
                  .inE(self._edge1_type).sample(second_fanout).by("random")
                  .inV()
                  .emit())

        hop1_edges, hop1_nodes = result[1], result[2]
        hop2_edges, hop2_nodes = result[3], result[4]

        utils.check_fixed_edge_dst_ids(
            hop1_edges, dst_range=self._node2_range, expected_src_ids=seeds)

        utils.check_edge_type(
            hop1_edges, self._node1_type, self._node2_type, self._edge1_type)
        # The second hop is expected to carry the "_reverse" edge type.
        reversed_type = self._edge1_type + "_reverse"
        utils.check_edge_type(
            hop2_edges, self._node2_type, self._node1_type, reversed_type)

        utils.check_node_type(hop1_nodes, self._node2_type)
        utils.check_node_type(hop2_nodes, self._node1_type)
        utils.check_node_ids(hop2_nodes, self._node1_ids)
    def test_node_iterate_using_gsl(self):
        """Iterate 'user' nodes in batches, in order and then shuffled.

        A graph is built from generated attributed node data. The first query
        is run until ``gl.OutOfRangeError`` (at most 100 times) and the
        collected ids must match ``self.value_range_``. The shuffled query is
        run 10 times and every batch must be a subset of that range.

        The graph is closed in a ``finally`` clause so that a failing
        assertion does not leave it open.
        """
        gl.set_eager_mode(True)
        file_path = self.gen_test_data([utils.ATTRIBUTED])
        decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph() \
          .node(source=file_path, node_type=self.node_type_, decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        try:
            batch_size = 4
            ids = range(self.value_range_[0], self.value_range_[1])

            query = g.V('user').batch(batch_size).values()
            res_ids = []
            max_iter = 100
            for _ in range(max_iter):
                # Only the run itself signals exhaustion; keep the checks
                # outside so their failures are never mistaken for it.
                try:
                    nodes = g.run(query)
                except gl.OutOfRangeError:
                    break
                utils.check_node_attrs(nodes)
                res_ids.extend(nodes.ids)
            utils.check_sorted_equal(res_ids, ids)

            query = g.V('user').batch(batch_size).shuffle().values()
            max_iter = 10
            for _ in range(max_iter):
                nodes = g.run(query)
                utils.check_node_attrs(nodes)
                utils.check_subset(nodes.ids, ids)
        finally:
            g.close()
    def test_2hop(self):
        """Sample two hops with ``neighbor_sampler`` using "in_degree".

        Hop 1 follows ``self._edge1_type`` with fanout 3 and hop 2 follows
        ``self._edge2_type`` with fanout 2. Each layer's edges and nodes are
        checked; the flattened hop-1 node ids act as the sources of hop 2.
        """
        gl.set_eager_mode(True)
        fanouts = [3, 2]
        seeds = self._seed_node1_ids

        sampler = self.g.neighbor_sampler(
            [self._edge1_type, self._edge2_type],
            expand_factor=fanouts,
            strategy="in_degree")
        layers = sampler.get(seeds)

        # ---- first hop: node1 -> node2 over edge1 ----
        hop_edges = layers.layer_edges(1)
        hop_nodes = layers.layer_nodes(1)

        utils.check_fixed_edge_dst_ids(
            hop_edges, dst_range=self._node2_range, expected_src_ids=seeds)
        utils.check_edge_type(
            hop_edges,
            src_type=self._node1_type,
            dst_type=self._node2_type,
            edge_type=self._edge1_type)
        utils.check_edge_shape(hop_edges, seeds.size * fanouts[0])
        utils.check_edge_attrs(hop_edges)
        utils.check_edge_labels(hop_edges)

        utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
        utils.check_node_ids(hop_nodes, self._node2_ids)
        utils.check_node_type(hop_nodes, node_type=self._node2_type)
        utils.check_node_shape(hop_nodes, seeds.size * fanouts[0])
        utils.check_node_weights(hop_nodes)
        utils.check_node_labels(hop_nodes)

        # ---- second hop: node2 -> node1 over edge2 ----
        # The nodes reached by hop 1 are the sources of hop 2.
        seeds = hop_nodes.ids.reshape(-1)
        hop_edges = layers.layer_edges(2)
        hop_nodes = layers.layer_nodes(2)

        utils.check_fixed_edge_dst_ids(
            hop_edges, dst_range=self._node1_range, expected_src_ids=seeds)
        utils.check_edge_type(
            hop_edges,
            src_type=self._node2_type,
            dst_type=self._node1_type,
            edge_type=self._edge2_type)
        utils.check_edge_shape(hop_edges, seeds.size * fanouts[1])
        utils.check_edge_attrs(hop_edges)
        utils.check_edge_weights(hop_edges)

        utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
        utils.check_node_ids(hop_nodes, self._node1_ids)
        utils.check_node_type(hop_nodes, node_type=self._node1_type)
        utils.check_node_shape(hop_nodes, seeds.size * fanouts[1])
Example #8
0
    def test_basic(self):
        """Load plain edges and check the ids of one emitted batch of 4.

        The graph is built from generated data with a default ``gl.Decoder``.
        Source and destination ids of the batch are checked against
        ``self.src_range_`` and ``self.dst_range_``.

        The graph is closed in a ``finally`` clause so that a failing check
        does not leave it open.
        """
        gl.set_eager_mode(True)
        file_path = self.gen_test_data([], False)
        decoder = gl.Decoder()
        g = gl.Graph() \
          .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        try:
            edges = g.E("first").batch(4).emit()
            utils.check_ids(edges.src_ids,
                            range(self.src_range_[0], self.src_range_[1]))
            utils.check_ids(edges.dst_ids,
                            range(self.dst_range_[0], self.dst_range_[1]))
        finally:
            g.close()
  def test_neg_using_gsl(self):
    """Negative-sample nodes with the "node_weight" strategy via GSL.

    For each fed id, 6 negatives of ``self._node2_type`` are sampled. The
    sampled ids must lie in 100..199 excluding the fed ids.
    """
    import graphlearn as gl
    gl.set_eager_mode(True)
    neg_count = 6
    # NOTE(review): the seeds are taken from _seed_node1_ids but fed as
    # _node2_type vertices; confirm this is intended.
    seeds = self._seed_node1_ids

    query = self.g.V(self._node2_type, feed=seeds)
    query = query.Neg(self._node2_type).sample(neg_count).by("node_weight")
    result = query.emit()
    negatives = result[1]

    allowed_ids = [i for i in range(100, 200) if i not in seeds]
    utils.check_ids(negatives.ids, allowed_ids)
    utils.check_node_type(negatives, node_type=self._node2_type)
    utils.check_node_shape(negatives, seeds.size * neg_count)
    def test_weighted(self):
        """Load weighted edges and check the ids and weights of one batch.

        The graph is built from generated weighted data with
        ``gl.Decoder(weighted=True)``; a batch of ``self.batch_size_`` edges
        is emitted and its ids and weights are checked.

        The graph is closed in a ``finally`` clause so that a failing check
        does not leave it open.
        """
        gl.set_eager_mode(True)
        file_path = self.gen_test_data([utils.WEIGHTED], False)
        decoder = gl.Decoder(weighted=True)
        g = gl.Graph() \
          .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        try:
            edges = g.E("first").batch(self.batch_size_).emit()
            utils.check_ids(edges.src_ids,
                            range(self.src_range_[0], self.src_range_[1]))
            utils.check_ids(edges.dst_ids,
                            range(self.dst_range_[0], self.dst_range_[1]))
            utils.check_edge_weights(edges)
        finally:
            g.close()
    def test_1hop_circular_padding(self):
        """Sample one hop without replacement under ``gl.CIRCULAR`` padding.

        With fanout 6 over ``self._edge1_type``, the ids sampled for each
        seed must be set-equal to ``utils.fixed_dst_ids(seed, (100, 200))``.
        """
        gl.set_eager_mode(True)
        gl.set_padding_mode(gl.CIRCULAR)
        fanout = 6
        seeds = self._seed_node1_ids

        sampler = self.g.neighbor_sampler(
            self._edge1_type,
            expand_factor=fanout,
            strategy="random_without_replacement")
        layers = sampler.get(seeds)
        edges = layers.layer_edges(1)
        nodes = layers.layer_nodes(1)

        for seed, sampled_ids in zip(seeds, nodes.ids):
            all_neighbors = utils.fixed_dst_ids(seed, (100, 200))
            utils.check_set_equal(sampled_ids, all_neighbors)
  def test_1hop_using_gsl(self):
    """Sample the top-2 out-edges per seed through the GSL query.

    Runs under ``gl.REPLICATE`` padding from ``self._seed_node2_ids`` over
    ``self._edge2_type`` and checks the emitted edge ids and weights.
    """
    gl.set_eager_mode(True)
    gl.set_padding_mode(gl.REPLICATE)
    seeds = self._seed_node2_ids

    query = self.g.V(self._node2_type, feed=seeds)
    query = query.outE(self._edge2_type).sample(2).by("topk")
    result = query.inV().emit()
    topk_edges = result[1]

    utils.check_topk_edge_ids(
        topk_edges, seeds, (0, 100),
        expand_factor=2,
        default_dst_id=self._default_dst_id)
    utils.check_half_exist_edge_weights(
        topk_edges,
        default_dst_id=self._default_dst_id)
    def test_1hop_using_gsl(self):
        """Sample one hop without replacement through the GSL query.

        Runs under ``gl.REPLICATE`` padding with fanout 6. The ids sampled
        for each seed must be set-equal to
        ``utils.fixed_dst_ids(seed, (100, 200))`` plus the value -1.
        """
        gl.set_eager_mode(True)
        gl.set_padding_mode(gl.REPLICATE)
        fanout = 6
        seeds = self._seed_node1_ids

        result = (self.g.V(self._node1_type, feed=seeds)
                  .outE(self._edge1_type)
                  .sample(fanout).by("random_without_replacement")
                  .inV()
                  .emit())
        edges = result[1]
        nodes = result[2]

        for seed, sampled_ids in zip(seeds, nodes.ids):
            allowed = utils.fixed_dst_ids(seed, (100, 200))
            # -1 is accepted in addition to the real neighbor ids.
            allowed.extend([-1])
            utils.check_set_equal(sampled_ids, allowed)
    def test_labeled_attributed(self):
        """Load labeled, attributed edges and check one emitted batch.

        The graph is built from generated data with
        ``gl.Decoder(labeled=True, attr_types=utils.ATTR_TYPES)``; a batch of
        ``self.batch_size_`` edges is emitted and its ids, labels and
        attributes are checked.

        The graph is closed in a ``finally`` clause so that a failing check
        does not leave it open.
        """
        gl.set_eager_mode(True)
        file_path = self.gen_test_data([utils.LABELED, utils.ATTRIBUTED],
                                       False)
        decoder = gl.Decoder(labeled=True, attr_types=utils.ATTR_TYPES)
        g = gl.Graph() \
          .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        try:
            edges = g.E("first").batch(self.batch_size_).emit()
            utils.check_ids(edges.src_ids,
                            range(self.src_range_[0], self.src_range_[1]))
            utils.check_ids(edges.dst_ids,
                            range(self.dst_range_[0], self.dst_range_[1]))
            utils.check_edge_labels(edges)
            utils.check_edge_attrs(edges)
        finally:
            g.close()
Example #15
0
    def test_neg_using_gsl(self):
        """Negative-sample over ``self._edge1_type`` with "in_degree" via GSL.

        For each seed in ``self._seed_node1_ids``, 6 negatives are sampled
        with ``outNeg``. The ids sampled for a seed must come from
        ``self._node2_ids`` minus the ids returned by
        ``utils.fixed_dst_ids`` for that seed.
        """
        gl.set_eager_mode(True)
        expand_factor = 6
        ids = self._seed_node1_ids
        nbrs = self.g.V(self._node1_type, feed=ids) \
          .outNeg(self._edge1_type).sample(expand_factor).by("in_degree") \
          .emit()

        nodes = nbrs[1]

        for i, seed in enumerate(ids):
            # Looked up once per seed rather than once per candidate id.
            connected = utils.fixed_dst_ids(seed, self._node2_range)
            expected_ids = [iid for iid in self._node2_ids
                            if iid not in connected]
            utils.check_ids(nodes.ids[i], expected_ids)

        utils.check_node_type(nodes, node_type=self._node2_type)
        utils.check_node_shape(nodes, ids.size * expand_factor)
Example #16
0
 def test_traverse_with_mask_eager_mode(self):
     """Traverse TRAIN-masked nodes in eager mode and check ids and attrs.

     Nodes of ``self._node1_type`` selected by ``gl.Mask.TRAIN`` are read in
     batches of 8 until ``gl.OutOfRangeError``. Int, float and string
     attributes of each batch are checked against its ids, and the collected
     ids must match ``self._train_node_range``.
     """
     gl.set_eager_mode(True)
     batch_size = 8

     def select(x):
         train = x['train']
         return (train.ids, x['train'].int_attrs,
                 x['train'].float_attrs, x['train'].string_attrs)

     q = (self.g.V(self._node1_type, mask=gl.Mask.TRAIN)
          .batch(batch_size).alias('train')
          .values(select))

     batches_seen = 0
     collected_ids = []
     while True:
         try:
             node_ids, int_attrs, float_attrs, string_attrs = q.next()
             utils.check_i_attrs(int_attrs, node_ids)
             utils.check_f_attrs(float_attrs, node_ids)
             utils.check_s_attrs(string_attrs, node_ids)
             batches_seen += 1
             collected_ids.extend(node_ids)
         except gl.OutOfRangeError:
             # No more batches.
             break

     expected_ids = range(self._train_node_range[0],
                          self._train_node_range[1])
     utils.check_sorted_equal(collected_ids, expected_ids)
    def test_1hop_using_gsl(self):
        """Fetch neighbors with the "full" strategy through the GSL query.

        The sample count passed to ``sample`` is 0. For every entry of the
        emitted nodes, its ids must be sorted-equal to
        ``utils.fixed_dst_ids`` of the seed at the same position.
        """
        gl.set_eager_mode(True)
        sample_count = 0
        seeds = self._seed_node1_ids

        result = (self.g.V(self._node1_type, feed=seeds)
                  .outE(self._edge1_type).sample(sample_count).by("full")
                  .inV()
                  .emit())
        nodes = result[2]

        for position, node in enumerate(nodes):
            utils.check_sorted_equal(
                utils.fixed_dst_ids(seeds[position], self._node2_range),
                node.ids)

        utils.check_node_ids(nodes, self._node2_ids)
        utils.check_node_type(nodes, node_type=self._node2_type)
        utils.check_node_weights(nodes)
        utils.check_node_labels(nodes)
  def test_2hop_using_gsl(self):
    """Sample two hops with the "edge_weight" strategy through GSL.

    Hop 1 follows ``self._edge1_type`` with fanout 3, hop 2 follows
    ``self._edge2_type`` with fanout 2. The destination ids of each hop's
    edges are checked against the sources feeding that hop.
    """
    gl.set_eager_mode(True)
    fanouts = [3, 2]
    seeds = self._seed_node1_ids

    result = (self.g.V(self._node1_type, feed=seeds)
              .outE(self._edge1_type).sample(fanouts[0]).by("edge_weight")
              .inV()
              .outE(self._edge2_type).sample(fanouts[1]).by("edge_weight")
              .inV()
              .emit())

    # First hop.
    hop_edges, hop_nodes = result[1], result[2]
    utils.check_fixed_edge_dst_ids(
        hop_edges, dst_range=self._node2_range, expected_src_ids=seeds)

    # Second hop: the first-hop nodes, flattened, are the sources.
    seeds = hop_nodes.ids.reshape(-1)
    hop_edges, hop_nodes = result[3], result[4]
    utils.check_fixed_edge_dst_ids(
        hop_edges, dst_range=self._node1_range, expected_src_ids=seeds)
    def test_1hop_with_neighbor_missing(self):
        """Sample one hop from ``self._seed_node1_ids_with_nbr_missing``.

        Uses ``neighbor_sampler`` with the "random" strategy and fanout 6.
        The edges and nodes are checked against the configured default
        destination id and default attribute values.
        """
        gl.set_eager_mode(True)
        fanout = 6
        seeds = self._seed_node1_ids_with_nbr_missing

        sampler = self.g.neighbor_sampler(
            self._edge1_type, expand_factor=fanout, strategy="random")
        layers = sampler.get(seeds)
        hop_edges = layers.layer_edges(1)
        hop_nodes = layers.layer_nodes(1)

        # Edge-side checks.
        utils.check_fixed_edge_dst_ids(
            hop_edges,
            dst_range=self._node2_range,
            expected_src_ids=seeds,
            default_dst_id=self._default_dst_id)
        utils.check_edge_type(
            hop_edges,
            src_type=self._node1_type,
            dst_type=self._node2_type,
            edge_type=self._edge1_type)
        utils.check_edge_shape(hop_edges, seeds.size * fanout)
        utils.check_not_exist_edge_attrs(
            hop_edges,
            default_int_attr=self._default_int_attr,
            default_float_attr=self._default_float_attr,
            default_string_attr=self._default_string_attr)
        utils.check_not_exist_edge_labels(hop_edges)

        # Node-side checks: only the default destination id is allowed.
        utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
        utils.check_node_ids(hop_nodes, [self._default_dst_id])
        utils.check_node_type(hop_nodes, node_type=self._node2_type)
        utils.check_node_shape(hop_nodes, seeds.size * fanout)
        utils.check_not_exist_node_weights(hop_nodes)
        utils.check_not_exist_node_labels(hop_nodes)
Example #20
0
  def test_edge_iterate_using_gsl(self):
    """Iterate 'first' edges in batches, in order and then shuffled.

    A graph is built from generated weighted edge data. The first query is
    run until ``gl.OutOfRangeError`` (at most 100 times) and the collected
    source/destination ids must match ``self.src_range_`` and
    ``self.dst_range_``. The shuffled query is run 10 times and every batch
    must be a subset of those ranges.

    The graph is closed in a ``finally`` clause so that a failing assertion
    does not leave it open.
    """
    gl.set_eager_mode(True)
    file_path = self.gen_test_data([utils.WEIGHTED], False)
    decoder = gl.Decoder(weighted=True)
    g = gl.Graph() \
      .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    try:
      batch_size = 4
      # Shared by both the ordered and the shuffled phase.
      src_ids = range(self.src_range_[0], self.src_range_[1])
      dst_ids = range(self.dst_range_[0], self.dst_range_[1])

      query = g.E('first').batch(batch_size).values()
      res_src = []
      res_dst = []
      max_iter = 100
      for _ in range(max_iter):
        # Only the run itself signals exhaustion; keep the checks outside
        # so their failures are never mistaken for it.
        try:
          edges = g.run(query)
        except gl.OutOfRangeError:
          break
        utils.check_edge_weights(edges)
        res_src.extend(edges.src_ids)
        res_dst.extend(edges.dst_ids)
      utils.check_sorted_equal(res_src, src_ids)
      utils.check_sorted_equal(res_dst, dst_ids)

      query = g.E('first').batch(batch_size).shuffle().values()
      max_iter = 10
      for _ in range(max_iter):
        edges = g.run(query)
        utils.check_edge_weights(edges)
        utils.check_subset(edges.src_ids, src_ids)
        utils.check_subset(edges.dst_ids, dst_ids)
    finally:
      g.close()
    def test_2hop_using_gsl(self):
        """Sample two hops with the "in_degree" strategy through GSL.

        Hop 1 follows ``self._edge1_type`` with fanout 3, hop 2 follows
        ``self._edge2_type`` with fanout 2. The destination ids of each
        hop's edges are checked against the sources feeding that hop.
        """
        gl.set_eager_mode(True)
        fanouts = [3, 2]
        seeds = self._seed_node1_ids

        query = self.g.V(self._node1_type, feed=seeds)
        query = query.outE(self._edge1_type) \
            .sample(fanouts[0]).by("in_degree").inV()
        query = query.outE(self._edge2_type) \
            .sample(fanouts[1]).by("in_degree").inV()
        result = query.emit()

        # First hop.
        hop_edges = result[1]
        hop_nodes = result[2]
        utils.check_fixed_edge_dst_ids(
            hop_edges, dst_range=self._node2_range, expected_src_ids=seeds)

        # Second hop: the first-hop nodes, flattened, are the sources.
        seeds = hop_nodes.ids.reshape(-1)
        hop_edges = result[3]
        hop_nodes = result[4]
        utils.check_fixed_edge_dst_ids(
            hop_edges, dst_range=self._node1_range, expected_src_ids=seeds)
 def test_query_not_exist_gsl(self):
     """Emit nodes for ``self.not_exist_ids_`` and check their attributes.

     The emitted result is handed to ``self.check_not_exist_attrs``.
     """
     gl.set_eager_mode(True)
     query = self.g.V(self.node_type_, feed=self.not_exist_ids_)
     missing_nodes = query.emit()
     self.check_not_exist_attrs(missing_nodes)
 def test_1hop_with_agg(self):
     """Fetch "full" neighbors over ``self._edge2_type`` and sum-aggregate.

     The aggregation result is only printed; nothing is asserted.
     """
     gl.set_eager_mode(True)
     seeds = self._seed_node2_ids
     query = self.g.V(self._node2_type, feed=seeds)
     result = query.outV(self._edge2_type).sample().by("full").emit()
     aggregated = result[1].embedding_agg(func="sum")
     print(aggregated)