def test_2hop_using_gsl_with_undirected_edge_homo(self):
    """Sample two hops over edge3 with the GSL API, starting from node2 seeds.

    The traversal is used on undirected edges whose source and destination
    nodes share the same type. Each hop walks ``outE -> sample -> inV`` with
    the per-hop fan-outs listed in ``expand_factor``. Every hop-1 node id is
    then passed to ``utils.check_ids`` together with the concatenation of its
    own fixed destination ids and those of a paired hop-2 node.
    """
    gl.set_eager_mode(True)
    expand_factor = [3, 2]
    ids = self._seed_node2_ids

    def repeat_fn(q, params):
        # params is one (edge_type, fan_out) entry taken from params_list.
        return q.outE(params[0]).sample(params[1]).by("random").inV()

    # One entry per hop, so the fan-outs are stated in a single place.
    params_list = [(self._edge3_type, fan_out) for fan_out in expand_factor]
    nbrs = (
        self.g.V(self._node2_type, feed=ids)
        .repeat(repeat_fn, len(expand_factor), params_list=params_list)
        .emit()
    )
    # Indices 2 and 4 of the emitted result hold the hop-1 and hop-2 nodes.
    nodes1 = nbrs[2]
    nodes2 = nbrs[4]

    hop1_ids = nodes1.ids.flatten()
    hop2_ids = nodes2.ids.flatten()
    for i in range(ids.size * expand_factor[0]):
        src_id = hop1_ids[i]
        out_id = utils.fixed_dst_ids(src_id, self._node2_range)
        # NOTE(review): the hop-2 window starts at i rather than at
        # i * expand_factor[1]; kept as written -- confirm the intended
        # pairing of hop-1 and hop-2 entries.
        for dst_id in hop2_ids[i:i + expand_factor[1]]:
            in_id = utils.fixed_dst_ids(dst_id, self._node2_range)
            utils.check_ids(src_id, out_id + in_id)
def setUp(self):
    """Prepare the type names, decoders, id ranges and seeds used by the tests.

    num_int_attrs = num_float_attrs = num_string_attrs = 2.
    dst_ids = utils.fixed_dst_ids(src_ids, dst_range); with fixed_dst_ids,
    any src_id with src_id % 5 == 0 has no edge.
    """
    # --- type names -------------------------------------------------------
    self._node1_type, self._node2_type = "node1", "node2"
    self._edge1_type, self._edge2_type, self._edge3_type = (
        "edge1", "edge2", "edge3")
    # for subgraph sampler
    self._node3_type, self._edge4_type = "entity", "relation"
    # for conditional negative sampler
    self._cond_node_type, self._cond_edge_type = "cond_item", "cond_sim"

    # --- decoders ---------------------------------------------------------
    attr_types = utils.ATTR_TYPES
    self._node1_decoder = gl.Decoder(attr_types=attr_types)
    self._node2_decoder = gl.Decoder(weighted=True, labeled=True)
    self._edge1_decoder = gl.Decoder(attr_types=attr_types, labeled=True)
    self._edge2_decoder = gl.Decoder(attr_types=attr_types, weighted=True)
    self._edge3_decoder = gl.Decoder(weighted=True)
    self._node3_decoder = gl.Decoder(attr_types=utils.ENTITY_ATTR_TYPES,
                                     labeled=True)
    self._edge4_decoder = gl.Decoder(weighted=True)
    # NOTE(review): "deocder" is misspelled; the name is left untouched
    # because code outside this method may read the attribute.
    self._cond_node_deocder = gl.Decoder(attr_types=utils.COND_ATTR_TYPES,
                                         weighted=True)

    # --- id ranges --------------------------------------------------------
    self._node1_range = (0, 100)
    self._node2_range = (100, 200)
    # test for mask.
    self._train_node_range = (0, 50)
    self._test_node_range = (50, 70)
    self._val_node_range = (70, 100)

    self._node1_ids = range(*self._node1_range)
    self._node2_ids = utils.fixed_dst_ids(self._node1_ids, self._node2_range)

    # --- seeds ------------------------------------------------------------
    self._seed_node1_ids = np.array([2, 7, 8])
    self._seed_node2_ids = np.array([102, 107, 108])
    self._seed_node1_ids_with_nbr_missing = np.array([5, 10, 110])
    # There is no edge whose src_id is 5 or 105.
    self._seed_node2_ids_with_nbr_missing = np.array([102, 105, 108])

    # --- default values ---------------------------------------------------
    self._default_dst_id = -1
    self._default_int_attr = 1000
    self._default_float_attr = 999.9
    self._default_string_attr = 'hehe'

    if self.needs_initial:
        self.initialize()
    if not self.g:
        # NOTE(review): presumably gives a graph set up elsewhere time to
        # become available -- confirm.
        time.sleep(1)
def test_sampling_with_mask_eager_mode(self):
    """Iterate the VAL-masked edge1 edges in eager mode for two passes.

    Each batch of edges is expanded to its source node, its destination
    node, and three 'topk' edge2 neighbors of the destination. Destination
    ids are checked against their weights and labels. After each pass the
    collected source ids must match, once sorted, every id of the
    validation range repeated once per fixed destination id it has.
    """
    gl.set_eager_mode(True)
    bs = 8

    def expand(e):
        # Same tuple, built in the same order: source first, then neighbor.
        src = e.outV().alias('src')
        nbr = (e.inV().alias('dst')
               .outV(self._edge2_type).sample(3).by('topk').alias('nbr'))
        return (src, nbr)

    def select(x):
        return (x['src'].ids, x['val'].labels,
                x['dst'].ids, x['dst'].weights, x['dst'].labels,
                x['nbr'].ids, x['nbr'].int_attrs)

    val_edges = self.g.E(self._edge1_type, mask=gl.Mask.VAL)
    q = val_edges.batch(bs).alias('val').each(expand).values(select)

    iteration = 0
    for _ in range(2):
        res = []
        while True:
            try:
                (src_ids, edge_labels, dst_ids, dst_weights,
                 dst_labels, nbr_ids, nbr_int_attrs) = q.next()
                utils.check_id_weights(dst_ids, dst_weights)
                utils.check_equal(dst_labels, dst_ids)
                iteration += 1
                res.extend(src_ids)
            except gl.OutOfRangeError:
                # The query is exhausted for this pass.
                break

        val_range = self._val_node_range
        expected = []
        for elem in range(val_range[0], val_range[1]):
            degree = len(utils.fixed_dst_ids(elem, self._node2_range))
            expected.extend([elem] * degree)
        utils.check_sorted_equal(res, expected)
def test_1hop(self):
    """Sample the full one-hop neighborhood of the node1 seeds.

    For every seed, the ids returned by the "full" neighbor sampler are
    compared via ``utils.check_sorted_equal`` with the ids that
    ``utils.fixed_dst_ids`` yields for that seed in the node2 range.
    """
    seeds = self._seed_node1_ids
    sampler = self.g.neighbor_sampler(self._edge1_type, 1, strategy="full")
    sampled = sampler.get(seeds)
    edges = sampled.layer_edges(1)  # fetched but not inspected below
    per_seed_nodes = sampled.layer_nodes(1)

    for position, node in enumerate(per_seed_nodes):
        expected = utils.fixed_dst_ids(seeds[position], self._node2_range)
        utils.check_sorted_equal(expected, node.ids)
def test_1hop_circular_padding(self):
    """Sample one hop of neighbors under circular padding.

    With ``gl.CIRCULAR`` padding and a fan-out of 6, the ids sampled for
    each node1 seed are compared through ``utils.check_set_equal`` with
    that seed's fixed destination ids in the node2 range.
    """
    gl.set_padding_mode(gl.CIRCULAR)
    expand_factor = 6
    ids = self._seed_node1_ids
    nbr_s = self.g.neighbor_sampler(self._edge1_type,
                                    expand_factor=expand_factor,
                                    strategy="random_without_replacement")
    nbrs = nbr_s.get(ids)
    # Result unused; the call is kept so the edge accessor is still exercised.
    nbrs.layer_edges(1)
    nodes = nbrs.layer_nodes(1)

    # The loop variable gets its own name instead of rebinding ``nbrs``, and
    # the destination range comes from the fixture rather than a literal.
    for iid, sampled_ids in zip(ids, nodes.ids):
        full_nbrs = utils.fixed_dst_ids(iid, self._node2_range)
        utils.check_set_equal(sampled_ids, full_nbrs)
def test_full_sample(self):
    """Check "full" neighbor sampling of edge2 from batches of node2 seeds.

    The query returns seed ids, a flat array of neighbor ids and per-seed
    offsets. For every seed the offset must equal the smaller of its fixed
    destination count and the sample size, and the matching window of
    neighbor ids must pass ``utils.check_subset`` against those fixed ids.
    """
    sample_size = 3
    seeds = self.g.V(self._node2_type).batch(4).alias('a')
    hop = seeds.outV(self._edge2_type).sample(sample_size).by("full").alias('b')
    q = hop.values(lambda x: (x['a'].ids, x['b'].ids, x['b'].offsets))
    dataset = gl.Dataset(q)

    while True:
        try:
            src, nbrs, offsets = dataset.next()
            cursor = 0  # start of the current seed's window inside nbrs
            for idx, count in enumerate(offsets):
                expected_nbrs = utils.fixed_dst_ids(src[idx],
                                                    self._node1_range)
                assert count == min(len(expected_nbrs), sample_size)
                window = nbrs[cursor: cursor + count]
                utils.check_subset(window, expected_nbrs)
                cursor += count
        except gl.OutOfRangeError:
            # Dataset exhausted.
            break
def test_neg(self):
    """Sample negatives with the primitive sampler API ("random" strategy).

    For each node1 seed, the sampled ids are passed to ``utils.check_ids``
    with the node2 ids that are not among the seed's fixed destination
    ids. The node type and total shape of the result are checked as well.
    """
    expand_factor = 6
    ids = self._seed_node1_ids
    nbr_s = self.g.negative_sampler(self._edge1_type,
                                    expand_factor=expand_factor,
                                    strategy="random")
    nodes = nbr_s.get(ids)

    for i, e in enumerate(ids):
        # Depends only on the seed, so it is computed once per seed rather
        # than once per candidate id inside the comprehension.
        positive_ids = utils.fixed_dst_ids(e, self._node2_range)
        expected_ids = [iid for iid in self._node2_ids
                        if iid not in positive_ids]
        utils.check_ids(nodes.ids[i], expected_ids)
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, ids.size * expand_factor)
def test_neg(self):
    """Sample negative neighbors with the "in_degree" strategy.

    For each node1 seed, the sampled ids are passed to ``utils.check_ids``
    with the node2 ids that are not among the seed's fixed destination
    ids. The node type and total shape of the result are checked as well.
    """
    expand_factor = 6
    ids = self._seed_node1_ids
    nbr_s = self.g.negative_sampler(self._edge1_type,
                                    expand_factor=expand_factor,
                                    strategy="in_degree")
    nodes = nbr_s.get(ids)

    for i, e in enumerate(ids):
        # Depends only on the seed, so it is computed once per seed rather
        # than once per candidate id inside the comprehension.
        positive_ids = utils.fixed_dst_ids(e, self._node2_range)
        expected_ids = [iid for iid in self._node2_ids
                        if iid not in positive_ids]
        utils.check_ids(nodes.ids[i], expected_ids)
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, ids.size * expand_factor)
def test_1hop_using_gremlin(self):
    """Sample one hop with the gremlin-like API under replicate padding.

    With ``gl.REPLICATE`` padding and a fan-out of 6, the ids sampled for
    each node1 seed are compared through ``utils.check_set_equal`` with
    that seed's fixed destination ids plus ``-1``.
    """
    gl.set_padding_mode(gl.REPLICATE)
    expand_factor = 6
    ids = self._seed_node1_ids
    nbrs = (
        self.g.V(self._node1_type, feed=ids)
        .outE(self._edge1_type)
        .sample(expand_factor)
        .by("random_without_replacement")
        .inV()
        .emit()
    )
    nodes = nbrs[2]

    # The loop variable gets its own name instead of rebinding ``nbrs``, and
    # the destination range comes from the fixture rather than a literal.
    for iid, sampled_ids in zip(ids, nodes.ids):
        full_nbrs = utils.fixed_dst_ids(iid, self._node2_range)
        # NOTE(review): -1 is presumably the id used to pad missing
        # neighbors -- confirm.
        full_nbrs.extend([-1])
        utils.check_set_equal(sampled_ids, full_nbrs)
def setUp(self):
    """Prepare the type names, decoders, id ranges and seeds used by the tests.

    num_int_attrs = num_float_attrs = num_string_attrs = 2.
    dst_ids = utils.fixed_dst_ids(src_ids, dst_range); with fixed_dst_ids,
    any src_id with src_id % 5 == 0 has no edge.
    """
    # --- type names -------------------------------------------------------
    self._node1_type, self._node2_type = "node1", "node2"
    self._edge1_type, self._edge2_type, self._edge3_type = (
        "edge1", "edge2", "edge3")

    # --- id ranges and the ids derived from them --------------------------
    self._node1_range = (0, 100)
    self._node2_range = (100, 200)
    self._node1_ids = range(*self._node1_range)
    self._node2_ids = utils.fixed_dst_ids(self._node1_ids, self._node2_range)

    # --- decoders ---------------------------------------------------------
    attr_types = utils.ATTR_TYPES
    self._node1_decoder = gl.Decoder(attr_types=attr_types)
    self._node2_decoder = gl.Decoder(weighted=True, labeled=True)
    self._edge1_decoder = gl.Decoder(attr_types=attr_types, labeled=True)
    self._edge2_decoder = gl.Decoder(attr_types=attr_types, weighted=True)
    self._edge3_decoder = gl.Decoder(weighted=True)

    # --- seeds ------------------------------------------------------------
    self._seed_node1_ids = np.array([2, 7, 8])
    self._seed_node2_ids = np.array([102, 107, 108])
    self._seed_node1_ids_with_nbr_missing = np.array([5, 10, 110])
    # There is no edge whose src_id is 5 or 105.
    self._seed_node2_ids_with_nbr_missing = np.array([102, 105, 108])

    # --- default values ---------------------------------------------------
    self._default_dst_id = -1
    self._default_int_attr = 1000
    self._default_float_attr = 999.9
    self._default_string_attr = 'hehe'

    if self.needs_initial:
        self.initialize()
    if not self.g:
        # NOTE(review): presumably gives a graph set up elsewhere time to
        # become available -- confirm.
        time.sleep(1)
def test_neg_using_gsl(self):
    """Sample negatives through the GSL API in eager mode ("in_degree").

    For each node1 seed, the sampled ids are passed to ``utils.check_ids``
    with the node2 ids that are not among the seed's fixed destination
    ids. The node type and total shape of the result are checked as well.
    """
    gl.set_eager_mode(True)
    expand_factor = 6
    ids = self._seed_node1_ids
    nbrs = (
        self.g.V(self._node1_type, feed=ids)
        .outNeg(self._edge1_type)
        .sample(expand_factor)
        .by("in_degree")
        .emit()
    )
    nodes = nbrs[1]

    for i, e in enumerate(ids):
        # Depends only on the seed, so it is computed once per seed rather
        # than once per candidate id inside the comprehension.
        positive_ids = utils.fixed_dst_ids(e, self._node2_range)
        expected_ids = [iid for iid in self._node2_ids
                        if iid not in positive_ids]
        utils.check_ids(nodes.ids[i], expected_ids)
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, ids.size * expand_factor)
def test_neg_using_gremlin(self):
    """Sample negatives through the gremlin-like API ("random" strategy).

    The result ids are read in consecutive windows of ``expand_factor``
    entries, one window per node1 seed. Each window is passed to
    ``utils.check_ids`` with the node2 ids that are not among the seed's
    fixed destination ids. The node type and total shape of the result are
    checked as well.
    """
    expand_factor = 6
    ids = self._seed_node1_ids
    nbrs = (
        self.g.V(self._node1_type, feed=ids)
        .outNeg(self._edge1_type)
        .sample(expand_factor)
        .by("random")
        .emit()
    )
    nodes = nbrs[1]

    for i, e in enumerate(ids):
        # Depends only on the seed, so it is computed once per seed rather
        # than once per candidate id inside the comprehension.
        positive_ids = utils.fixed_dst_ids(e, self._node2_range)
        expected_ids = [iid for iid in self._node2_ids
                        if iid not in positive_ids]
        window = nodes.ids[i * expand_factor:(i + 1) * expand_factor]
        utils.check_ids(window, expected_ids)
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, ids.size * expand_factor)
def test_1hop_using_gremlin(self):
    """Sample full one-hop neighbors with the gremlin-like API.

    For every node1 seed, the ids returned by the "full" strategy are
    compared via ``utils.check_sorted_equal`` with that seed's fixed
    destination ids in the node2 range. The ids, type, weights and labels
    of the whole result are then checked.
    """
    ids = self._seed_node1_ids
    nbrs = (
        self.g.V(self._node1_type, feed=ids)
        .outE(self._edge1_type)
        .sample()
        .by("full")
        .inV()
        .emit()
    )
    nodes = nbrs[2]

    # enumerate replaces the hand-maintained index counter.
    for index, node in enumerate(nodes):
        utils.check_sorted_equal(
            utils.fixed_dst_ids(ids[index], self._node2_range), node.ids)
    utils.check_node_ids(nodes, self._node2_ids)
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_weights(nodes)
    utils.check_node_labels(nodes)