def test_2hop_using_gsl_with_undirected_edge(self):
    """Sample two hops over an undirected edge type with the GSL API.

    The edge's source and destination nodes have different types. Hop one
    follows ``outE`` from the node1 seeds; hop two follows ``inE`` of the
    same edge type, and its edges are checked against the edge type with a
    ``"_reverse"`` suffix. Both hops use the "random" strategy.
    """
    gl.set_eager_mode(True)
    fanouts = [3, 2]
    seed_ids = self._seed_node1_ids

    query = (
        self.g.V(self._node1_type, feed=seed_ids)
        .outE(self._edge1_type).sample(fanouts[0]).by("random")
        .inV()
        .inE(self._edge1_type).sample(fanouts[1]).by("random")
        .inV()
    )
    result = query.emit()

    # Positions 1/2 are read as the first hop's edges/nodes, 3/4 as the
    # second hop's.
    hop1_edges, hop1_nodes = result[1], result[2]
    hop2_edges, hop2_nodes = result[3], result[4]

    # First hop: node1 -> node2 along the edge as declared.
    utils.check_fixed_edge_dst_ids(
        hop1_edges,
        dst_range=self._node2_range,
        expected_src_ids=seed_ids,
    )
    utils.check_edge_type(
        hop1_edges, self._node1_type, self._node2_type, self._edge1_type)

    # Second hop: node2 -> node1 along the reversed edge.
    reversed_edge_type = self._edge1_type + "_reverse"
    utils.check_edge_type(
        hop2_edges, self._node2_type, self._node1_type, reversed_edge_type)

    utils.check_node_type(hop1_nodes, self._node2_type)
    utils.check_node_type(hop2_nodes, self._node1_type)
    utils.check_node_ids(hop2_nodes, self._node1_ids)
def test_1hop_using_gsl(self):
    """Sample one hop of out-neighbors through the GSL API.

    Uses the "in_degree" strategy from the node1 seeds, then runs the
    ``utils`` checks on the emitted edges and destination nodes.
    """
    gl.set_eager_mode(True)
    fanout = 6
    seed_ids = self._seed_node1_ids

    query = (
        self.g.V(self._node1_type, feed=seed_ids)
        .outE(self._edge1_type).sample(fanout).by("in_degree")
        .inV()
    )
    result = query.emit()
    hop_edges = result[1]
    hop_nodes = result[2]

    # Edge-side checks.
    utils.check_fixed_edge_dst_ids(
        hop_edges,
        dst_range=self._node2_range,
        expected_src_ids=seed_ids,
    )
    utils.check_edge_type(
        hop_edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type,
    )
    expected_count = seed_ids.size * fanout
    utils.check_edge_shape(hop_edges, expected_count)
    utils.check_edge_attrs(hop_edges)
    utils.check_edge_labels(hop_edges)

    # Node-side checks: the nodes must line up with the edges' destinations.
    utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
    utils.check_node_ids(hop_nodes, self._node2_ids)
    utils.check_node_type(hop_nodes, node_type=self._node2_type)
    utils.check_node_shape(hop_nodes, expected_count)
    utils.check_node_weights(hop_nodes)
    utils.check_node_labels(hop_nodes)
def test_1hop_with_neighbor_missing(self):
    """Sample one hop by "edge_weight" when some seeds have no neighbors.

    Seeds come from ``_seed_node1_ids_with_nbr_missing``. The checks are
    given the default destination id and the default attribute values that
    stand in for the missing neighbors.
    """
    fanout = 6
    seed_ids = self._seed_node1_ids_with_nbr_missing

    sampler = self.g.neighbor_sampler(
        self._edge1_type, expand_factor=fanout, strategy="edge_weight")
    sampled = sampler.get(seed_ids)
    hop_edges = sampled.layer_edges(1)
    hop_nodes = sampled.layer_nodes(1)

    # Edge-side checks.
    utils.check_fixed_edge_dst_ids(
        hop_edges,
        dst_range=self._node2_range,
        expected_src_ids=seed_ids,
        default_dst_id=self._default_dst_id,
    )
    utils.check_edge_type(
        hop_edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type,
    )
    expected_count = seed_ids.size * fanout
    utils.check_edge_shape(hop_edges, expected_count)
    utils.check_not_exist_edge_attrs(
        hop_edges,
        default_int_attr=self._default_int_attr,
        default_float_attr=self._default_float_attr,
        default_string_attr=self._default_string_attr,
    )
    utils.check_not_exist_edge_labels(hop_edges)

    # Node-side checks.
    utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
    utils.check_node_ids(hop_nodes, [self._default_dst_id])
    utils.check_node_type(hop_nodes, node_type=self._node2_type)
    utils.check_node_shape(hop_nodes, expected_count)
    utils.check_not_exist_node_weights(hop_nodes)
    utils.check_not_exist_node_labels(hop_nodes)
def test_1hop(self):
    """Sample one hop of neighbors with the "random" strategy.

    Draws neighbors for the node1 seeds through ``neighbor_sampler`` and
    runs the ``utils`` checks on the first layer's edges and nodes.
    """
    fanout = 6
    seed_ids = self._seed_node1_ids

    sampler = self.g.neighbor_sampler(
        self._edge1_type, expand_factor=fanout, strategy="random")
    sampled = sampler.get(seed_ids)
    hop_edges = sampled.layer_edges(1)
    hop_nodes = sampled.layer_nodes(1)

    # NOTE(review): looks like leftover debug output; kept because it also
    # exercises get_out_degrees() -- confirm before removing.
    out_degrees = hop_edges.src_nodes.get_out_degrees(self._edge1_type)
    print(out_degrees)

    # Edge-side checks.
    utils.check_fixed_edge_dst_ids(
        hop_edges,
        dst_range=self._node2_range,
        expected_src_ids=seed_ids,
    )
    utils.check_edge_type(
        hop_edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type,
    )
    expected_count = seed_ids.size * fanout
    utils.check_edge_shape(hop_edges, expected_count)
    utils.check_edge_attrs(hop_edges)
    utils.check_edge_labels(hop_edges)

    # Node-side checks.
    utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
    utils.check_node_ids(hop_nodes, self._node2_ids)
    utils.check_node_type(hop_nodes, node_type=self._node2_type)
    utils.check_node_shape(hop_nodes, expected_count)
    utils.check_node_weights(hop_nodes)
    utils.check_node_labels(hop_nodes)
def test_1hop_using_gsl(self):
    """Sample one hop of out-neighbors by "edge_weight" through the GSL API.

    NOTE(review): the previous docstring described a heterogeneous graph
    with edge attrs and without edge weight -- that depends on the fixture
    set up outside this method; confirm.
    """
    gl.set_eager_mode(True)
    fanout = 6
    seed_ids = self._seed_node1_ids

    query = (
        self.g.V(self._node1_type, feed=seed_ids)
        .outE(self._edge1_type).sample(fanout).by("edge_weight")
        .inV()
    )
    result = query.emit()
    hop_edges = result[1]
    hop_nodes = result[2]

    # Edge-side checks.
    utils.check_fixed_edge_dst_ids(
        hop_edges,
        dst_range=self._node2_range,
        expected_src_ids=seed_ids,
    )
    utils.check_edge_type(
        hop_edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type,
    )
    expected_count = seed_ids.size * fanout
    utils.check_edge_shape(hop_edges, expected_count)
    utils.check_edge_attrs(hop_edges)
    utils.check_edge_labels(hop_edges)

    # Node-side checks.
    utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
    utils.check_node_ids(hop_nodes, self._node2_ids)
    utils.check_node_type(hop_nodes, node_type=self._node2_type)
    utils.check_node_shape(hop_nodes, expected_count)
    utils.check_node_weights(hop_nodes)
    utils.check_node_labels(hop_nodes)
def test_1hop(self):
    """Sample one hop of neighbors with the "edge_weight" strategy.

    Uses the regular node1 seeds (per the original description, all of
    them have neighbors) and runs the ``utils`` checks on the first
    layer's edges and nodes.
    """
    fanout = 6
    seed_ids = self._seed_node1_ids

    sampler = self.g.neighbor_sampler(
        self._edge1_type, expand_factor=fanout, strategy="edge_weight")
    sampled = sampler.get(seed_ids)
    hop_edges = sampled.layer_edges(1)
    hop_nodes = sampled.layer_nodes(1)

    # Edge-side checks.
    utils.check_fixed_edge_dst_ids(
        hop_edges,
        dst_range=self._node2_range,
        expected_src_ids=seed_ids,
    )
    utils.check_edge_type(
        hop_edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type,
    )
    expected_count = seed_ids.size * fanout
    utils.check_edge_shape(hop_edges, expected_count)
    utils.check_edge_attrs(hop_edges)
    utils.check_edge_labels(hop_edges)

    # Node-side checks.
    utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
    utils.check_node_ids(hop_nodes, self._node2_ids)
    utils.check_node_type(hop_nodes, node_type=self._node2_type)
    utils.check_node_shape(hop_nodes, expected_count)
    utils.check_node_weights(hop_nodes)
    utils.check_node_labels(hop_nodes)
def test_1hop_with_neighbor_missing(self):
    """Sample one hop by "in_degree" for seeds without out-neighbors.

    Seeds come from ``_seed_node1_ids_with_nbr_missing``. The checks are
    given the default neighbor id and the default attribute values that
    stand in for the missing neighbors.
    """
    fanout = 6
    seed_ids = self._seed_node1_ids_with_nbr_missing

    sampler = self.g.neighbor_sampler(
        self._edge1_type, expand_factor=fanout, strategy="in_degree")
    sampled = sampler.get(seed_ids)
    hop_edges = sampled.layer_edges(1)
    hop_nodes = sampled.layer_nodes(1)

    # Edge-side checks.
    utils.check_fixed_edge_dst_ids(
        hop_edges,
        dst_range=self._node2_range,
        expected_src_ids=seed_ids,
        default_dst_id=self._default_dst_id,
    )
    utils.check_edge_type(
        hop_edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type,
    )
    expected_count = seed_ids.size * fanout
    utils.check_edge_shape(hop_edges, expected_count)
    utils.check_not_exist_edge_attrs(
        hop_edges,
        default_int_attr=self._default_int_attr,
        default_float_attr=self._default_float_attr,
        default_string_attr=self._default_string_attr,
    )
    utils.check_not_exist_edge_labels(hop_edges)

    # Node-side checks.
    utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
    utils.check_node_ids(hop_nodes, [self._default_dst_id])
    utils.check_node_type(hop_nodes, node_type=self._node2_type)
    utils.check_node_shape(hop_nodes, expected_count)
    utils.check_not_exist_node_weights(hop_nodes)
    utils.check_not_exist_node_labels(hop_nodes)
def test_node_iterate_from_graph(self):
    """Iterate and sample nodes taken from the endpoints of an edge source.

    A graph is built from a single generated edge file. For the source
    side (``gl.EDGE_SRC``, node type "user") and then the destination side
    (``gl.EDGE_DST``, node type "item"):

    1. a "by_order" node sampler is drained until ``gl.OutOfRangeError``
       and the collected ids are compared with the expected id range;
    2. a "random" node sampler is polled a few times and every batch is
       checked to be a subset of that range.

    The graph is closed in a ``finally`` clause so that a failing check
    does not leave it open.
    """
    file_path = self.gen_test_data([utils.ATTRIBUTED], False)
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    batch_size = 4
    max_ordered_batches = 100  # upper bound while draining "by_order"
    random_batches = 10

    try:
        # (node_from selector, expected node type, (begin, end) id bounds)
        endpoints = (
            (gl.EDGE_SRC, "user", self.src_range_),
            (gl.EDGE_DST, "item", self.dst_range_),
        )
        for node_from, node_type, bounds in endpoints:
            # Ordered traversal: collect ids until the sampler is exhausted.
            sampler = g.node_sampler('first',
                                     batch_size=batch_size,
                                     strategy="by_order",
                                     node_from=node_from)
            res_ids = []
            for _ in range(max_ordered_batches):
                try:
                    nodes = sampler.get()
                except gl.OutOfRangeError:
                    break
                utils.check_node_type(nodes, node_type)
                res_ids.extend(list(nodes.ids))
            ids = range(bounds[0], bounds[1])
            utils.check_sorted_equal(res_ids, ids)

            # Random sampling: each batch must stay inside the id range.
            sampler = g.node_sampler('first',
                                     batch_size=batch_size,
                                     strategy="random",
                                     node_from=node_from)
            for _ in range(random_batches):
                nodes = sampler.get()
                utils.check_subset(nodes.ids, ids)
    finally:
        # Release the graph even when one of the checks above raises.
        g.close()
def test_neg(self):
    """Sample negative nodes weighted by the target nodes' node_weight.

    The sampled ids are checked against the ids in ``range(100, 200)``
    that are not among the node2 seeds.
    """
    fanout = 6
    seed_ids = self._seed_node2_ids

    sampler = self.g.negative_sampler(
        self._node2_type, expand_factor=fanout, strategy="node_weight")
    negatives = sampler.get(seed_ids)

    # Candidate ids: everything in the hard-coded range except the seeds.
    candidates = [nid for nid in range(100, 200) if nid not in seed_ids]
    utils.check_ids(negatives.ids, candidates)
    utils.check_node_type(negatives, node_type=self._node2_type)
    utils.check_node_shape(negatives, seed_ids.size * fanout)
def test_1hop_with_expand_factor(self):
    """Sample one hop with the "full" strategy while passing an expand factor.

    The expand factor is handed to ``neighbor_sampler`` positionally; only
    the ids and type of the first layer's nodes are checked.
    """
    fanout = 6
    seed_ids = self._seed_node1_ids

    sampler = self.g.neighbor_sampler(
        self._edge1_type, fanout, strategy="full")
    sampled = sampler.get(seed_ids)

    # The edges are fetched but not inspected; the call still has to succeed.
    hop_edges = sampled.layer_edges(1)
    hop_nodes = sampled.layer_nodes(1)

    utils.check_node_ids(hop_nodes, self._node2_ids)
    utils.check_node_type(hop_nodes, node_type=self._node2_type)
def test_2hop(self):
    """Sample two hops of neighbors with the "in_degree" strategy.

    Hop one follows ``_edge1_type`` from the node1 seeds to node2; hop two
    follows ``_edge2_type`` from the flattened hop-one node ids back to
    node1. Each layer's edges and nodes go through the ``utils`` checks.
    """
    gl.set_eager_mode(True)
    fanouts = [3, 2]
    seed_ids = self._seed_node1_ids

    sampler = self.g.neighbor_sampler(
        [self._edge1_type, self._edge2_type],
        expand_factor=fanouts,
        strategy="in_degree",
    )
    sampled = sampler.get(seed_ids)

    # ---- first hop: node1 -> node2 ----
    hop1_edges = sampled.layer_edges(1)
    hop1_nodes = sampled.layer_nodes(1)

    utils.check_fixed_edge_dst_ids(
        hop1_edges,
        dst_range=self._node2_range,
        expected_src_ids=seed_ids,
    )
    utils.check_edge_type(
        hop1_edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type,
    )
    hop1_count = seed_ids.size * fanouts[0]
    utils.check_edge_shape(hop1_edges, hop1_count)
    utils.check_edge_attrs(hop1_edges)
    utils.check_edge_labels(hop1_edges)

    utils.check_equal(hop1_nodes.ids, hop1_edges.dst_ids)
    utils.check_node_ids(hop1_nodes, self._node2_ids)
    utils.check_node_type(hop1_nodes, node_type=self._node2_type)
    utils.check_node_shape(hop1_nodes, hop1_count)
    utils.check_node_weights(hop1_nodes)
    utils.check_node_labels(hop1_nodes)

    # ---- second hop: node2 -> node1, seeded by the flattened hop-one ids ----
    hop2_src_ids = hop1_nodes.ids.reshape(-1)
    hop2_edges = sampled.layer_edges(2)
    hop2_nodes = sampled.layer_nodes(2)

    utils.check_fixed_edge_dst_ids(
        hop2_edges,
        dst_range=self._node1_range,
        expected_src_ids=hop2_src_ids,
    )
    utils.check_edge_type(
        hop2_edges,
        src_type=self._node2_type,
        dst_type=self._node1_type,
        edge_type=self._edge2_type,
    )
    hop2_count = hop2_src_ids.size * fanouts[1]
    utils.check_edge_shape(hop2_edges, hop2_count)
    utils.check_edge_attrs(hop2_edges)
    utils.check_edge_weights(hop2_edges)

    utils.check_equal(hop2_nodes.ids, hop2_edges.dst_ids)
    utils.check_node_ids(hop2_nodes, self._node1_ids)
    utils.check_node_type(hop2_nodes, node_type=self._node1_type)
    utils.check_node_shape(hop2_nodes, hop2_count)
def test_neg_using_gremlin(self):
    """Negative sampling by node_weight through the gremlin-like API.

    The sampled ids are checked against the ids in ``range(100, 200)``
    that are not among the seeds.
    """
    fanout = 6
    # NOTE(review): the seeds are taken from _seed_node1_ids but fed as
    # node2-type vertices below -- confirm this is intended.
    seed_ids = self._seed_node1_ids

    query = (
        self.g.V(self._node2_type, feed=seed_ids)
        .Neg(self._node2_type).sample(fanout).by("node_weight")
    )
    result = query.emit()
    negatives = result[1]

    candidates = [nid for nid in range(100, 200) if nid not in seed_ids]
    utils.check_ids(negatives.ids, candidates)
    utils.check_node_type(negatives, node_type=self._node2_type)
    utils.check_node_shape(negatives, seed_ids.size * fanout)
def test_neg_using_gsl(self):
    """Negative sampling by node_weight through the GSL API in eager mode.

    The sampled ids are checked against the ids in ``range(100, 200)``
    that are not among the seeds.
    """
    import graphlearn as gl
    gl.set_eager_mode(True)

    fanout = 6
    # NOTE(review): the seeds are taken from _seed_node1_ids but fed as
    # node2-type vertices below -- confirm this is intended.
    seed_ids = self._seed_node1_ids

    query = (
        self.g.V(self._node2_type, feed=seed_ids)
        .Neg(self._node2_type).sample(fanout).by("node_weight")
    )
    result = query.emit()
    negatives = result[1]

    candidates = [nid for nid in range(100, 200) if nid not in seed_ids]
    utils.check_ids(negatives.ids, candidates)
    utils.check_node_type(negatives, node_type=self._node2_type)
    utils.check_node_shape(negatives, seed_ids.size * fanout)
def test_neg(self):
    """Sample negative neighbors by in-degree of the target nodes.

    For every seed id, the ids sampled for it (``nodes.ids[i]``) are
    checked against the node2 ids that ``utils.fixed_dst_ids`` does not
    return for that seed.
    """
    expand_factor = 6
    ids = self._seed_node1_ids
    nbr_s = self.g.negative_sampler(self._edge1_type,
                                    expand_factor=expand_factor,
                                    strategy="in_degree")
    nodes = nbr_s.get(ids)

    for i, src_id in enumerate(ids):
        # Resolve the seed's fixed destinations once per seed rather than
        # once per candidate id inside the comprehension filter.
        # Assumes utils.fixed_dst_ids depends only on its arguments.
        linked_ids = utils.fixed_dst_ids(src_id, self._node2_range)
        expected_ids = [iid for iid in self._node2_ids
                        if iid not in linked_ids]
        utils.check_ids(nodes.ids[i], expected_ids)

    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, ids.size * expand_factor)
def test_neg(self):
    """Random negative sampling using the primitive sampler API.

    For every seed id, the ids sampled for it (``nodes.ids[i]``) are
    checked against the node2 ids that ``utils.fixed_dst_ids`` does not
    return for that seed.
    """
    expand_factor = 6
    ids = self._seed_node1_ids
    nbr_s = self.g.negative_sampler(self._edge1_type,
                                    expand_factor=expand_factor,
                                    strategy="random")
    nodes = nbr_s.get(ids)

    for i, src_id in enumerate(ids):
        # Resolve the seed's fixed destinations once per seed rather than
        # once per candidate id inside the comprehension filter.
        # Assumes utils.fixed_dst_ids depends only on its arguments.
        linked_ids = utils.fixed_dst_ids(src_id, self._node2_range)
        expected_ids = [iid for iid in self._node2_ids
                        if iid not in linked_ids]
        utils.check_ids(nodes.ids[i], expected_ids)

    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, ids.size * expand_factor)
def test_neg_using_gsl(self):
    """Negative sampling by in-degree through the GSL API in eager mode.

    For every seed id, the ids sampled for it (``nodes.ids[i]``) are
    checked against the node2 ids that ``utils.fixed_dst_ids`` does not
    return for that seed.
    """
    gl.set_eager_mode(True)
    expand_factor = 6
    ids = self._seed_node1_ids
    nbrs = (
        self.g.V(self._node1_type, feed=ids)
        .outNeg(self._edge1_type).sample(expand_factor).by("in_degree")
        .emit()
    )
    nodes = nbrs[1]

    for i, src_id in enumerate(ids):
        # Resolve the seed's fixed destinations once per seed rather than
        # once per candidate id inside the comprehension filter.
        # Assumes utils.fixed_dst_ids depends only on its arguments.
        linked_ids = utils.fixed_dst_ids(src_id, self._node2_range)
        expected_ids = [iid for iid in self._node2_ids
                        if iid not in linked_ids]
        utils.check_ids(nodes.ids[i], expected_ids)

    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, ids.size * expand_factor)
def test_neg_using_gremlin(self):
    """Random negative sampling through the gremlin-like API.

    The sampled ids are read as consecutive slices of ``expand_factor``
    entries, one slice per seed. Each slice is checked against the node2
    ids that ``utils.fixed_dst_ids`` does not return for that seed.
    """
    expand_factor = 6
    ids = self._seed_node1_ids
    nbrs = (
        self.g.V(self._node1_type, feed=ids)
        .outNeg(self._edge1_type).sample(expand_factor).by("random")
        .emit()
    )
    nodes = nbrs[1]

    for i, src_id in enumerate(ids):
        # Resolve the seed's fixed destinations once per seed rather than
        # once per candidate id inside the comprehension filter.
        # Assumes utils.fixed_dst_ids depends only on its arguments.
        linked_ids = utils.fixed_dst_ids(src_id, self._node2_range)
        expected_ids = [iid for iid in self._node2_ids
                        if iid not in linked_ids]
        # Slice out the samples that belong to the i-th seed.
        utils.check_ids(
            nodes.ids[i * expand_factor:(i + 1) * expand_factor],
            expected_ids)

    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, ids.size * expand_factor)
def test_1hop_using_gremlin(self):
    """Full neighbor sampling through the gremlin-like API.

    ``sample()`` is called without a count and the "full" strategy is
    selected. Iterating the emitted nodes yields one entry per position in
    the seed ids; each entry's ids must equal, after sorting, what
    ``utils.fixed_dst_ids`` returns for the seed at that position.
    """
    ids = self._seed_node1_ids
    nbrs = (
        self.g.V(self._node1_type, feed=ids)
        .outE(self._edge1_type).sample().by("full")
        .inV()
        .emit()
    )
    nodes = nbrs[2]

    # enumerate() pairs each entry with the position of its seed id.
    for index, node in enumerate(nodes):
        utils.check_sorted_equal(
            utils.fixed_dst_ids(ids[index], self._node2_range),
            node.ids)

    utils.check_node_ids(nodes, self._node2_ids)
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_weights(nodes)
    utils.check_node_labels(nodes)