def buildIndex(self):
    """Build the tree breadth-first; every split produces four children (2D data).

    The root receives a count from ``getCount`` using ``budget_c[0]``; a child
    created at depth ``d`` receives one using ``budget_c[d]``.  A node that
    ``testLeaf`` accepts before ``Params.maxHeight`` has its count recomputed
    with the summed budget of all deeper levels.  Nothing is returned: nodes
    are linked through their ``nw``/``ne``/``sw``/``se`` attributes.
    """
    level_budget = self.getCountBudget()
    root = self.root
    root.n_count = self.getCount(root, level_budget[0])  # count for the root

    pending = deque([root])  # FIFO queue, so nodes are visited level by level
    leaf_total = 0
    deepest = -1

    while pending:
        node = pending.popleft()
        deepest = max(deepest, node.n_depth)

        if self.testLeaf(node) is True:
            if node.n_depth < Params.maxHeight:
                # Stopped above maxHeight: spend the budget of the deeper
                # levels on this node's count instead.
                unused = sum(level_budget[node.n_depth + 1:])
                node.n_count = self.getCount(node, unused)
            leaf_total += 1
            node.n_isLeaf = True
            self.cell_setLeaf(node)
            continue

        # Internal node: budget is charged whether or not the split succeeds.
        node.n_budget -= 1
        split = self.getCoordinates(node)
        children = [KNode(), KNode(), KNode(), KNode()]
        nw, ne, sw, se = children
        first_pt, second_pt, nw.n_data, ne.n_data, sw.n_data, se.n_data = split
        x1, y1 = first_pt
        x2, y2 = second_pt

        # Corners of the parent box: row 0 is the low corner, row 1 the high one.
        parent_box = node.n_box
        x_lo, y_lo = parent_box[0, 0], parent_box[0, 1]
        x_hi, y_hi = parent_box[1, 0], parent_box[1, 1]
        child_corners = (
            (nw, (x_lo, y1), (x1, y_hi)),
            (ne, (x1, y2), (x_hi, y_hi)),
            (sw, (x_lo, y_lo), (x2, y1)),
            (se, (x2, y_lo), (x_hi, y2)),
        )
        for child, low, high in child_corners:
            child.n_box = np.array([list(low), list(high)])

        # Depth, count and budget for each child, in nw, ne, sw, se order.
        child_depth = node.n_depth + 1
        for child in children:
            child.n_depth = child_depth
            child.n_count = self.getCount(child, level_budget[child_depth])
            child.n_budget = node.n_budget
            pending.append(child)

        node.n_data = None  # the point coordinates now live in the children
        node.nw = nw
        node.ne = ne
        node.sw = sw
        node.se = se

    logging.debug("number of leaves: %d" % leaf_total)
    logging.debug("max depth: %d" % deepest)
def buildIndex(self):
    """Build the tree breadth-first; every split produces four children (2D data).

    The root receives a count from ``getCount`` using ``budget_c[0]``; a child
    created at depth ``d`` receives one using ``budget_c[d]``.  A node that
    ``testLeaf`` accepts before ``Params.maxHeight`` has its count recomputed
    with the summed budget of all deeper levels.  Nothing is returned: nodes
    are linked through their ``nw``/``ne``/``sw``/``se`` attributes.
    """
    level_budget = self.getCountBudget()
    root = self.root
    root.n_count = self.getCount(root, level_budget[0])  # count for the root

    pending = deque([root])  # FIFO queue, so nodes are visited level by level
    leaf_total = 0
    deepest = -1

    while pending:
        node = pending.popleft()
        deepest = max(deepest, node.n_depth)

        if self.testLeaf(node) is True:
            if node.n_depth < Params.maxHeight:
                # Stopped above maxHeight: spend the budget of the deeper
                # levels on this node's count instead.
                unused = sum(level_budget[node.n_depth + 1:])
                node.n_count = self.getCount(node, unused)
            leaf_total += 1
            node.n_isLeaf = True
            self.cell_setLeaf(node)
            continue

        # Internal node: budget is charged whether or not the split succeeds.
        node.n_budget -= 1
        split = self.getCoordinates(node)
        children = [KNode(), KNode(), KNode(), KNode()]
        nw, ne, sw, se = children
        first_pt, second_pt, nw.n_data, ne.n_data, sw.n_data, se.n_data = split
        x1, y1 = first_pt
        x2, y2 = second_pt

        # Corners of the parent box: row 0 is the low corner, row 1 the high one.
        parent_box = node.n_box
        x_lo, y_lo = parent_box[0, 0], parent_box[0, 1]
        x_hi, y_hi = parent_box[1, 0], parent_box[1, 1]
        child_corners = (
            (nw, (x_lo, y1), (x1, y_hi)),
            (ne, (x1, y2), (x_hi, y_hi)),
            (sw, (x_lo, y_lo), (x2, y1)),
            (se, (x2, y_lo), (x_hi, y2)),
        )
        for child, low, high in child_corners:
            child.n_box = np.array([list(low), list(high)])

        # Depth, count and budget for each child, in nw, ne, sw, se order.
        child_depth = node.n_depth + 1
        for child in children:
            child.n_depth = child_depth
            child.n_count = self.getCount(child, level_budget[child_depth])
            child.n_budget = node.n_budget
            pending.append(child)

        node.n_data = None  # the point coordinates now live in the children
        node.nw = nw
        node.ne = ne
        node.sw = sw
        node.se = se

    logging.debug("number of leaves: %d" % leaf_total)
    logging.debug("max depth: %d" % deepest)
def buildIndex(self):
    """Build the tree breadth-first, taking child counts from ``getCoordinates``.

    The root count is ``np.sum(self.mapp)``.  For every node that ``testLeaf``
    rejects, ``getCoordinates`` supplies two split points and a sequence of
    four counts, assigned to the children in nw, ne, sw, se order.  Nothing is
    returned: nodes are linked through ``nw``/``ne``/``sw``/``se``.
    """
    root = self.root
    root.n_count = np.sum(self.mapp)

    pending = deque([root])  # FIFO queue, so nodes are visited level by level
    leaf_total = 0
    deepest = -1

    while pending:
        node = pending.popleft()
        deepest = max(deepest, node.n_depth)

        if self.testLeaf(node) is True:
            leaf_total += 1
            node.n_isLeaf = True
            self.cell_setLeaf(node)
            continue

        # Internal node: use up one unit of split budget, then split.
        node.n_budget -= 1
        split = self.getCoordinates(node)
        children = [KNode(), KNode(), KNode(), KNode()]
        nw, ne, sw, se = children
        first_pt, second_pt, child_counts = split
        x1, y1 = first_pt
        x2, y2 = second_pt

        # Corners of the parent box: row 0 is the low corner, row 1 the high one.
        parent_box = node.n_box
        x_lo, y_lo = parent_box[0, 0], parent_box[0, 1]
        x_hi, y_hi = parent_box[1, 0], parent_box[1, 1]
        child_corners = (
            (nw, (x_lo, y1), (x1, y_hi)),
            (ne, (x1, y2), (x_hi, y_hi)),
            (sw, (x_lo, y_lo), (x2, y1)),
            (se, (x2, y_lo), (x_hi, y2)),
        )
        for child, low, high in child_corners:
            child.n_box = np.array([list(low), list(high)])

        child_depth = node.n_depth + 1
        for position, child in enumerate(children):
            child.n_depth = child_depth
            child.n_count = child_counts[position]
            child.n_budget = node.n_budget
            pending.append(child)

        node.nw = nw
        node.ne = ne
        node.sw = sw
        node.se = se

    logging.debug("number of leaves: %d" % leaf_total)
    logging.debug("max depth: %d" % deepest)
def buildIndex(self):
    """Build the tree breadth-first, taking child counts from ``getCoordinates``.

    The root count is ``np.sum(self.mapp)``.  For every node that ``testLeaf``
    rejects, ``getCoordinates`` supplies two split points and a sequence of
    four counts, assigned to the children in nw, ne, sw, se order.  Nothing is
    returned: nodes are linked through ``nw``/``ne``/``sw``/``se``.
    """
    root = self.root
    root.n_count = np.sum(self.mapp)

    pending = deque([root])  # FIFO queue, so nodes are visited level by level
    leaf_total = 0
    deepest = -1

    while pending:
        node = pending.popleft()
        deepest = max(deepest, node.n_depth)

        if self.testLeaf(node) is True:
            leaf_total += 1
            node.n_isLeaf = True
            self.cell_setLeaf(node)
            continue

        # Internal node: use up one unit of split budget, then split.
        node.n_budget -= 1
        split = self.getCoordinates(node)
        children = [KNode(), KNode(), KNode(), KNode()]
        nw, ne, sw, se = children
        first_pt, second_pt, child_counts = split
        x1, y1 = first_pt
        x2, y2 = second_pt

        # Corners of the parent box: row 0 is the low corner, row 1 the high one.
        parent_box = node.n_box
        x_lo, y_lo = parent_box[0, 0], parent_box[0, 1]
        x_hi, y_hi = parent_box[1, 0], parent_box[1, 1]
        child_corners = (
            (nw, (x_lo, y1), (x1, y_hi)),
            (ne, (x1, y2), (x_hi, y_hi)),
            (sw, (x_lo, y_lo), (x2, y1)),
            (se, (x2, y_lo), (x_hi, y2)),
        )
        for child, low, high in child_corners:
            child.n_box = np.array([list(low), list(high)])

        child_depth = node.n_depth + 1
        for position, child in enumerate(children):
            child.n_depth = child_depth
            child.n_count = child_counts[position]
            child.n_budget = node.n_budget
            pending.append(child)

        node.nw = nw
        node.ne = ne
        node.sw = sw
        node.se = se

    logging.debug("number of leaves: %d" % leaf_total)
    logging.debug("max depth: %d" % deepest)
def buildIndex(self):
    """Build the tree over Hilbert-encoded points, then restore 2D bounding boxes.

    Steps, in order:
      1. Encode every column of ``self.realData`` to a Hilbert value
         (``get_Hcoord`` + ``h_encode``) and sort the values.
      2. Split breadth-first from ``self.root``.  While splitting, ``n_box``
         is a ``(low, high)`` pair of Hilbert values rather than a 2x2 array.
      3. Decode the Hilbert values of every leaf back to coordinates
         (``h_decode`` + ``get_Rcoord``) and store their bounding box as a
         2x2 array ``[[min_x, min_y], [max_x, max_y]]``.
      4. Visit all nodes in ``cmp_node`` order and give every node whose
         ``n_data`` is ``None`` the union of its four children's boxes.

    Nothing is returned; the tree hangs off ``self.root``.
    """
    # Local import keeps this method self-contained.  ``sorted(..., cmp=...)``
    # only exists on Python 2; ``cmp_to_key`` works on 2.7 and 3.x alike.
    from functools import cmp_to_key

    budget_c = self.getCountBudget()

    logging.debug('encoding coordinates...')
    RES = self.param.Res  # order of Hilbert curve
    ndata = self.realData.shape[1]
    hidx = np.zeros(ndata)
    for i in range(ndata):
        hx, hy = self.get_Hcoord(self.realData[0, i], self.realData[1, i], RES)
        hidx[i] = self.h_encode(hx, hy, RES)
    hidx = np.sort(hidx)

    logging.debug('building index...')
    self.root.n_data = hidx
    self.root.n_box = (0, 2 ** (2 * RES) - 1)
    self.root.n_count = self.getCount(self.root, budget_c[0])

    stack = deque([self.root])  # used as a FIFO queue (popleft)
    tree = [self.root]  # every node created, for the bottom-up pass
    leaf_li = []  # storage of all leaves
    nleaf = 0  # leaf counter
    max_depth = -1

    while stack:
        curr = stack.popleft()
        if curr.n_depth > max_depth:
            max_depth = curr.n_depth

        if self.testLeaf(curr) is True:  # curr is a leaf node
            if curr.n_depth < Params.maxHeight:
                # Stopped above maxHeight: recompute the count with the
                # summed budget of all deeper levels.
                remainingEps = sum(budget_c[curr.n_depth + 1:])
                curr.n_count = self.getCount(curr, remainingEps)
            nleaf += 1
            curr.n_isLeaf = True
            leaf_li.append(curr)
            continue

        # curr needs to split
        curr.n_budget -= 1
        tmp = self.getCoordinates(curr)
        if tmp is False:
            # Split failed: requeue the node.  Its budget has already been
            # decremented, so it is retried with less budget.
            # NOTE(review): termination relies on testLeaf eventually
            # accepting the node (presumably once n_budget runs out) - confirm.
            stack.append(curr)
            continue

        nw_node, ne_node, sw_node, se_node = KNode(), KNode(), KNode(), KNode()
        (split_prm, split_sec1, split_sec2,
         nw_node.n_data, ne_node.n_data, sw_node.n_data, se_node.n_data) = tmp

        # Consecutive Hilbert-value ranges, in nw, ne, sw, se order.
        nw_node.n_box = (curr.n_box[0], split_sec1)
        ne_node.n_box = (split_sec1, split_prm)
        sw_node.n_box = (split_prm, split_sec2)
        se_node.n_box = (split_sec2, curr.n_box[1])

        for sub_node in (nw_node, ne_node, sw_node, se_node):
            sub_node.n_depth = curr.n_depth + 1
            sub_node.n_count = self.getCount(sub_node, budget_c[sub_node.n_depth])
            sub_node.n_budget = curr.n_budget
            stack.append(sub_node)
            tree.append(sub_node)

        curr.n_data = None  # marks curr as internal for the bottom-up pass
        curr.nw, curr.ne, curr.sw, curr.se = nw_node, ne_node, sw_node, se_node

    logging.debug("number of leaves: %d", nleaf)
    logging.debug("max depth: %d", max_depth)

    # convert hilbert values in leaf nodes to real coordinates and update
    # bounding box
    logging.debug('decoding and updating bounding box...')
    for leaf in leaf_li:
        # Inverted sentinel box; a leaf with no values keeps it unchanged.
        # NOTE(review): assumes all coordinates lie within +/-1000 - confirm.
        bbox = np.array([[1000.0, 1000.0], [-1000.0, -1000.0]], dtype='float64')
        for hvalue in leaf.n_data:
            hx, hy = self.h_decode(int(hvalue), RES)
            x, y = self.get_Rcoord(hx, hy, RES)
            if x < bbox[0, 0]:
                bbox[0, 0] = x
            if x > bbox[1, 0]:
                bbox[1, 0] = x
            if y < bbox[0, 1]:
                bbox[0, 1] = y
            if y > bbox[1, 1]:
                bbox[1, 1] = y
        leaf.n_box = bbox

    # update bounding box bottom-up
    # NOTE(review): presumably cmp_node orders deeper nodes first, so children
    # are finished before their parent - confirm against cmp_node.
    tree = sorted(tree, key=cmp_to_key(self.cmp_node))
    logging.debug('updating box for each node in the tree...')
    for node in tree:
        if node.n_data is None:
            kids = (node.ne, node.nw, node.se, node.sw)
            node.n_box = np.zeros((2, 2))
            node.n_box[0, 0] = min(kid.n_box[0, 0] for kid in kids)
            node.n_box[0, 1] = min(kid.n_box[0, 1] for kid in kids)
            node.n_box[1, 0] = max(kid.n_box[1, 0] for kid in kids)
            node.n_box[1, 1] = max(kid.n_box[1, 1] for kid in kids)
def buildIndex(self):
    """Build the tree over Hilbert-encoded points, then restore 2D bounding boxes.

    Steps, in order:
      1. Encode every column of ``self.realData`` to a Hilbert value
         (``get_Hcoord`` + ``h_encode``) and sort the values.
      2. Split breadth-first from ``self.root``.  While splitting, ``n_box``
         is a ``(low, high)`` pair of Hilbert values rather than a 2x2 array.
      3. Decode the Hilbert values of every leaf back to coordinates
         (``h_decode`` + ``get_Rcoord``) and store their bounding box as a
         2x2 array ``[[min_x, min_y], [max_x, max_y]]``.
      4. Visit all nodes in ``cmp_node`` order and give every node whose
         ``n_data`` is ``None`` the union of its four children's boxes.

    Nothing is returned; the tree hangs off ``self.root``.
    """
    # Local import keeps this method self-contained.  ``sorted(..., cmp=...)``
    # only exists on Python 2; ``cmp_to_key`` works on 2.7 and 3.x alike.
    from functools import cmp_to_key

    budget_c = self.getCountBudget()

    logging.debug('encoding coordinates...')
    RES = self.param.Res  # order of Hilbert curve
    ndata = self.realData.shape[1]
    hidx = np.zeros(ndata)
    for i in range(ndata):
        hx, hy = self.get_Hcoord(self.realData[0, i], self.realData[1, i], RES)
        hidx[i] = self.h_encode(hx, hy, RES)
    hidx = np.sort(hidx)

    logging.debug('building index...')
    self.root.n_data = hidx
    self.root.n_box = (0, 2 ** (2 * RES) - 1)
    self.root.n_count = self.getCount(self.root, budget_c[0])

    stack = deque([self.root])  # used as a FIFO queue (popleft)
    tree = [self.root]  # every node created, for the bottom-up pass
    leaf_li = []  # storage of all leaves
    nleaf = 0  # leaf counter
    max_depth = -1

    while stack:
        curr = stack.popleft()
        if curr.n_depth > max_depth:
            max_depth = curr.n_depth

        if self.testLeaf(curr) is True:  # curr is a leaf node
            if curr.n_depth < Params.maxHeight:
                # Stopped above maxHeight: recompute the count with the
                # summed budget of all deeper levels.
                remainingEps = sum(budget_c[curr.n_depth + 1:])
                curr.n_count = self.getCount(curr, remainingEps)
            nleaf += 1
            curr.n_isLeaf = True
            leaf_li.append(curr)
            continue

        # curr needs to split
        curr.n_budget -= 1
        tmp = self.getCoordinates(curr)
        if tmp is False:
            # Split failed: requeue the node.  Its budget has already been
            # decremented, so it is retried with less budget.
            # NOTE(review): termination relies on testLeaf eventually
            # accepting the node (presumably once n_budget runs out) - confirm.
            stack.append(curr)
            continue

        nw_node, ne_node, sw_node, se_node = KNode(), KNode(), KNode(), KNode()
        (split_prm, split_sec1, split_sec2,
         nw_node.n_data, ne_node.n_data, sw_node.n_data, se_node.n_data) = tmp

        # Consecutive Hilbert-value ranges, in nw, ne, sw, se order.
        nw_node.n_box = (curr.n_box[0], split_sec1)
        ne_node.n_box = (split_sec1, split_prm)
        sw_node.n_box = (split_prm, split_sec2)
        se_node.n_box = (split_sec2, curr.n_box[1])

        for sub_node in (nw_node, ne_node, sw_node, se_node):
            sub_node.n_depth = curr.n_depth + 1
            sub_node.n_count = self.getCount(sub_node, budget_c[sub_node.n_depth])
            sub_node.n_budget = curr.n_budget
            stack.append(sub_node)
            tree.append(sub_node)

        curr.n_data = None  # marks curr as internal for the bottom-up pass
        curr.nw, curr.ne, curr.sw, curr.se = nw_node, ne_node, sw_node, se_node

    logging.debug("number of leaves: %d", nleaf)
    logging.debug("max depth: %d", max_depth)

    # convert hilbert values in leaf nodes to real coordinates and update
    # bounding box
    logging.debug('decoding and updating bounding box...')
    for leaf in leaf_li:
        # Inverted sentinel box; a leaf with no values keeps it unchanged.
        # NOTE(review): assumes all coordinates lie within +/-1000 - confirm.
        bbox = np.array([[1000.0, 1000.0], [-1000.0, -1000.0]], dtype='float64')
        for hvalue in leaf.n_data:
            hx, hy = self.h_decode(int(hvalue), RES)
            x, y = self.get_Rcoord(hx, hy, RES)
            if x < bbox[0, 0]:
                bbox[0, 0] = x
            if x > bbox[1, 0]:
                bbox[1, 0] = x
            if y < bbox[0, 1]:
                bbox[0, 1] = y
            if y > bbox[1, 1]:
                bbox[1, 1] = y
        leaf.n_box = bbox

    # update bounding box bottom-up
    # NOTE(review): presumably cmp_node orders deeper nodes first, so children
    # are finished before their parent - confirm against cmp_node.
    tree = sorted(tree, key=cmp_to_key(self.cmp_node))
    logging.debug('updating box for each node in the tree...')
    for node in tree:
        if node.n_data is None:
            kids = (node.ne, node.nw, node.se, node.sw)
            node.n_box = np.zeros((2, 2))
            node.n_box[0, 0] = min(kid.n_box[0, 0] for kid in kids)
            node.n_box[0, 1] = min(kid.n_box[0, 1] for kid in kids)
            node.n_box[1, 0] = max(kid.n_box[1, 0] for kid in kids)
            node.n_box[1, 1] = max(kid.n_box[1, 1] for kid in kids)