def buildIndex(self):
        """Build the tree breadth-first; every non-leaf node gets four children.

        The root receives a count from ``getCount`` using the first entry of
        the budget list returned by ``getCountBudget``. Nodes are then taken
        from a FIFO queue: a node for which ``testLeaf`` returns ``True`` is
        finalised as a leaf, any other node is split into ``nw``/``ne``/
        ``sw``/``se`` children using the result of ``getCoordinates``.
        The number of leaves and the deepest level reached are logged at
        debug level when the queue is exhausted.
        """
        level_budgets = self.getCountBudget()
        # ## the root is counted with the budget of level 0
        self.root.n_count = self.getCount(self.root, level_budgets[0])

        pending = deque([self.root])
        leaf_total = 0  # ## leaf counter
        deepest = -1

        while pending:
            node = pending.popleft()
            deepest = max(deepest, node.n_depth)

            if self.testLeaf(node) is True:
                # ## a leaf that stops above maxHeight is re-counted with the
                # ## sum of the budgets of all the levels below it
                if node.n_depth < Params.maxHeight:
                    leftover_eps = sum(level_budgets[node.n_depth + 1:])
                    node.n_count = self.getCount(node, leftover_eps)
                leaf_total += 1
                node.n_isLeaf = True
                self.cell_setLeaf(node)
                continue

            # ## node has to be split; one unit of budget is consumed up front
            node.n_budget -= 1
            split = self.getCoordinates(node)
            children = [KNode(), KNode(), KNode(), KNode()]
            nw_child, ne_child, sw_child, se_child = children

            # ## two split points followed by the data of the four children
            (first_pt, second_pt,
             nw_child.n_data, ne_child.n_data,
             sw_child.n_data, se_child.n_data) = split
            # ## x_north separates nw/ne, y_west separates nw/sw,
            # ## x_south separates sw/se, y_east separates ne/se
            x_north, y_west = first_pt
            x_south, y_east = second_pt

            # ## corners of the parent box: row 0 is (low x, low y),
            # ## row 1 is (high x, high y)
            x_lo, y_lo = node.n_box[0, 0], node.n_box[0, 1]
            x_hi, y_hi = node.n_box[1, 0], node.n_box[1, 1]

            nw_child.n_box = np.array([[x_lo, y_west], [x_north, y_hi]])
            ne_child.n_box = np.array([[x_north, y_east], [x_hi, y_hi]])
            sw_child.n_box = np.array([[x_lo, y_lo], [x_south, y_west]])
            se_child.n_box = np.array([[x_south, y_lo], [x_hi, y_east]])

            # ## depth, count and budget of the children; counts are drawn
            # ## in nw, ne, sw, se order
            child_depth = node.n_depth + 1
            for child in children:
                child.n_depth = child_depth
                child.n_count = self.getCount(child,
                                              level_budgets[child_depth])
                child.n_budget = node.n_budget
                pending.append(child)

            # ## the coordinates now live in the children only
            node.n_data = None
            node.nw = nw_child
            node.ne = ne_child
            node.sw = sw_child
            node.se = se_child
        # end of while

        logging.debug("number of leaves: %d" % leaf_total)
        logging.debug("max depth: %d" % deepest)
Exemple #2
0
 def __init__(self, data, param):
     """Keep the parameters and raw data and create a root without a box.

     data  -- stored unchanged on ``self.realData``.
     param -- stored on ``self.param``; its ``Seed`` attribute is handed to
              ``Differential``.
     """
     self.param = param
     self.differ = Differential(param.Seed)
     self.mapp = None

     # ## initialize the root: no bounding box yet, full height budget
     root = KNode()
     self.realData = data
     root.n_box = None
     root.n_budget = Params.maxHeight
     self.root = root
Exemple #3
0
 def __init__(self, data, param):
     """Keep the parameters and create a root node holding all the data.

     data  -- stored on the root node as ``n_data``.
     param -- stored on ``self.param``; its ``Seed`` attribute is handed to
              ``Differential``.
     """
     self.param = param
     self.differ = Differential(param.Seed)

     # ## initialize the root: all data, the box spanned by Params.LOW and
     # ## Params.HIGH, and the full height budget
     root = KNode()
     root.n_data = data
     root.n_box = np.array([Params.LOW, Params.HIGH])
     root.n_budget = Params.maxHeight
     self.root = root
Exemple #4
0
    def buildIndex(self):
        """Build the tree breadth-first from counts supplied by getCoordinates.

        The root count is the sum of ``self.mapp``. Nodes are taken from a
        FIFO queue: a node for which ``testLeaf`` returns ``True`` is
        finalised as a leaf, any other node is split into ``nw``/``ne``/
        ``sw``/``se`` children whose counts are the first four entries of the
        third value returned by ``getCoordinates``. The number of leaves and
        the deepest level reached are logged at debug level at the end.
        """
        self.root.n_count = np.sum(self.mapp)

        pending = deque([self.root])
        leaf_total = 0  # leaf counter
        deepest = -1

        while pending:
            node = pending.popleft()
            deepest = max(deepest, node.n_depth)

            if self.testLeaf(node) is True:  # node is a leaf
                leaf_total += 1
                node.n_isLeaf = True
                self.cell_setLeaf(node)
                continue

            # node has to be split; one unit of budget is consumed up front
            node.n_budget -= 1
            split = self.getCoordinates(node)
            children = [KNode(), KNode(), KNode(), KNode()]
            nw_child, ne_child, sw_child, se_child = children

            # two split points and the counts of the four children
            first_pt, second_pt, child_counts = split
            # x_north separates nw/ne, y_west separates nw/sw,
            # x_south separates sw/se, y_east separates ne/se
            x_north, y_west = first_pt
            x_south, y_east = second_pt

            # corners of the parent box: row 0 is (low x, low y),
            # row 1 is (high x, high y)
            x_lo, y_lo = node.n_box[0, 0], node.n_box[0, 1]
            x_hi, y_hi = node.n_box[1, 0], node.n_box[1, 1]

            nw_child.n_box = np.array([[x_lo, y_west], [x_north, y_hi]])
            ne_child.n_box = np.array([[x_north, y_east], [x_hi, y_hi]])
            sw_child.n_box = np.array([[x_lo, y_lo], [x_south, y_west]])
            se_child.n_box = np.array([[x_south, y_lo], [x_hi, y_east]])

            # counts are indexed in nw, ne, sw, se order
            child_depth = node.n_depth + 1
            for position, child in enumerate(children):
                child.n_depth = child_depth
                child.n_count = child_counts[position]
                child.n_budget = node.n_budget
                pending.append(child)

            node.nw = nw_child
            node.ne = ne_child
            node.sw = sw_child
            node.se = se_child

        # end of while
        logging.debug("number of leaves: %d" % leaf_total)
        logging.debug("max depth: %d" % deepest)
    def buildIndex(self):
        """Build the tree over Hilbert-encoded points, then derive 2D boxes.

        Steps, in order:

        1. Every column of ``self.realData`` (rows 0 and 1) is mapped with
           ``get_Hcoord`` and ``h_encode`` to a Hilbert value; the values are
           sorted and become the root's data.
        2. Nodes are split breadth-first. While splitting, ``n_box`` is a
           pair of Hilbert values delimiting the node's interval.
        3. The Hilbert values stored in each leaf are decoded back with
           ``h_decode`` / ``get_Rcoord`` and the leaf's ``n_box`` is replaced
           by the 2x2 bounding box of those coordinates.
        4. All nodes are sorted with ``self.cmp_node`` and every node whose
           data was released (``n_data is None``) gets the union of its four
           children's boxes.

        The number of leaves and the deepest level reached are logged at
        debug level.
        """
        # Local import: sorted(..., cmp=...) no longer exists on Python 3,
        # cmp_to_key gives the same ordering on Python 2.7 and 3.
        import functools

        budget_c = self.getCountBudget()
        logging.debug('encoding coordinates...')
        RES = self.param.Res  # order of Hilbert curve
        ndata = self.realData.shape[1]
        hidx = np.zeros(ndata)
        for i in range(ndata):
            hx, hy = self.get_Hcoord(self.realData[0, i], self.realData[1, i],
                                     RES)
            hidx[i] = self.h_encode(hx, hy, RES)
        hidx = np.sort(hidx)

        logging.debug('building index...')
        self.root.n_data = hidx
        self.root.n_box = (0, 2**(2 * RES) - 1)
        self.root.n_count = self.getCount(self.root, budget_c[0])

        stack = deque([self.root])
        tree = [self.root]  # every node ever created, for the bottom-up pass
        leaf_li = []  # storage of all leaves
        nleaf = 0  # leaf counter
        max_depth = -1

        while stack:
            curr = stack.popleft()
            if curr.n_depth > max_depth:
                max_depth = curr.n_depth

            if self.testLeaf(curr) is True:  # curr is a leaf node
                # a leaf that stops above maxHeight is re-counted with the
                # sum of the budgets of all the levels below it
                if curr.n_depth < Params.maxHeight:
                    remainingEps = sum(budget_c[curr.n_depth + 1:])
                    curr.n_count = self.getCount(curr, remainingEps)
                nleaf += 1
                curr.n_isLeaf = True
                leaf_li.append(curr)
                continue

            # curr needs to split; the budget is consumed even if it fails
            curr.n_budget -= 1
            tmp = self.getCoordinates(curr)
            if tmp is False:
                # split failed: re-queue the node with its reduced budget
                # NOTE(review): termination relies on testLeaf eventually
                # returning True for such a node -- confirm against testLeaf
                stack.append(curr)
                continue

            nw_node, ne_node, sw_node, se_node = KNode(), KNode(), KNode(
            ), KNode()  # create sub-nodes
            (split_prm, split_sec1, split_sec2, nw_node.n_data,
             ne_node.n_data, sw_node.n_data, se_node.n_data) = tmp

            # consecutive intervals along the Hilbert curve
            nw_node.n_box = (curr.n_box[0], split_sec1)
            ne_node.n_box = (split_sec1, split_prm)
            sw_node.n_box = (split_prm, split_sec2)
            se_node.n_box = (split_sec2, curr.n_box[1])

            for sub_node in (nw_node, ne_node, sw_node, se_node):
                sub_node.n_depth = curr.n_depth + 1
                sub_node.n_count = self.getCount(
                    sub_node, budget_c[sub_node.n_depth])
                sub_node.n_budget = curr.n_budget
                stack.append(sub_node)
                tree.append(sub_node)
            curr.n_data = None  # the values now live in the children only
            curr.nw, curr.ne, curr.sw, curr.se = (nw_node, ne_node, sw_node,
                                                  se_node)

        # end of while
        logging.debug("number of leaves: %d", nleaf)
        logging.debug("max depth: %d", max_depth)

        # # convert hilbert values in leaf nodes to real coordinates and update bounding box
        logging.debug('decoding and updating bounding box...')
        for leaf in leaf_li:
            # starts as an "inverted" box so that the first point overwrites
            # all four entries
            # NOTE(review): the +/-1000 sentinels assume every coordinate
            # lies strictly inside that range -- confirm for the data used
            bbox = np.array([[1000.0, 1000.0], [-1000.0, -1000.0]],
                            dtype='float64')
            for hvalue in leaf.n_data:
                hx, hy = self.h_decode(int(hvalue), RES)
                x, y = self.get_Rcoord(hx, hy, RES)
                if x < bbox[0, 0]:
                    bbox[0, 0] = x
                if x > bbox[1, 0]:
                    bbox[1, 0] = x
                if y < bbox[0, 1]:
                    bbox[0, 1] = y
                if y > bbox[1, 1]:
                    bbox[1, 1] = y
            leaf.n_box = bbox

        # # update bounding box bottom-up
        tree = sorted(tree, key=functools.cmp_to_key(self.cmp_node))
        logging.debug('updating box for each node in the tree...')
        for node in tree:
            if node.n_data is None:
                # node was split: its box is the union of its children's
                kids = (node.ne, node.nw, node.se, node.sw)
                node.n_box = np.zeros((2, 2))
                node.n_box[0, 0] = min(kid.n_box[0, 0] for kid in kids)
                node.n_box[0, 1] = min(kid.n_box[0, 1] for kid in kids)
                node.n_box[1, 0] = max(kid.n_box[1, 0] for kid in kids)
                node.n_box[1, 1] = max(kid.n_box[1, 1] for kid in kids)