def test_get_sorted_box_lims(self):
    """Check the uncertainty names returned by ``_get_sorted_box_lims``.

    The box under test is narrower than the data range for "a" and "c"
    and spans the full data range for "b"; the helper is expected to
    report ``["c", "a"]``.
    """
    # builtin float replaces the np.float alias, which NumPy 1.24 removed
    dtype = [("a", float), ("b", float), ("c", float)]
    x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)
    box_init = sdutil._make_box(x)

    # limits per column: a in (0, 2), b in (1, 5), c in (1, 2)
    box_lim = np.array([(0, 1, 1), (2, 5, 2)], dtype=dtype)

    # only the names are checked; the returned limits are not inspected
    _, uncs = sdutil._get_sorted_box_lims([box_lim], box_init)

    self.assertEqual(uncs, ["c", "a"])
def test_get_sorted_box_lims(self):
    '''Verify which uncertainties ``_get_sorted_box_lims`` reports.

    Columns 'a' and 'c' of the tested box are tighter than the range of
    the data, column 'b' covers the whole range of the data. The expected
    list of names is ``['c', 'a']``.
    '''
    # np.float was an alias of the builtin float and no longer exists in
    # NumPy >= 1.24, so the builtin is used directly
    fields = [('a', float), ('b', float), ('c', float)]

    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=fields)
    box_init = sdutil._make_box(x)

    # first row holds the lower limits, second row the upper limits
    box_lim = np.array([(0, 1, 1),
                        (2, 5, 2)], dtype=fields)

    # the first return value is not part of this check
    _, uncs = sdutil._get_sorted_box_lims([box_lim], box_init)

    self.assertEqual(uncs, ['c', 'a'])
def stats(self):
    """Return the list of per-box statistics, filling the cache if needed.

    When ``self._stats`` is truthy it is returned untouched. Otherwise one
    entry is computed for every item of ``self.boxes`` with the function
    registered under ``self.mode`` in ``self._boxstat_methods`` and the
    entries are stored in ``self._stats``. An empty result is falsy, so
    in that case the computation is repeated on the next call.

    Returns
    -------
    list
        one statistics entry per box, in the order of ``self.boxes``
    """
    if not self._stats:
        candidate_boxes = self.boxes
        reference_box = sdutil._make_box(self.x)

        self._stats = []
        for candidate in candidate_boxes:
            # the method is looked up by mode and receives self explicitly
            compute = self._boxstat_methods[self.mode]
            entry = compute(self, candidate, reference_box)
            self._stats.append(entry)

    return self._stats
def test_normalize(self):
    """Check the normalized limits that ``_normalize`` returns for a box.

    Row ``i`` of the result is compared against the expected
    (lower, upper) pair of the i-th uncertainty name. The expected pairs
    equal ``(limit - min) / (max - min)`` with min and max taken over the
    data column (e.g. "c": data range 1..6, limits 1 and 2 -> 0 and 0.2).
    """
    # builtin float replaces the np.float alias, which NumPy 1.24 removed
    dtype = [("a", float), ("b", float), ("c", float)]
    x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)
    box_init = sdutil._make_box(x)
    box_lim = np.array([(0, 1, 1), (2, 5, 2)], dtype=dtype)

    uncs = np.lib.recfunctions.get_names(box_init.dtype)  # @UndefinedVariable
    normalized = sdutil._normalize(box_lim, box_init, uncs)

    expected = [(0, 2 / 3), (0, 1), (0, 0.2)]
    for i, (lower, upper) in enumerate(expected):
        self.assertAlmostEqual(
            normalized[i, 0], lower, msg="lower unequal for " + uncs[i]
        )
        self.assertAlmostEqual(
            normalized[i, 1], upper, msg="upper unequal for " + uncs[i]
        )
def test_make_box(self):
    """Check that the box from ``_make_box`` spans each column's min and max."""
    # builtin float replaces the np.float alias, which NumPy 1.24 removed
    x = np.array(
        [(0, 1, 2), (2, 5, 6), (3, 2, 1)],
        dtype=[("a", float), ("b", float), ("c", float)],
    )

    box_lims = sdutil._make_box(x)

    # expected (min, max) of every column of x
    expected = {"a": (0, 3), "b": (1, 5), "c": (1, 6)}
    for name, (lowest, highest) in expected.items():
        # the message names the column actually checked (the original
        # reported "max c fails" for column "b")
        self.assertEqual(np.min(box_lims[name]), lowest, "min " + name + " fails")
        self.assertEqual(np.max(box_lims[name]), highest, "max " + name + " fails")
def test_determine_nr_restricted_dims(self):
    """Check the number of restricted dimensions reported for a box."""
    # builtin float replaces the np.float alias, which NumPy 1.24 removed
    dtype = [("a", float), ("b", float)]

    x = np.random.rand(10)
    x = np.asarray(x, dtype=dtype)

    # a box compared with itself: no dimension is expected to be restricted
    box_init = sdutil._make_box(x)
    n = sdutil._determine_nr_restricted_dims(box_init, box_init)
    self.assertEqual(n, 0)

    # limits (1, 0) for "a" and (1, 1) for "b": the test expects both
    # dimensions to be counted (the original comment claimed that the
    # second dimension was unchanged, which contradicts the assertion)
    b = np.array([(1, 1), (0, 1)], dtype=dtype)
    n = sdutil._determine_nr_restricted_dims(b, box_init)
    self.assertEqual(n, 2)
def test_make_box(self):
    '''Verify that ``_make_box`` returns limits equal to the column extremes.'''
    # np.float no longer exists in NumPy >= 1.24; it was an alias of float
    fields = [('a', float), ('b', float), ('c', float)]
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=fields)

    box_lims = sdutil._make_box(x)

    # some test on the box
    self.assertEqual(np.min(box_lims['a']), 0, 'min a fails')
    self.assertEqual(np.max(box_lims['a']), 3, 'max a fails')
    self.assertEqual(np.min(box_lims['b']), 1, 'min b fails')
    # message corrected: this assertion is about 'b', not 'c'
    self.assertEqual(np.max(box_lims['b']), 5, 'max b fails')
    self.assertEqual(np.min(box_lims['c']), 1, 'min c fails')
    self.assertEqual(np.max(box_lims['c']), 6, 'max c fails')
def test_determine_nr_restricted_dims(self):
    '''Verify the restricted-dimension count for an identical and a changed box.'''
    # np.float no longer exists in NumPy >= 1.24; it was an alias of float
    fields = [('a', float), ('b', float)]

    x = np.asarray(np.random.rand(10, ), dtype=fields)
    box_init = sdutil._make_box(x)

    # identical boxes: a count of zero is expected
    n = sdutil._determine_nr_restricted_dims(box_init, box_init)
    self.assertEqual(n, 0)

    # 'a' gets the limits (1, 0) and 'b' the limits (1, 1); the asserted
    # count of 2 means both dimensions are expected to be restricted
    # (the former comment said dimension 2 stayed the same)
    b = np.array([(1, 1),
                  (0, 1)], dtype=fields)
    n = sdutil._determine_nr_restricted_dims(b, box_init)
    self.assertEqual(n, 2)
def test_make_box(self):
    '''Test that the limits made by ``_make_box`` match min and max per column.'''
    # float is used instead of np.float, an alias removed in NumPy 1.24
    x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                 dtype=[('a', float),
                        ('b', float),
                        ('c', float)])

    box_lims = sdutil._make_box(x)

    # (column, numpy reduction, expected value, failure message); the
    # message for the maximum of 'b' used to read 'max c fails'
    checks = [('a', np.min, 0, 'min a fails'),
              ('a', np.max, 3, 'max a fails'),
              ('b', np.min, 1, 'min b fails'),
              ('b', np.max, 5, 'max b fails'),
              ('c', np.min, 1, 'min c fails'),
              ('c', np.max, 6, 'max c fails')]

    for column, reduce_, expected, message in checks:
        self.assertEqual(reduce_(box_lims[column]), expected, message)
def test_determine_restricted_dims(self):
    '''Test which dimensions ``_determine_restricted_dims`` reports.'''
    # float is used instead of np.float, an alias removed in NumPy 1.24
    fields = [('a', float), ('b', float)]

    x = np.random.rand(10, )
    x = np.asarray(x, dtype=fields)

    # a box compared with itself: an empty result is expected
    box_init = sdutil._make_box(x)
    u = sdutil._determine_restricted_dims(box_init, box_init)
    self.assertEqual(list(u), [])

    # limits (1, 0) for 'a' and (1, 1) for 'b': both names are expected
    # in the result (the former comment said dimension 2 was unchanged,
    # which does not match the assertion)
    b = np.array([(1, 1), (0, 1)], dtype=fields)
    u = sdutil._determine_restricted_dims(b, box_init)
    self.assertEqual(list(u), ['a', 'b'])
def test_normalize(self):
    '''Verify the rows of the array returned by ``_normalize``.

    For each uncertainty name the lower (column 0) and upper (column 1)
    normalized limit is compared with the expected value. The expected
    values are the box limits rescaled by the data range of the column,
    e.g. for 'a' the data spans 0..3 and the upper limit 2 becomes 2 / 3.
    '''
    # np.float no longer exists in NumPy >= 1.24; it was an alias of float
    fields = [('a', float), ('b', float), ('c', float)]

    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=fields)
    box_init = sdutil._make_box(x)

    box_lim = np.array([(0, 1, 1),
                        (2, 5, 2)], dtype=fields)

    uncs = np.lib.recfunctions.get_names(
        box_init.dtype)  # @UndefinedVariable
    normalized = sdutil._normalize(box_lim, box_init, uncs)

    expected_limits = [(0, 2 / 3), (0, 1), (0, 0.2)]
    for i, lims in enumerate(expected_limits):
        lower, upper = lims
        name = uncs[i]
        self.assertAlmostEqual(normalized[i, 0], lower,
                               msg='lower unequal for ' + name)
        self.assertAlmostEqual(normalized[i, 1], upper,
                               msg='upper unequal for ' + name)
def __init__(self, x, y, threshold=None, obj_function=DEFAULT,
             peel_alpha=0.05, paste_alpha=0.05, mass_min=0.05,
             threshold_type=ABOVE):
    '''Store the data and parameters and set up the initial box.

    Parameters
    ----------
    x : structured array
        the independent variables
    y : 1d numpy array
        the dependent variable
    threshold : float
                the coverage threshold that a box has to meet
    obj_function : optional
                   key into ``_obj_functions`` that selects the objective
                   function used by PRIM (default = DEFAULT)
    peel_alpha : float, optional
                 parameter controlling the peeling stage (default = 0.05).
    paste_alpha : float, optional
                  parameter controlling the pasting stage (default = 0.05).
    mass_min : float, optional
               minimum mass of a box (default = 0.05).
    threshold_type : {ABOVE, BELOW}
                     whether to look above or below the threshold value

    Raises
    ------
    AssertionError
        if threshold is None
    PrimException
        if y has more than one dimension

    '''
    # Raised explicitly so the documented AssertionError also occurs when
    # Python runs with -O (which strips assert statements). The identity
    # check avoids an element-wise comparison for array-like thresholds.
    if threshold is None:
        raise AssertionError("threshold must not be None")

    self.x = x
    self.y = y

    if len(self.y.shape) > 1:
        raise PrimException("y is not a 1-d array")

    # store the remainder of the parameters
    self.paste_alpha = paste_alpha
    self.peel_alpha = peel_alpha
    self.mass_min = mass_min
    self.threshold = threshold
    self.threshold_type = threshold_type
    self.obj_func = self._obj_functions[obj_function]

    # set the indices
    self.yi = np.arange(0, self.y.shape[0])

    # how many data points do we have
    self.n = self.y.shape[0]

    # how many cases of interest do we have?
    self.t_coi = self.determine_coi(self.yi)

    # initial box that contains all data
    self.box_init = sdutil._make_box(self.x)

    # make a list in which the identified boxes can be put
    self._boxes = []

    self._update_yi_remaining()
def boxes(self):
    '''Return one box per leaf of the fitted tree, cached in ``_boxes``.

    For every leaf the chain of splits between the root (node 0) and the
    leaf is collected, and a copy of the initial box made from ``self.x``
    is modified split by split. A truthy ``self._boxes`` is returned
    directly without recomputation.

    Returns
    -------
    list
        one numpy array per leaf, in the order of the leaf node ids

    Raises
    ------
    AssertionError
        if ``self.clf`` is falsy

    '''
    assert self.clf

    if self._boxes:
        return self._boxes

    # based on
    # http://stackoverflow.com/questions/20224526/how-to-extract-the-
    # decision-rules-from-scikit-learn-decision-tree
    tree = self.clf.tree_
    left = tree.children_left
    right = tree.children_right
    threshold = tree.threshold
    features = [self.feature_names[i] for i in tree.feature]

    # get ids of leaf nodes
    leafs = np.argwhere(left == -1)[:, 0]

    def lineage_of(node_id):
        # climb from the node to the root, recording for every parent on
        # the way: (parent id, side taken, parent threshold, parent
        # feature); the result is ordered from the root downwards
        steps = []
        child = node_id
        while True:
            if child in left:
                parent = np.where(left == child)[0].item()
                split = 'l'
            else:
                parent = np.where(right == child)[0].item()
                split = 'r'

            steps.append(
                (parent, split, threshold[parent], features[parent]))

            if parent == 0:
                break
            child = parent

        steps.reverse()
        return steps

    box_init = sdutil._make_box(self.x)

    found = []
    for leaf in leafs:
        branch = lineage_of(leaf)
        box = np.copy(box_init)

        for _, direction, value, unc in branch:
            if direction == 'l':
                # left split: element 1 of the field is overwritten
                try:
                    box[unc][1] = value
                except ValueError:
                    # presumably unc is "<field><sep><category>" here and
                    # not a field of the box -- TODO confirm
                    unc, cat = unc.split(self.sep)
                    cats = box[unc]
                    cats.pop(cat)
                    box[unc][:] = cats
            else:
                # right split: element 0 of the field is overwritten,
                # rounded up first for int32 fields
                try:
                    if (box.dtype.fields[unc][0]) == np.int32:
                        value = math.ceil(value)
                    box[unc][0] = value
                except ValueError:
                    # we are in the right hand branch, so
                    # the category is included
                    pass

        found.append(box)

    self._boxes = found
    return self._boxes
def boxes(self):
    '''Build (once) and return the boxes that belong to the tree leaves.

    Each leaf is traced back to the root (node 0). The splits found on
    that path are then applied, root first, to a copy of the box made
    from ``self.x``. The resulting list is stored in ``self._boxes`` and
    returned as is on later calls as long as it is truthy.

    Returns
    -------
    list of numpy arrays, one for each leaf node

    Raises
    ------
    AssertionError
        if ``self.clf`` is falsy

    '''
    assert self.clf

    if self._boxes:
        return self._boxes

    # based on
    # http://stackoverflow.com/questions/20224526/how-to-extract-the-
    # decision-rules-from-scikit-learn-decision-tree
    fitted = self.clf.tree_
    left = fitted.children_left
    right = fitted.children_right
    threshold = fitted.threshold
    features = [self.feature_names[i] for i in fitted.feature]

    # get ids of leaf nodes
    leafs = np.argwhere(left == -1)[:,0]

    def path_from_root(leaf):
        # every entry: (parent id, 'l' or 'r', threshold and feature of
        # the parent); gathered bottom-up and reversed before returning
        path = []
        current = leaf
        reached_root = False
        while not reached_root:
            if current in left:
                parent = np.where(left == current)[0].item()
                side = 'l'
            else:
                parent = np.where(right == current)[0].item()
                side = 'r'
            path.append((parent, side, threshold[parent],
                         features[parent]))
            reached_root = parent == 0
            current = parent
        path.reverse()
        return path

    def restrict(box, direction, value, unc):
        # apply a single split to the box in place
        if direction=='l':
            try:
                box[unc][1] = value
            except ValueError:
                # presumably reached when unc is not a field name but a
                # field and a category joined by self.sep -- TODO confirm
                unc, cat = unc.split(self.sep)
                cats = box[unc]
                cats.pop(cat)
                box[unc][:]=cats
            return

        try:
            # int32 fields get the threshold rounded up
            if (box.dtype.fields[unc][0])==np.int32:
                value = math.ceil(value)
            box[unc][0] = value
        except ValueError:
            # we are in the right hand branch, so
            # the category is included
            pass

    box_init = sdutil._make_box(self.x)

    result = []
    for leaf in leafs:
        branch = path_from_root(leaf)
        box = np.copy(box_init)
        for _, direction, value, unc in branch:
            restrict(box, direction, value, unc)
        result.append(box)

    self._boxes = result
    return self._boxes