コード例 #1
0
    def test_get_sorted_box_lims(self):
        """Check the uncertainty order returned by ``_get_sorted_box_lims``.

        A box that differs from the initial box on ``a`` and ``c`` is passed
        in; the names returned are expected to be ``["c", "a"]``.
        """
        # builtin float replaces the np.float alias, removed in NumPy 1.24
        dtype = [("a", float), ("b", float), ("c", float)]
        x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)

        box_init = sdutil._make_box(x)

        box_lim = np.array([(0, 1, 1), (2, 5, 2)], dtype=dtype)

        _, uncs = sdutil._get_sorted_box_lims([box_lim], box_init)

        self.assertEqual(uncs, ["c", "a"])
コード例 #2
0
    def test_get_sorted_box_lims(self):
        '''Check the uncertainty order returned by ``_get_sorted_box_lims``.

        A box that differs from the initial box on ``a`` and ``c`` is passed
        in; the names returned are expected to be ``['c', 'a']``.
        '''
        # builtin float replaces the np.float alias, removed in NumPy 1.24
        dtype = [('a', float), ('b', float), ('c', float)]
        x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)

        box_init = sdutil._make_box(x)

        box_lim = np.array([(0, 1, 1), (2, 5, 2)], dtype=dtype)

        _, uncs = sdutil._get_sorted_box_lims([box_lim], box_init)

        self.assertEqual(uncs, ['c', 'a'])
コード例 #3
0
ファイル: cart.py プロジェクト: MasterNicknak007/EMAworkbench
    def stats(self):
        """Return the statistics of every box, computing them on first use.

        The statistics are produced by the entry of ``self._boxstat_methods``
        selected by ``self.mode`` and are cached in ``self._stats``; a
        non-empty cache is returned as is.
        """
        if not self._stats:
            leaf_boxes = self.boxes
            full_box = sdutil._make_box(self.x)

            self._stats = []
            for leaf_box in leaf_boxes:
                compute = self._boxstat_methods[self.mode]
                self._stats.append(compute(self, leaf_box, full_box))

        return self._stats
コード例 #4
0
    def test_normalize(self):
        """Check the normalized limits returned by ``_normalize``.

        The expected (lower, upper) pairs are compared per uncertainty, in
        the field order of the initial box.
        """
        # builtin float replaces the np.float alias, removed in NumPy 1.24
        dtype = [("a", float), ("b", float), ("c", float)]
        x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)

        box_init = sdutil._make_box(x)

        box_lim = np.array([(0, 1, 1), (2, 5, 2)], dtype=dtype)
        uncs = np.lib.recfunctions.get_names(box_init.dtype)  # @UndefinedVariable
        normalized = sdutil._normalize(box_lim, box_init, uncs)

        expected = [(0, 2 / 3), (0, 1), (0, 0.2)]
        for i, (lower, upper) in enumerate(expected):
            self.assertAlmostEqual(normalized[i, 0], lower, msg="lower unequal for " + uncs[i])
            self.assertAlmostEqual(normalized[i, 1], upper, msg="upper unequal for " + uncs[i])
コード例 #5
0
ファイル: cart.py プロジェクト: MasterNicknak007/EMAworkbench
 def stats(self):
     '''Return the statistics of every box, computing them on first use.

     The statistics are produced by the entry of ``self._boxstat_methods``
     selected by ``self.mode`` and are cached in ``self._stats``; a
     non-empty cache is returned as is.
     '''
     if not self._stats:
         leaf_boxes = self.boxes
         full_box = sdutil._make_box(self.x)

         self._stats = []
         for leaf_box in leaf_boxes:
             compute = self._boxstat_methods[self.mode]
             self._stats.append(compute(self, leaf_box, full_box))

     return self._stats
コード例 #6
0
    def test_make_box(self):
        """Check that ``_make_box`` spans the min and max of every field."""
        # builtin float replaces the np.float alias, removed in NumPy 1.24
        dtype = [("a", float), ("b", float), ("c", float)]
        x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)

        box_lims = sdutil._make_box(x)

        # some test on the box
        self.assertEqual(np.min(box_lims["a"]), 0, "min a fails")
        self.assertEqual(np.max(box_lims["a"]), 3, "max a fails")

        self.assertEqual(np.min(box_lims["b"]), 1, "min b fails")
        # message previously named the wrong field ("max c fails")
        self.assertEqual(np.max(box_lims["b"]), 5, "max b fails")

        self.assertEqual(np.min(box_lims["c"]), 1, "min c fails")
        self.assertEqual(np.max(box_lims["c"]), 6, "max c fails")
コード例 #7
0
    def test_determine_nr_restricted_dims(self):
        """Check the count returned by ``_determine_nr_restricted_dims``."""
        # builtin float replaces the np.float alias, removed in NumPy 1.24
        dtype = [("a", float), ("b", float)]
        x = np.random.rand(10)
        x = np.asarray(x, dtype=dtype)

        # all dimensions the same
        box_init = sdutil._make_box(x)
        n = sdutil._determine_nr_restricted_dims(box_init, box_init)

        self.assertEqual(n, 0)

        # a box with other limits than the initial box; both dimensions
        # are expected to be counted as restricted
        b = np.array([(1, 1), (0, 1)], dtype=dtype)
        n = sdutil._determine_nr_restricted_dims(b, box_init)
        self.assertEqual(n, 2)
コード例 #8
0
    def test_make_box(self):
        '''Check that ``_make_box`` spans the min and max of every field.'''
        # builtin float replaces the np.float alias, removed in NumPy 1.24
        dtype = [('a', float), ('b', float), ('c', float)]
        x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)

        box_lims = sdutil._make_box(x)

        # some test on the box
        self.assertEqual(np.min(box_lims['a']), 0, 'min a fails')
        self.assertEqual(np.max(box_lims['a']), 3, 'max a fails')

        self.assertEqual(np.min(box_lims['b']), 1, 'min b fails')
        # message previously named the wrong field ('max c fails')
        self.assertEqual(np.max(box_lims['b']), 5, 'max b fails')

        self.assertEqual(np.min(box_lims['c']), 1, 'min c fails')
        self.assertEqual(np.max(box_lims['c']), 6, 'max c fails')
コード例 #9
0
    def test_determine_nr_restricted_dims(self):
        '''Check the count returned by ``_determine_nr_restricted_dims``.'''
        # builtin float replaces the np.float alias, removed in NumPy 1.24
        dtype = [('a', float), ('b', float)]
        x = np.random.rand(10, )
        x = np.asarray(x, dtype=dtype)

        # all dimensions the same
        box_init = sdutil._make_box(x)
        n = sdutil._determine_nr_restricted_dims(box_init, box_init)

        self.assertEqual(n, 0)

        # a box with other limits than the initial box; both dimensions
        # are expected to be counted as restricted
        b = np.array([(1, 1), (0, 1)], dtype=dtype)
        n = sdutil._determine_nr_restricted_dims(b, box_init)
        self.assertEqual(n, 2)
コード例 #10
0
 def test_make_box(self):
     '''Check that ``_make_box`` spans the min and max of every field.'''
     # builtin float replaces the np.float alias, removed in NumPy 1.24
     dtype = [('a', float),
              ('b', float),
              ('c', float)]
     x = np.array([(0, 1, 2),
                   (2, 5, 6),
                   (3, 2, 1)],
                  dtype=dtype)

     box_lims = sdutil._make_box(x)

     # some test on the box
     self.assertEqual(np.min(box_lims['a']), 0, 'min a fails')
     self.assertEqual(np.max(box_lims['a']), 3, 'max a fails')

     self.assertEqual(np.min(box_lims['b']), 1, 'min b fails')
     # message previously named the wrong field ('max c fails')
     self.assertEqual(np.max(box_lims['b']), 5, 'max b fails')

     self.assertEqual(np.min(box_lims['c']), 1, 'min c fails')
     self.assertEqual(np.max(box_lims['c']), 6, 'max c fails')
コード例 #11
0
    def test_determine_restricted_dims(self):
        '''Check the names returned by ``_determine_restricted_dims``.'''
        # builtin float replaces the np.float alias, removed in NumPy 1.24
        dtype = [('a', float),
                 ('b', float)]
        x = np.random.rand(10, )
        x = np.asarray(x, dtype=dtype)

        # all dimensions the same
        box_init = sdutil._make_box(x)
        u = sdutil._determine_restricted_dims(box_init, box_init)

        self.assertEqual(list(u), [])

        # a box with other limits than the initial box; both dimensions
        # are expected to be reported as restricted
        b = np.array([(1, 1),
                      (0, 1)],
                     dtype=dtype)
        u = sdutil._determine_restricted_dims(b, box_init)

        self.assertEqual(list(u), ['a', 'b'])
コード例 #12
0
    def test_normalize(self):
        '''Check the normalized limits returned by ``_normalize``.

        The expected (lower, upper) pairs are compared per uncertainty, in
        the field order of the initial box.
        '''
        # builtin float replaces the np.float alias, removed in NumPy 1.24
        dtype = [('a', float), ('b', float), ('c', float)]
        x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)

        box_init = sdutil._make_box(x)

        box_lim = np.array([(0, 1, 1), (2, 5, 2)], dtype=dtype)
        uncs = np.lib.recfunctions.get_names(
            box_init.dtype)  # @UndefinedVariable
        normalized = sdutil._normalize(box_lim, box_init, uncs)

        expected = [(0, 2 / 3), (0, 1), (0, 0.2)]
        for i, (lower, upper) in enumerate(expected):
            self.assertAlmostEqual(normalized[i, 0],
                                   lower,
                                   msg='lower unequal for ' + uncs[i])
            self.assertAlmostEqual(normalized[i, 1],
                                   upper,
                                   msg='upper unequal for ' + uncs[i])
コード例 #13
0
ファイル: prim.py プロジェクト: rjplevin/EMAworkbench
 def __init__(self,
              x,
              y,
              threshold=None,
              obj_function=DEFAULT,
              peel_alpha=0.05,
              paste_alpha=0.05,
              mass_min=0.05,
              threshold_type=ABOVE):
     '''
     Parameters
     ----------
     x : structured array
         the independent variables
     y : 1d numpy array
         the dependent variable
     threshold : float
                 the coverage threshold that a box has to meet
     obj_function : optional
                    key used to select the objective function from
                    ``self._obj_functions`` (default = DEFAULT)
     peel_alpha : float, optional
                  parameter controlling the peeling stage (default = 0.05).
     paste_alpha : float, optional
                   parameter controlling the pasting stage (default = 0.05).
     mass_min : float, optional
                minimum mass of a box (default = 0.05).
     threshold_type : {ABOVE, BELOW}
                      whether to look above or below the threshold value

     Raises
     ------
     AssertionError
         if threshold is None
     PrimException
         if y has more than one dimension

     '''
     # raised explicitly so the documented check also holds when asserts
     # are stripped (python -O); identity test avoids ``!=`` on None
     if threshold is None:
         raise AssertionError("threshold must not be None")

     self.x = x
     self.y = y

     if len(self.y.shape) > 1:
         raise PrimException("y is not a 1-d array")

     # store the remainder of the parameters
     self.paste_alpha = paste_alpha
     self.peel_alpha = peel_alpha
     self.mass_min = mass_min
     self.threshold = threshold
     self.threshold_type = threshold_type
     self.obj_func = self._obj_functions[obj_function]

     # set the indices
     self.yi = np.arange(0, self.y.shape[0])

     # how many data points do we have
     self.n = self.y.shape[0]

     # how many cases of interest do we have?
     self.t_coi = self.determine_coi(self.yi)

     # initial box that contains all data
     self.box_init = sdutil._make_box(self.x)

     # make a list in which the identified boxes can be put
     self._boxes = []

     self._update_yi_remaining()
コード例 #14
0
ファイル: cart.py プロジェクト: MasterNicknak007/EMAworkbench
    def boxes(self):
        """Return one box per leaf of the fitted tree, cached after first use.

        Every leaf of ``self.clf.tree_`` is traced back to the root. The
        thresholds met on the way are written into a copy of the box made
        from ``self.x``. The resulting list is stored in ``self._boxes`` and
        a non-empty cache is returned directly.

        Raises
        ------
        AssertionError
            if ``self.clf`` is falsy

        """
        assert self.clf

        if self._boxes:
            return self._boxes

        # based on
        # http://stackoverflow.com/questions/20224526/how-to-extract-the-
        # decision-rules-from-scikit-learn-decision-tree
        left = self.clf.tree_.children_left
        right = self.clf.tree_.children_right
        threshold = self.clf.tree_.threshold
        features = [self.feature_names[i] for i in self.clf.tree_.feature]

        # get ids of leaf nodes
        leafs = np.argwhere(left == -1)[:, 0]

        def recurse(left, right, child, lineage=None):
            # collect (parent, split, threshold, feature) tuples from the
            # given node up to the root, returned in root-to-node order
            if lineage is None:
                lineage = []

            if child in left:
                parent = np.where(left == child)[0].item()
                split = 'l'
            else:
                parent = np.where(right == child)[0].item()
                split = 'r'

            lineage.append(
                (parent, split, threshold[parent], features[parent]))

            if parent == 0:
                lineage.reverse()
                return lineage
            else:
                return recurse(left, right, parent, lineage)

        box_init = sdutil._make_box(self.x)
        boxes = []
        for leaf in leafs:
            branch = recurse(left, right, leaf)
            box = np.copy(box_init)
            for node in branch:
                direction = node[1]
                value = node[2]
                unc = node[3]

                if direction == 'l':
                    try:
                        # left split: threshold is written to index 1
                        # (presumably the upper limit -- confirm)
                        box[unc][1] = value
                    except ValueError:
                        # feature name is not a field of the box; it is
                        # split on self.sep into uncertainty and category
                        # NOTE(review): box[unc] looks like a numpy field
                        # view, which has no ``pop`` -- confirm this path
                        unc, cat = unc.split(self.sep)
                        cats = box[unc]
                        cats.pop(cat)
                        box[unc][:] = cats
                else:
                    try:
                        if (box.dtype.fields[unc][0]) == np.int32:
                            value = math.ceil(value)

                        box[unc][0] = value
                    except (KeyError, ValueError):
                        # we are in the right hand branch, so
                        # the category is included.
                        # KeyError is caught as well: looking up a name
                        # that is not a field in ``dtype.fields`` raises
                        # KeyError before the assignment is reached
                        pass

            boxes.append(box)
        self._boxes = boxes
        return self._boxes
コード例 #15
0
ファイル: cart.py プロジェクト: MasterNicknak007/EMAworkbench
    def boxes(self):
        '''Return one box per leaf of the fitted tree, cached after first use.

        Every leaf of ``self.clf.tree_`` is traced back to the root. The
        thresholds met on the way are written into a copy of the box made
        from ``self.x``. The resulting list is stored in ``self._boxes`` and
        a non-empty cache is returned directly.

        Raises
        ------
        AssertionError
            if ``self.clf`` is falsy

        '''
        assert self.clf

        if self._boxes:
            return self._boxes

        # based on
        # http://stackoverflow.com/questions/20224526/how-to-extract-the-
        # decision-rules-from-scikit-learn-decision-tree
        left = self.clf.tree_.children_left
        right = self.clf.tree_.children_right
        threshold = self.clf.tree_.threshold
        features = [self.feature_names[i] for i in self.clf.tree_.feature]

        # get ids of leaf nodes
        leafs = np.argwhere(left == -1)[:,0]

        def recurse(left, right, child, lineage=None):
            # collect (parent, split, threshold, feature) tuples from the
            # given node up to the root, returned in root-to-node order
            if lineage is None:
                lineage = []

            if child in left:
                parent = np.where(left == child)[0].item()
                split = 'l'
            else:
                parent = np.where(right == child)[0].item()
                split = 'r'

            lineage.append((parent, split, threshold[parent],
                            features[parent]))

            if parent == 0:
                lineage.reverse()
                return lineage
            else:
                return recurse(left, right, parent, lineage)

        box_init = sdutil._make_box(self.x)
        boxes = []
        for leaf in leafs:
            branch = recurse(left, right, leaf)
            box = np.copy(box_init)
            for node in branch:
                direction = node[1]
                value = node[2]
                unc = node[3]

                if direction=='l':
                    try:
                        # left split: threshold is written to index 1
                        # (presumably the upper limit -- confirm)
                        box[unc][1] = value
                    except ValueError:
                        # feature name is not a field of the box; it is
                        # split on self.sep into uncertainty and category
                        # NOTE(review): box[unc] looks like a numpy field
                        # view, which has no ``pop`` -- confirm this path
                        unc, cat = unc.split(self.sep)
                        cats = box[unc]
                        cats.pop(cat)
                        box[unc][:]=cats
                else:
                    try:
                        if (box.dtype.fields[unc][0])==np.int32:
                            value = math.ceil(value)

                        box[unc][0] = value
                    except (KeyError, ValueError):
                        # we are in the right hand branch, so
                        # the category is included.
                        # KeyError is caught as well: looking up a name
                        # that is not a field in ``dtype.fields`` raises
                        # KeyError before the assignment is reached
                        pass

            boxes.append(box)
        self._boxes = boxes
        return self._boxes