Beispiel #1
0
    def create_rule_from_argument(arg, data, inst):
        """Create the initial rule described by a textual argument.

        ``arg`` is a comma-separated list of attribute constraints, optionally
        wrapped in braces. Each constraint is parsed with
        ``ABRuleLearner.parse_constraint`` and turned into a ``Selector``.

        Parameters
        ----------
        arg : str
            The argument text. Arguments starting with ``~`` (negative
            arguments) are not supported and are skipped with a warning.
        data : table-like
            Object exposing ``X``, ``Y``, ``W`` and ``domain``.
        inst : int
            Index of the argumented instance; ``Y[inst]`` is used as the
            target class when the rule is filtered.

        Returns
        -------
        tuple
            ``(rule, unfinished)`` where ``unfinished`` lists the positions
            (within the parsed constraint list) of constraints whose value
            was given as ``?<number>``, or ``(None, None)`` for a negative
            argument.
        """
        X, Y = data.X, data.Y
        # Truth-testing an array raises ValueError as soon as it holds more
        # than one element, so decide on "has weights" by size instead.
        weights = data.W
        W = weights if weights is not None and np.size(weights) > 0 else None
        Y = Y.astype(dtype=int)

        if arg.startswith("~"):
            warn('Negative arguments are not yet supported. Skipping them.')
            return None, None

        arg = arg.strip("{}").strip()
        att_cons = [att.strip() for att in arg.split(",")]
        # Build the rule from the fixed constraints; constraints with an
        # undefined ("?") threshold are only recorded for later refinement.
        selectors = []
        unfinished = []
        for aci, ac in enumerate(att_cons):
            column, op, value = ABRuleLearner.parse_constraint(ac, data, inst)
            if column is None:
                warn("Can not parse {}. Please check the type of attribute.".
                     format(ac))
                continue
            if isinstance(value, str):
                if value.startswith('?'):
                    value = float(value[1:])
                    # NOTE(review): this is the position in att_cons, which
                    # differs from the position in selectors once an earlier
                    # constraint was skipped - confirm what callers expect.
                    unfinished.append(aci)
                elif op == ">=":
                    # No threshold given: use the loosest bound, i.e. the
                    # extreme of the attribute's column over all instances.
                    value = np.min(data.X[:, column])
                else:
                    value = np.max(data.X[:, column])
            selectors.append(Selector(column=column, op=op, value=value))
        rule = Rule(selectors=selectors, domain=data.domain)
        rule.filter_and_store(X, Y, W, Y[inst])
        return rule, unfinished
Beispiel #2
0
 def create_initial_star(self, X, Y, W, prior):
     """Build the initial star from the base rules of every admissible class.

     For each class that matches ``self.target_class`` (by index or by
     value; every class when it is ``None``) the search strategy's
     ``initialise_rule`` is called with the base rules of that class plus
     an empty rule. Each resulting rule becomes its own default rule, may
     replace its textual form inside ``self.constraints``, and is evaluated.
     """
     initial_rules = []
     for class_index, class_value in enumerate(self.domain.class_var.values):
         wanted = self.target_class
         if (wanted is not None and class_index != wanted
                 and class_value != wanted):
             continue
         # base rules predicting this class, followed by the empty rule
         seeds = [base for base in self.base_rules
                  if class_index == base.target_class]
         seeds.append(Rule(selectors=[], domain=self.domain))
         initial_rules.extend(
             self.rule_finder.search_strategy.initialise_rule(
                 X, Y, W, class_index, seeds, self.domain, prior, prior,
                 self.evaluator, self.rule_finder.complexity_evaluator,
                 self.rule_validator, self.rule_finder.general_validator))

     for candidate in initial_rules:
         candidate.default_rule = candidate
         if candidate.length > 0:
             for group in np.nonzero(self.cons_index)[0]:
                 candidate.create_model()
                 text = str(candidate)
                 for pos, entry in enumerate(self.constraints[group]):
                     if isinstance(entry, str) and text == entry:
                         # swap the textual constraint for the rule object
                         self.constraints[group][pos] = candidate
         candidate.do_evaluate()
     return initial_rules
Beispiel #3
0
    def test_base_RuleLearner(self):
        """
        Check the building blocks of the base rule induction learner:
        its preprocessors, the top-level control procedure elements
        (covering algorithm, rule stopping, data stopping) and the
        bottom-level search procedure controller (rule finder).

        Learners extending _RuleLearner are expected to override fit;
        on the base class itself calling it must raise.
        """
        learner = _RuleLearner()
        features, labels = self.iris.X, self.iris.Y
        with self.assertRaises(NotImplementedError):
            learner.fit(features, labels)

        # two default preprocessors are expected
        self.assertEqual(len(list(learner.active_preprocessors)), 2)

        # and they must be of these types
        seen_types = list(map(type, learner.active_preprocessors))
        for expected_type in (RemoveNaNClasses, Impute):
            self.assertIn(expected_type, seen_types)

        # find_rules must run on the iris data
        learner.domain = self.iris.domain
        learner.find_rules(self.iris.X, self.iris.Y.astype(int),
                           None, None, [], self.iris.domain)

        # top-level control procedure elements
        for component in ("data_stopping", "cover_and_remove",
                          "rule_stopping"):
            self.assertTrue(hasattr(learner, component))

        # exclusive covering: two of three examples are covered
        covering_rule = Rule()
        covering_rule.covered_examples = np.array([True, False, True],
                                                  dtype=bool)
        covering_rule.target_class = None

        X, Y, W = learner.exclusive_cover_and_remove(
            self.iris.X[:3], self.iris.Y[:3], None, covering_rule)
        self.assertEqual((len(X), len(Y)), (1, 1))

        # rule finder and its parts
        self.assertTrue(hasattr(learner, "rule_finder"))
        finder = learner.rule_finder
        self.assertIsInstance(finder, RuleHunter)
        for part in ("search_algorithm", "search_strategy",
                     "quality_evaluator", "complexity_evaluator",
                     "general_validator", "significance_validator"):
            self.assertTrue(hasattr(finder, part))
 def create_initial_goal(domain, conditions):
     """Build the initial Goal, which holds only the static conditions.

     Each condition is read by index: ``c[0]`` is looked up in ``domain``,
     ``c[1]`` is the operator and ``c[2]`` the value. For a
     ``DiscreteVariable`` the value is replaced by its position in the
     variable's ``values``. The resulting rule's prediction is set to 0.
     """
     def to_selector(condition):
         # translate one condition into a Selector
         position = domain.index(condition[0])
         variable = domain[position]
         encoded = (variable.values.index(condition[2])
                    if isinstance(variable, DiscreteVariable)
                    else condition[2])
         return Selector(position, condition[1], encoded)

     goal_rule = Rule(selectors=[to_selector(c) for c in conditions],
                      domain=domain)
     goal_rule.prediction = 0
     return Goal(goal_rule)
Beispiel #5
0
 def create_parent(self, rule, X, Y, W):
     """Return the parent of ``rule``.

     A truthy ``rule.parent_rule`` is returned as is. A rule without
     selectors has no parent (``None``). Otherwise a new rule is built
     from all selectors but the last, inheriting the evaluators,
     validators and class distributions of ``rule``; it is then filtered
     on the given data for ``rule.target_class`` and evaluated.
     """
     existing = rule.parent_rule
     if existing:
         return existing
     if not rule.selectors:
         return None

     shortened = rule.selectors[:-1]
     inherited_names = ("domain", "initial_class_dist", "prior_class_dist",
                        "quality_evaluator", "complexity_evaluator",
                        "significance_validator", "general_validator")
     inherited = {name: getattr(rule, name) for name in inherited_names}
     parent = Rule(selectors=shortened, **inherited)
     parent.filter_and_store(X, Y, W, rule.target_class)
     parent.do_evaluate()
     return parent
Beispiel #6
0
    def specialize(self, rule, unfinished_selectors, data, instance_index):
        """Specialize ``rule`` consistently with its unfinished selectors.

        Starting from ``rule``, the search strategy's ``refine_rule`` is
        applied repeatedly. A refinement is used only when its newly added
        (last) selector has the same column and operator as an earlier
        selector whose position is listed in ``unfinished_selectors``; that
        earlier selector is then replaced by the refined one in a copy of
        ``rule``. Copies that still cover ``instance_index`` are kept and
        refined further.

        Parameters
        ----------
        rule : Rule
            The rule to specialize. Its ``general_validator`` is overwritten
            with the rule finder's validator.
        unfinished_selectors : collection of int
            Positions of selectors whose value may still be changed.
        data : table-like
            Object exposing ``X``, ``Y`` and ``W``.
        instance_index : int
            Index of the instance every returned rule must cover.

        Returns
        -------
        list
            ``rule`` followed by all accepted specializations.

        Side effects: resets ``self.rule_finder.search_strategy.storage``.
        """
        X, Y = data.X, data.Y
        # Truth-testing a weight array raises ValueError as soon as it holds
        # more than one element; treat only missing/empty weights as "none".
        W = data.W
        if W is not None and (len(W) == 0 or getattr(W, "size", 1) == 0):
            W = None
        Y = Y.astype(dtype=int)

        rule.general_validator = self.rule_finder.general_validator
        self.rule_finder.search_strategy.storage = {}
        rules = [rule]
        star = [rule]
        while star:
            new_star = []
            for rs in star:
                refined = self.rule_finder.search_strategy.refine_rule(
                    X, Y, W, rs)
                for ref_rule in refined:
                    # only the newly added selector can match an unfinished one
                    sel = ref_rule.selectors[-1]
                    for i, old_sel in enumerate(ref_rule.selectors[:-1]):
                        if (old_sel.column, old_sel.op) == (sel.column, sel.op) and \
                                        i in unfinished_selectors:
                            # Candidate for further specialization.
                            # NOTE(review): the copy is taken from the
                            # original ``rule``, not from ``rs`` - confirm
                            # that earlier specializations are meant to be
                            # dropped here.
                            new_rule = Rule(
                                selectors=copy(rule.selectors),
                                domain=rule.domain,
                                initial_class_dist=rule.initial_class_dist,
                                prior_class_dist=rule.prior_class_dist,
                                quality_evaluator=rule.quality_evaluator,
                                complexity_evaluator=rule.complexity_evaluator,
                                significance_validator=rule.
                                significance_validator,
                                general_validator=rule.general_validator)
                            new_rule.selectors[i] = Selector(column=sel.column,
                                                             op=sel.op,
                                                             value=sel.value)
                            new_rule.filter_and_store(X, Y, W,
                                                      rule.target_class)
                            if new_rule.covered_examples[instance_index]:
                                rules.append(new_rule)
                                new_star.append(new_rule)
                            # at most one replacement per refined rule
                            break
            star = new_star
        return rules