def test_rule_num_covered():
    """Rule.num_covered should agree with a manual pandas filter.

    The expected count is the number of rows in FIRST_10_EXAMPLES where
    FEATURE1 == VALUE1 and FEATURE4 == VALUE4.
    """
    covered_count = Rule([COND1, COND4]).num_covered(FIRST_10_EXAMPLES)

    feature1_matches = FIRST_10_EXAMPLES[FEATURE1] == VALUE1
    feature4_matches = FIRST_10_EXAMPLES[FEATURE4] == VALUE4
    expected_rows = FIRST_10_EXAMPLES[feature1_matches & feature4_matches]

    assert covered_count == len(expected_rows)
def test_asruleset():
    """asruleset should yield the same Ruleset from each accepted input form."""
    # Input is already a Ruleset.
    from_ruleset = asruleset(Ruleset([Rule([Cond("hello", "world")])]))
    assert from_ruleset == Ruleset([Rule([Cond("hello", "world")])])

    # Input is a single string describing the whole ruleset.
    from_string = asruleset("[[hello=world]]")
    assert from_string == Ruleset([Rule([Cond("hello", "world")])])

    # Input is a list of rule strings.
    from_rule_strings = asruleset(["[hello=world]"])
    assert from_rule_strings == Ruleset([Rule([Cond("hello", "world")])])
def test_rule_covers():
    """Rule.covers should return the same row indices as a manual pandas filter.

    The expected rows are those in FIRST_10_EXAMPLES where
    FEATURE1 == VALUE1 and FEATURE4 == VALUE4; only index membership is compared.
    """
    covered_rows = Rule([COND1, COND4]).covers(FIRST_10_EXAMPLES)

    feature1_matches = FIRST_10_EXAMPLES[FEATURE1] == VALUE1
    feature4_matches = FIRST_10_EXAMPLES[FEATURE4] == VALUE4
    expected_rows = FIRST_10_EXAMPLES[feature1_matches & feature4_matches]

    covered_index = set(covered_rows.index.tolist())
    expected_index = set(expected_rows.index.tolist())
    assert covered_index == expected_index
def grow_rule(pos_df, neg_df, possible_conds, initial_rule=Rule(), max_rule_conds=None, verbosity=0):
    """Fit a new rule to add to a ruleset.

    Starting from a deep copy of ``initial_rule``, repeatedly replace the
    current rule with ``best_successor(...)`` until one of these holds:

    * the current rule covers no rows of ``neg_df``;
    * ``best_successor`` returns ``None``;
    * ``max_rule_conds`` is set and the rule has reached that many conds.

    Returns the grown rule, which may still be empty.
    """
    # Work on a copy so the caller's rule (and the shared default) is never mutated.
    current = copy.deepcopy(initial_rule)
    if verbosity >= 4:
        print(f'growing rule from initial rule: {current}')

    candidate = copy.deepcopy(current)
    while True:
        # Stop refining rule if no negative examples remain
        if len(current.covers(neg_df)) == 0 or candidate is None:
            break
        if max_rule_conds is not None and len(candidate.conds) >= max_rule_conds:
            break

        candidate = best_successor(current, possible_conds, pos_df, neg_df, verbosity=verbosity)
        if candidate is None:
            continue  # the check at the top of the loop ends the search
        current = candidate
        if verbosity >= 4:
            print(f'negs remaining {len(current.covers(neg_df))}')

    # An empty rule is returned as-is, without a warning.
    if not current.isempty() and verbosity >= 2:
        print(f'grew rule: {current}')
    return current
def grow_rule_cn(cn, pos_idx, neg_idx, initial_rule=Rule(), max_rule_conds=None, verbosity=0):
    """Fit a new rule to add to a ruleset, using ``cn`` for coverage lookups.

    Starting from a deep copy of ``initial_rule``, repeatedly replace the
    current rule with ``best_rule_successor_cn(...)`` until one of these holds:

    * the current rule covers none of ``neg_idx``;
    * ``max_rule_conds`` is set and the rule has reached that many conds;
    * ``best_rule_successor_cn`` returns ``None``.

    Parameters:
        cn: object providing ``rule_covers(rule, subset)``; also passed through
            to ``best_rule_successor_cn``.
        pos_idx: positive examples, passed through to ``best_rule_successor_cn``.
        neg_idx: negative examples; growth stops once none are covered.
        initial_rule: rule to start growing from (never mutated).
        max_rule_conds: optional upper bound on the number of conds in the rule.
        verbosity: >= 2 prints the grown rule, >= 4 prints per-step progress.

    Returns the grown rule, which may still be empty.
    """
    # Work on a copy so the caller's rule (and the shared default) is never mutated.
    rule0 = copy.deepcopy(initial_rule)
    if verbosity >= 4:
        print(f'growing rule from initial rule: {rule0}')

    num_neg_covered = len(cn.rule_covers(rule0, subset=neg_idx))
    while num_neg_covered > 0:  # Stop refining rule if no negative examples remain
        # Enforce the user-supplied cap on every iteration; previously the
        # limit was computed once before the loop and never consulted.
        if max_rule_conds is not None and len(rule0.conds) >= max_rule_conds:
            break

        rule1 = best_rule_successor_cn(cn, rule0, pos_idx, neg_idx, verbosity=verbosity)
        if rule1 is None:
            break
        rule0 = rule1

        num_neg_covered = len(cn.rule_covers(rule0, neg_idx))
        if verbosity >= 4:
            print(f'negs remaining: {num_neg_covered}')

    # An empty rule is returned as-is, without a warning.
    if not rule0.isempty() and verbosity >= 2:
        print(f'grew rule: {rule0}')
    return rule0
def feat_to_num_rs(ruleset):
    """Return a new Ruleset whose conds are keyed by FEAT2IDX lookups.

    For every cond in every rule of ``ruleset``, a new Cond is built from
    ``FEAT2IDX[cond.feature]`` and the unchanged ``cond.val``. The input
    ruleset is only read.
    """
    converted = Ruleset()
    for source_rule in ruleset.rules:
        target_rule = Rule()
        target_rule.conds.extend(
            Cond(FEAT2IDX[source_cond.feature], source_cond.val)
            for source_cond in source_rule.conds
        )
        converted.rules.append(target_rule)
    return converted
def best_rule_successor_cn(cn, rule, pos_idx, neg_idx, verbosity=0):
    """Return ``rule`` extended by its highest-gain cond, or None.

    Every cond in ``cn.conds`` is scored with ``gain_cn`` against the
    positive and negative examples that ``rule`` currently covers. The first
    cond with the strictly highest score wins. None is returned when the best
    score is not positive (including when ``cn.conds`` is empty).
    """
    covered_pos = cn.rule_covers(rule, pos_idx)
    covered_neg = cn.rule_covers(rule, neg_idx)

    top_gain = float('-inf')
    top_cond = None
    for candidate in cn.conds:
        candidate_gain = gain_cn(cn, candidate, covered_pos, covered_neg)
        if candidate_gain > top_gain:
            top_gain, top_cond = candidate_gain, candidate

    if verbosity >= 5:
        print(f'gain {rnd(top_gain)} {top_cond}')

    if top_gain > 0:
        return Rule(rule.conds + [top_cond])
    return None
def best_rule_successor_cn(cn, rule, pos_idx, neg_idx, verbosity=0):
    """Return for a Rule its best successor Rule according to FOIL information gain metric.

    Candidates come from ``cn.conds`` and are scored by ``gain_cn`` over the
    examples the rule currently covers. Returns None when no candidate has a
    positive gain.
    """
    pos_covered_by_rule = cn.rule_covers(rule, pos_idx)
    neg_covered_by_rule = cn.rule_covers(rule, neg_idx)

    winner = None
    winning_gain = float("-inf")
    for cond in cn.conds:
        score = gain_cn(cn, cond, pos_covered_by_rule, neg_covered_by_rule)
        if not score > winning_gain:
            continue  # ties keep the earlier cond
        winning_gain = score
        winner = cond

    if verbosity >= 5:
        print(f"gain {rnd(winning_gain)} {winner}")

    if not winning_gain > 0:
        return None
    return Rule(rule.conds + [winner])
def test_empty_rules_equal():
    """Two Rules constructed with no arguments should compare equal."""
    first_rule = Rule()
    second_rule = Rule()
    assert first_rule == second_rule
def test_rules_len_2_unequal():
    """Two-cond Rules built from different cond pairs should compare unequal."""
    first_rule, second_rule = Rule([COND1, COND2]), Rule([COND2, COND3])
    assert first_rule != second_rule
def test_disordered_rules_len_2_equal():
    """Rules holding the same two conds in a different order should compare equal."""
    forward_rule, reversed_rule = Rule([COND1, COND2]), Rule([COND2, COND1])
    assert forward_rule == reversed_rule
def test_rules_len_2_equal():
    """Rules built from the same two conds in the same order should compare equal."""
    first_rule, second_rule = Rule([COND1, COND2]), Rule([COND1, COND2])
    assert first_rule == second_rule
def test_asrule():
    """asrule should yield the same Rule from each accepted input form."""
    # Input is already a Rule.
    from_rule = asrule(Rule([Cond("hello", "world")]))
    assert from_rule == Rule([Cond("hello", "world")])

    # Input is a list of cond strings.
    from_cond_strings = asrule(["hello=world"])
    assert from_cond_strings == Rule([Cond("hello", "world")])

    # Input is a single string describing the whole rule.
    from_string = asrule("[hello=world]")
    assert from_string == Rule([Cond("hello", "world")])
DF = pd.read_csv("house-votes-84.csv") CLASS_FEAT = "Party" DEFAULT_CLASS_FEAT = "Class" POS_CLASS = "democrat" SPLIT_SIZE = 0.6 X_DF = DF.drop(CLASS_FEAT, axis=1) Y_DF = DF[CLASS_FEAT] XY_NP = DF.values X_NP = X_DF.values Y_NP = Y_DF.values NP_CLASS_FEAT = 0 IREP_RULESET_42 = Ruleset([ Rule([Cond("physician-fee-freeze", "n")]), Rule([ Cond("synfuels-corporation-cutback", "y"), Cond("education-spending", "n") ]), ]) RIP_RULESET_42 = Ruleset([ Rule([Cond("physician-fee-freeze", "n")]), Rule([ Cond("synfuels-corporation-cutback", "y"), Cond("education-spending", "n") ]), Rule([ Cond("synfuels-corporation-cutback", "y"), Cond("adoption-of-the-budget-resolution", "y"),
def test_rules_len_1_equal():
    """Single-cond Rules built from the same cond should compare equal."""
    first_rule = Rule([COND1])
    second_rule = Rule([COND1])
    assert first_rule == second_rule
def test_rules_len_1_unequal():
    """Single-cond Rules built from different conds should compare unequal."""
    first_rule = Rule([COND1])
    second_rule = Rule([COND2])
    assert first_rule != second_rule
def _ruleset_from_df(self, model_df):
    """Build a Ruleset from ``model_df``, producing one Rule per row.

    Each string cell in a row is parsed with ``cond_fromstr``; cells that are
    not ``str`` instances are skipped.
    """
    parsed_rules = []
    for _, record in model_df.iterrows():
        row_conds = []
        for cell in record:
            if isinstance(cell, str):
                row_conds.append(cond_fromstr(cell))
        parsed_rules.append(Rule(row_conds))
    return Ruleset(parsed_rules)