def test_rule_count_p_n3(self):
    """Check ``count_p_n_rule`` on a BitmapDataset for ``ClassOfSeat in 1st``.

    Loads ``testfile_8.csv`` and expects the returned ``(p, n)`` pair to be
    ``(4, 4)`` for the single-literal rule.
    """
    frame = pd.read_csv(
        'tests/test_files/testfile_8.csv',
        encoding='utf-8',
        delimiter=';',
    )

    first_class_rule = Rule()
    first_class_rule.add_literal(Literal('ClassOfSeat', 'in', '1st'))

    dataset = BitmapDataset(1, frame)
    # The bitmap dataset is queried with the converted (unmade) form of the rule.
    encoded_rule = dataset.unmake_rule(first_class_rule)
    p_count, n_count = dataset.count_p_n_rule(encoded_rule)

    self.assertEqual(4, p_count)
    self.assertEqual(4, n_count)
def test_prune_rule_2(self):
    """Check that ``prune_rule`` keeps a single multi-valued literal intact.

    Uses ``mushroom.csv`` with a DictDataset and expects the pruned rule to
    render exactly as the literal that was put in.
    """
    frame = pd.read_csv(
        'tests/test_files/mushroom.csv',
        encoding='utf-8',
        delimiter=';',
    )
    dataset = DictDataset(0, frame)

    candidate = Rule()
    candidate.add_literal(Literal('a5', 'in', ['c', 'f', 'm', 'p', 's', 'y']))

    pruned = dataset.prune_rule(candidate)

    self.assertEqual("a5 in ['c', 'f', 'm', 'p', 's', 'y']", pruned.to_string())
def test_prune_rule_1(self):
    """Check that ``prune_rule`` reduces a two-literal rule to ``Sex in Female``.

    The rule is built from ``Sex in Female`` followed by ``Sex in Male`` and
    pruned against a DictDataset loaded from ``testfile_3.csv``.
    """
    frame = pd.read_csv(
        'tests/test_files/testfile_3.csv',
        encoding='utf-8',
        delimiter=';',
    )

    literals = (
        Literal('Sex', 'in', 'Female'),
        Literal('Sex', 'in', 'Male'),
    )
    candidate = Rule()
    for literal in literals:
        candidate.add_literal(literal)

    dataset = DictDataset(1, frame)
    pruned = dataset.prune_rule(candidate)

    self.assertEqual("Sex in Female", pruned.to_string())
def test_rule_count_p_n5(self):
    """Check that counts are zero once the rule's covered rows are deleted.

    On a BitmapDataset built from ``testfile_4.csv``, ``delete_covered`` is
    applied for ``Sex in Female`` and ``count_p_n_rule`` for the same rule
    is then expected to return ``(0, 0)``.
    """
    frame = pd.read_csv(
        'tests/test_files/testfile_4.csv',
        encoding='utf-8',
        delimiter=';',
    )

    female_rule = Rule()
    female_rule.add_literal(Literal('Sex', 'in', 'Female'))

    dataset = BitmapDataset(1, frame)
    dataset.delete_covered(dataset.unmake_rule(female_rule))

    # The rule is converted again after the deletion rather than reusing the
    # earlier conversion, exactly as the dataset is queried a second time.
    p_count, n_count = dataset.count_p_n_rule(dataset.unmake_rule(female_rule))

    self.assertEqual(0, p_count)
    self.assertEqual(0, n_count)
def test_delete_not_covered_3(self):
    """Check ``delete_not_covered`` on a BitmapDataset for ``ClassOfSeat in 1st``.

    Loads ``testfile_8.csv`` and expects the dataset length to drop by 24.
    """
    # NOTE(review): a DictDataset test with this same method name also
    # exists; if both live in one TestCase class the later definition
    # replaces the earlier one and only one of them runs -- confirm.
    frame = pd.read_csv(
        'tests/test_files/testfile_8.csv',
        encoding='utf-8',
        delimiter=';',
    )

    first_class_rule = Rule()
    first_class_rule.add_literal(Literal('ClassOfSeat', 'in', '1st'))

    dataset = BitmapDataset(1, frame)
    size_before = dataset.length()
    dataset.delete_not_covered(dataset.unmake_rule(first_class_rule))
    size_after = dataset.length()

    self.assertEqual(size_before - 24, size_after)
def test_rule_count_p_n1(self):
    """Check ``count_p_n_rule`` on a DictDataset for a two-literal rule.

    The rule is ``ClassOfSeat in 1st`` plus ``Age < 20`` on
    ``testfile_8.csv``; the returned ``(p, n)`` pair is expected to be
    ``(1, 1)``.
    """
    frame = pd.read_csv(
        'tests/test_files/testfile_8.csv',
        encoding='utf-8',
        delimiter=';',
    )

    literals = (
        Literal('ClassOfSeat', 'in', '1st'),
        Literal('Age', '<', 20),
    )
    young_first_class = Rule()
    for literal in literals:
        young_first_class.add_literal(literal)

    dataset = DictDataset(1, frame)
    p_count, n_count = dataset.count_p_n_rule(young_first_class)

    self.assertEqual(1, p_count)
    self.assertEqual(1, n_count)
def test_delete_not_covered_3(self):
    """Check ``delete_not_covered`` on a DictDataset for a two-literal rule.

    The rule is ``ClassOfSeat in 1st`` plus ``Age < 20`` on
    ``testfile_8.csv``; the dataset length is expected to drop by 30.
    """
    # NOTE(review): a BitmapDataset test with this same method name also
    # exists; if both live in one TestCase class the later definition
    # replaces the earlier one and only one of them runs -- confirm.
    frame = pd.read_csv(
        'tests/test_files/testfile_8.csv',
        encoding='utf-8',
        delimiter=';',
    )

    literals = (
        Literal('ClassOfSeat', 'in', '1st'),
        Literal('Age', '<', 20),
    )
    young_first_class = Rule()
    for literal in literals:
        young_first_class.add_literal(literal)

    dataset = DictDataset(1, frame)
    size_before = dataset.length()
    dataset.delete_not_covered(young_first_class)
    size_after = dataset.length()

    self.assertEqual(size_before - 30, size_after)
def test_unmake_rule_1(self):
    """Check the output of ``BitmapDataset.unmake_rule`` for a three-literal rule.

    Built on ``mushroom.csv``; the string form of the converted rule is
    compared against a fixed list of index pairs.
    """
    frame = pd.read_csv(
        'tests/test_files/mushroom.csv',
        encoding='utf-8',
        delimiter=';',
    )
    dataset = BitmapDataset(0, frame)

    source_rule = Rule()
    literals = (
        Literal('a1', 'in', 'x'),
        Literal('a3', 'in', 'g'),
        Literal('a6', 'in', ['a', 'f']),
    )
    for literal in literals:
        source_rule.add_literal(literal)

    encoded = dataset.unmake_rule(source_rule)

    self.assertEqual("[[0, 0], [2, 3], [5, 0], [5, 1]]", str(encoded))
def test_delete_not_covered_4(self):
    """Check ``delete_covered`` followed by ``delete_not_covered`` for one rule.

    On a BitmapDataset built from ``testfile_4.csv`` with ``Sex in Male``:
    after ``delete_covered`` the length must be 36, and after the subsequent
    ``delete_not_covered`` it must be 64 less than the initial length.
    """
    frame = pd.read_csv(
        'tests/test_files/testfile_4.csv',
        encoding='utf-8',
        delimiter=';',
    )

    male_rule = Rule()
    male_rule.add_literal(Literal('Sex', 'in', 'Male'))

    dataset = BitmapDataset(1, frame)
    size_before = dataset.length()

    dataset.delete_covered(dataset.unmake_rule(male_rule))
    self.assertEqual(36, dataset.length())

    # The rule is converted afresh for the second deletion step.
    dataset.delete_not_covered(dataset.unmake_rule(male_rule))
    size_after = dataset.length()

    self.assertEqual(size_before - 64, size_after)
def make_rule(self, rule):
    """Convert a rule given as index pairs into a ``Rule`` of ``'in'`` literals.

    Args:
        rule: Sized sequence of ``(column_index, value_index)`` pairs, or
            ``None``. Each pair is resolved through ``self.col_names`` and
            ``self.col_unique_values``.

    Returns:
        A ``Rule`` with one ``Literal(col_name, 'in', values)`` per distinct
        column index, added in ascending column-index order. The values of
        every literal are sorted. An empty ``Rule`` is returned when ``rule``
        is ``None`` or empty.
    """
    if rule is None or len(rule) == 0:
        return Rule()

    # Group the decoded values by column index. Sorting by column first
    # means the dict's insertion order is ascending column order.
    values_by_col = {}
    for col_idx, val_idx in sorted(rule, key=lambda pair: pair[0]):
        decoded = self.col_unique_values[col_idx][val_idx]
        values_by_col.setdefault(col_idx, []).append(decoded)

    new_rule = Rule()
    for col_idx, values in values_by_col.items():
        # Sort the values of every literal. Previously only the last
        # column's literal was sorted, so the output depended on position.
        new_rule.add_literal(Literal(self.col_names[col_idx], 'in', sorted(values)))
    return new_rule
def grow_rule(self):
    """Greedily grow a rule by adding the best-scoring literal each round.

    A fresh ``DictDataset`` is built from ``self.prod`` and ``self.df``.
    Each round:

    1. ``count_p_n_rule`` is evaluated for the current rule.
    2. For every column of the dataset except ``self.class_name``,
       ``find_best_literal`` proposes a literal and a score. Columns that
       already appear in the rule are skipped unless they are listed in
       ``self.numeric_cols``.
    3. If the best score exceeds ``self.grow_param``, a deep copy of that
       literal is added to the rule and ``delete_not_covered`` is applied to
       the dataset; otherwise growing stops.

    Returns:
        The grown ``Rule``. It has no added literals when no candidate ever
        scores above ``self.grow_param``.
    """
    rule = Rule()
    growset = DictDataset(self.prod, self.df)

    while True:
        p0, n0 = growset.count_p_n_rule(rule)
        # The rule does not change while scanning columns, so the set of
        # already-used columns is computed once per round.
        used_cols = {literal.var_name for literal in rule.literals}

        best_foil = -math.inf
        best_literal = None
        # Iterate over one snapshot of the column names instead of
        # rebuilding and indexing the key list for every column.
        for col_name in list(growset.dict):
            if self.class_name == col_name:
                continue
            if col_name in used_cols and col_name not in self.numeric_cols:
                continue

            col_values = list(set(growset.dict[col_name].values()))
            candidate, foil = growset.find_best_literal(p0, n0, col_values, col_name)
            if foil > best_foil:
                best_literal = copy.deepcopy(candidate)
                best_foil = foil

        if best_foil > self.grow_param:
            rule.add_literal(best_literal)
            growset.delete_not_covered(rule)
        else:
            break

    return rule