def test_rule_count_p_n3(self):
     """A lone ``ClassOfSeat in 1st`` rule on testfile_8 counts p == 4 and n == 4."""
     frame = pd.read_csv('tests/test_files/testfile_8.csv',
                         delimiter=';',
                         encoding='utf-8')
     first_class = Literal('ClassOfSeat', 'in', '1st')
     candidate = Rule()
     candidate.add_literal(first_class)
     dataset = BitmapDataset(1, frame)
     # BitmapDataset is queried with the output of unmake_rule, not the Rule.
     encoded = dataset.unmake_rule(candidate)
     positives, negatives = dataset.count_p_n_rule(encoded)
     self.assertEqual(4, positives)
     self.assertEqual(4, negatives)
Esempio n. 2
0
 def test_prune_rule_2(self):
     """Pruning a one-literal ``a5`` rule on the mushroom data leaves it unchanged."""
     frame = pd.read_csv('tests/test_files/mushroom.csv',
                         delimiter=';',
                         encoding='utf-8')
     dataset = DictDataset(0, frame)
     odours = Literal('a5', 'in', ['c', 'f', 'm', 'p', 's', 'y'])
     candidate = Rule()
     candidate.add_literal(odours)
     pruned = dataset.prune_rule(candidate)
     expected = "a5 in ['c', 'f', 'm', 'p', 's', 'y']"
     self.assertEqual(expected, pruned.to_string())
 def test_rule_count_p_n5(self):
     """After ``delete_covered`` for ``Sex in Female``, that rule counts p == 0, n == 0."""
     frame = pd.read_csv('tests/test_files/testfile_4.csv',
                         delimiter=';',
                         encoding='utf-8')
     female = Literal('Sex', 'in', 'Female')
     candidate = Rule()
     candidate.add_literal(female)
     dataset = BitmapDataset(1, frame)
     dataset.delete_covered(dataset.unmake_rule(candidate))
     # The rule is encoded again after the deletion, exactly as a caller
     # holding only the Rule object would do.
     encoded_after = dataset.unmake_rule(candidate)
     positives, negatives = dataset.count_p_n_rule(encoded_after)
     self.assertEqual(0, positives)
     self.assertEqual(0, negatives)
 def test_delete_not_covered_3(self):
     """``delete_not_covered`` for ``ClassOfSeat in 1st`` shrinks testfile_8 by 24."""
     frame = pd.read_csv('tests/test_files/testfile_8.csv',
                         delimiter=';',
                         encoding='utf-8')
     first_class = Literal('ClassOfSeat', 'in', '1st')
     candidate = Rule()
     candidate.add_literal(first_class)
     dataset = BitmapDataset(1, frame)
     size_before = dataset.length()
     encoded = dataset.unmake_rule(candidate)
     dataset.delete_not_covered(encoded)
     size_after = dataset.length()
     self.assertEqual(size_before - 24, size_after)
 def test_delete_not_covered_4(self):
     """``delete_covered`` then ``delete_not_covered`` for ``Sex in Male`` on testfile_4.

     The length must be 36 after the first deletion and 64 below the
     starting length after the second.
     """
     frame = pd.read_csv('tests/test_files/testfile_4.csv',
                         delimiter=';',
                         encoding='utf-8')
     male = Literal('Sex', 'in', 'Male')
     candidate = Rule()
     candidate.add_literal(male)
     dataset = BitmapDataset(1, frame)
     size_before = dataset.length()

     # Step 1: remove the rows the rule covers.
     dataset.delete_covered(dataset.unmake_rule(candidate))
     self.assertEqual(36, dataset.length())

     # Step 2: remove the rows the rule does not cover (rule re-encoded).
     dataset.delete_not_covered(dataset.unmake_rule(candidate))
     size_after = dataset.length()
     self.assertEqual(size_before - 64, size_after)
 def test_unmake_rule_2(self):
     """``unmake_rule`` on a rule without literals prints as ``[]``."""
     frame = pd.read_csv('tests/test_files/mushroom.csv',
                         delimiter=';',
                         encoding='utf-8')
     dataset = BitmapDataset(0, frame)
     empty_rule = Rule()
     encoded = dataset.unmake_rule(empty_rule)
     self.assertEqual("[]", str(encoded))
Esempio n. 7
0
 def test_rule_count_p_n4(self):
     """A rule without literals counts p == 16 and n == 16 on testfile_8."""
     frame = pd.read_csv('tests/test_files/testfile_8.csv',
                         delimiter=';',
                         encoding='utf-8')
     empty_rule = Rule()
     dataset = DictDataset(1, frame)
     positives, negatives = dataset.count_p_n_rule(empty_rule)
     self.assertEqual(16, positives)
     self.assertEqual(16, negatives)
Esempio n. 8
0
 def test_delete_not_covered_1(self):
     """``DictDataset.delete_not_covered`` with a literal-free rule leaves length 0."""
     df = pd.read_csv('tests/test_files/testfile_8.csv',
                      encoding='utf-8',
                      delimiter=';')
     rule = Rule()
     ds = DictDataset(1, df)
     # The starting length is irrelevant here: the assertion is absolute,
     # so no "before" measurement is taken.
     ds.delete_not_covered(rule)
     self.assertEqual(0, ds.length())
 def test_delete_covered_1(self):
     """``BitmapDataset.delete_covered`` with a literal-free rule keeps the length."""
     frame = pd.read_csv('tests/test_files/testfile_8.csv',
                         delimiter=';',
                         encoding='utf-8')
     empty_rule = Rule()
     dataset = BitmapDataset(1, frame)
     size_before = dataset.length()
     encoded = dataset.unmake_rule(empty_rule)
     dataset.delete_covered(encoded)
     size_after = dataset.length()
     self.assertEqual(size_before, size_after)
Esempio n. 10
0
 def test_prune_rule_1(self):
     """Pruning ``Sex in Female`` + ``Sex in Male`` on testfile_3 keeps only the first."""
     frame = pd.read_csv('tests/test_files/testfile_3.csv',
                         delimiter=';',
                         encoding='utf-8')
     female = Literal('Sex', 'in', 'Female')
     male = Literal('Sex', 'in', 'Male')
     candidate = Rule()
     for literal in (female, male):
         candidate.add_literal(literal)
     dataset = DictDataset(1, frame)
     pruned = dataset.prune_rule(candidate)
     self.assertEqual("Sex in Female", pruned.to_string())
Esempio n. 11
0
 def grow_rule(self):
     """Greedily grow a rule, one literal per round, on a fresh grow set.

     A ``DictDataset`` is built from ``self.prod`` and ``self.df``.  In each
     round every column of ``growset.dict`` except ``self.class_name`` is
     offered to ``growset.find_best_literal``; a column the rule already
     uses is offered again only when it is listed in ``self.numeric_cols``.
     The candidate with the highest returned score is appended to the rule
     if that score exceeds ``self.grow_param``, after which the grow set is
     narrowed with ``delete_not_covered``.  Growth stops at the first round
     in which no candidate beats the threshold.

     Returns:
         The grown ``Rule`` (without literals if no candidate ever
         qualified).
     """
     rule = Rule()
     growset = DictDataset(self.prod, self.df)
     while True:
         p0, n0 = growset.count_p_n_rule(rule)
         best_foil = -math.inf
         best_l = None
         # The rule is not modified while candidates are scored, so the
         # names it already uses are collected once per round.
         used_cols = [x.var_name for x in rule.literals]
         # Snapshot the column names once instead of rebuilding the key
         # list on every index access.
         for col_name in list(growset.dict):
             if col_name == self.class_name:
                 continue
             if col_name in used_cols and col_name not in self.numeric_cols:
                 continue
             col_values = list(set(growset.dict[col_name].values()))
             l, foil = growset.find_best_literal(p0, n0, col_values, col_name)
             if foil > best_foil:
                 best_l = copy.deepcopy(l)
                 best_foil = foil
         if best_foil > self.grow_param:
             rule.add_literal(best_l)
             growset.delete_not_covered(rule)
         else:
             break
     return rule
Esempio n. 12
0
 def test_rule_count_p_n1(self):
     """``ClassOfSeat in 1st`` and ``Age < 20`` together count p == 1, n == 1."""
     frame = pd.read_csv('tests/test_files/testfile_8.csv',
                         delimiter=';',
                         encoding='utf-8')
     first_class = Literal('ClassOfSeat', 'in', '1st')
     under_twenty = Literal('Age', '<', 20)
     candidate = Rule()
     for literal in (first_class, under_twenty):
         candidate.add_literal(literal)
     dataset = DictDataset(1, frame)
     positives, negatives = dataset.count_p_n_rule(candidate)
     self.assertEqual(1, positives)
     self.assertEqual(1, negatives)
Esempio n. 13
0
 def make_rule(self, rule):
     """Turn ``(column index, value index)`` pairs into a ``Rule``.

     Pairs are grouped by column index.  Each group becomes one ``'in'``
     literal whose name is ``self.col_names[column]`` and whose value list
     holds ``self.col_unique_values[column][value]`` for every pair of that
     column.  Literals are added in ascending column order.

     The value list of *every* literal is sorted.  Previously only the
     last column's values were sorted, so a literal's textual form
     depended on whether its column happened to come last.

     Args:
         rule: Iterable of two-item pairs, or ``None``.

     Returns:
         A new ``Rule``; it has no literals when ``rule`` is ``None`` or
         empty.
     """
     if rule is None or len(rule) == 0:
         return Rule()
     # Group the decoded values by column; no sentinel index is needed, so
     # any column index (including -1) is handled uniformly.
     values_by_col = {}
     for col_idx, val_idx in rule:
         decoded = self.col_unique_values[col_idx][val_idx]
         values_by_col.setdefault(col_idx, []).append(decoded)
     new_rule = Rule()
     for col_idx in sorted(values_by_col):
         l = Literal(self.col_names[col_idx], 'in',
                     sorted(values_by_col[col_idx]))
         new_rule.add_literal(l)
     return new_rule
Esempio n. 14
0
 def test_delete_not_covered_3(self):
     """``delete_not_covered`` for ``ClassOfSeat in 1st`` and ``Age < 20`` drops 30 rows."""
     frame = pd.read_csv('tests/test_files/testfile_8.csv',
                         delimiter=';',
                         encoding='utf-8')
     first_class = Literal('ClassOfSeat', 'in', '1st')
     under_twenty = Literal('Age', '<', 20)
     candidate = Rule()
     for literal in (first_class, under_twenty):
         candidate.add_literal(literal)
     dataset = DictDataset(1, frame)
     size_before = dataset.length()
     dataset.delete_not_covered(candidate)
     size_after = dataset.length()
     self.assertEqual(size_before - 30, size_after)
 def test_unmake_rule_1(self):
     """``unmake_rule`` encodes three mushroom literals as index pairs.

     The expected printed form is ``[[0, 0], [2, 3], [5, 0], [5, 1]]``.
     """
     frame = pd.read_csv('tests/test_files/mushroom.csv',
                         delimiter=';',
                         encoding='utf-8')
     dataset = BitmapDataset(0, frame)
     candidate = Rule()
     literals = (
         Literal('a1', 'in', 'x'),
         Literal('a3', 'in', 'g'),
         Literal('a6', 'in', ['a', 'f']),
     )
     for literal in literals:
         candidate.add_literal(literal)
     encoded = dataset.unmake_rule(candidate)
     self.assertEqual("[[0, 0], [2, 3], [5, 0], [5, 1]]", str(encoded))