Beispiel #1
0
 def test_prune_rule_1(self):
     # Pruning a rule holding both 'Sex in Female' and 'Sex in Male' is
     # asserted to leave only the 'Sex in Female' literal.
     frame = pd.read_csv(
         'tests/test_files/testfile_3.csv',
         encoding='utf-8',
         sep=';',
     )
     literals = (
         Literal('Sex', 'in', 'Female'),
         Literal('Sex', 'in', 'Male'),
     )
     candidate = Rule()
     for literal in literals:
         candidate.add_literal(literal)
     dataset = DictDataset(1, frame)
     pruned = dataset.prune_rule(candidate)
     self.assertEqual("Sex in Female", pruned.to_string())
Beispiel #2
0
 def test_prune_rule_3(self):
     # Pruning on the mushroom fixture is asserted to drop the 'a3'
     # literal and keep only the multi-valued 'a5' literal.
     frame = pd.read_csv(
         'tests/test_files/mushroom.csv',
         encoding='utf-8',
         sep=';',
     )
     dataset = DictDataset(0, frame)
     literals = (
         Literal('a5', 'in', ['c', 'f', 'm', 'p', 's', 'y']),
         Literal('a3', 'in', 'e'),
     )
     candidate = Rule()
     for literal in literals:
         candidate.add_literal(literal)
     pruned = dataset.prune_rule(candidate)
     expected = "a5 in ['c', 'f', 'm', 'p', 's', 'y']"
     self.assertEqual(expected, pruned.to_string())
Beispiel #3
0
 def test_rule_count_p_n1(self):
     # For the rule "ClassOfSeat in 1st" plus "Age < 20", count_p_n_rule
     # is asserted to return the pair (1, 1).
     frame = pd.read_csv(
         'tests/test_files/testfile_8.csv',
         encoding='utf-8',
         sep=';',
     )
     literals = (
         Literal('ClassOfSeat', 'in', '1st'),
         Literal('Age', '<', 20),
     )
     candidate = Rule()
     for literal in literals:
         candidate.add_literal(literal)
     dataset = DictDataset(1, frame)
     p_count, n_count = dataset.count_p_n_rule(candidate)
     self.assertEqual(1, p_count)
     self.assertEqual(1, n_count)
Beispiel #4
0
 def test_delete_not_covered_3(self):
     # delete_not_covered with the rule "ClassOfSeat in 1st" plus
     # "Age < 20" is asserted to shrink the dataset length by 30.
     frame = pd.read_csv(
         'tests/test_files/testfile_8.csv',
         encoding='utf-8',
         sep=';',
     )
     literals = (
         Literal('ClassOfSeat', 'in', '1st'),
         Literal('Age', '<', 20),
     )
     candidate = Rule()
     for literal in literals:
         candidate.add_literal(literal)
     dataset = DictDataset(1, frame)
     size_before = dataset.length()
     dataset.delete_not_covered(candidate)
     size_after = dataset.length()
     self.assertEqual(size_before - 30, size_after)
 def test_unmake_rule_1(self):
     # unmake_rule on a three-literal rule is asserted to produce a value
     # whose str() is the list of index pairs below.
     frame = pd.read_csv(
         'tests/test_files/mushroom.csv',
         encoding='utf-8',
         sep=';',
     )
     dataset = BitmapDataset(0, frame)
     candidate = Rule()
     literals = (
         Literal('a1', 'in', 'x'),
         Literal('a3', 'in', 'g'),
         Literal('a6', 'in', ['a', 'f']),
     )
     for literal in literals:
         candidate.add_literal(literal)
     unmade = dataset.unmake_rule(candidate)
     self.assertEqual("[[0, 0], [2, 3], [5, 0], [5, 1]]", str(unmade))
Beispiel #6
0
 def check_literal(self, atr_col_name, best_foil, best_l, i, n0, p0, unique_values, op):
     """Score one candidate literal and keep it if it beats the current best.

     The candidate is ``Literal(atr_col_name, op, unique_values[i])``.  Its
     counts from ``self.count_p_n_literal`` are passed, together with
     ``p0`` and ``n0``, to ``count_foil_grow``.

     Returns:
         ``(score, deep copy of the candidate)`` when the score is strictly
         greater than ``best_foil``; otherwise the incoming
         ``(best_foil, best_l)`` unchanged.
     """
     candidate = Literal(atr_col_name, op, unique_values[i])
     p, n = self.count_p_n_literal(candidate)
     gain = count_foil_grow(p0, n0, p, n)
     if gain > best_foil:
         return gain, copy.deepcopy(candidate)
     return best_foil, best_l
Beispiel #7
0
 def make_rule(self, rule):
     """Build a ``Rule`` from a sequence of ``(column, value)`` index pairs.

     Each pair ``(i, j)`` selects the value ``self.col_unique_values[i][j]``
     for the column named ``self.col_names[i]``.  All pairs that share a
     column index are merged into a single ``'in'`` literal, and literals
     are added in ascending column-index order.

     The value list of every literal is sorted, so the literal contents do
     not depend on the order in which pairs for a column were supplied.

     Args:
         rule: iterable of two-element ``(column_index, value_index)``
             pairs, or ``None``.

     Returns:
         A new ``Rule``; it is empty when ``rule`` is ``None`` or has
         length zero.
     """
     if rule is None or len(rule) == 0:
         return Rule()

     # Group the selected values per column.  The stable sort keeps the
     # columns in ascending order, and dict insertion order preserves it.
     values_by_column = {}
     for col_idx, val_idx in sorted(rule, key=lambda pair: pair[0]):
         value = self.col_unique_values[col_idx][val_idx]
         values_by_column.setdefault(col_idx, []).append(value)

     new_rule = Rule()
     for col_idx, values in values_by_column.items():
         literal = Literal(self.col_names[col_idx], 'in', sorted(values))
         new_rule.add_literal(literal)
     return new_rule
Beispiel #8
0
 def test_literal_count_p_n4(self):
     # count_p_n_literal for "Sex in Male" is asserted to return (8, 8).
     frame = pd.read_csv(
         'tests/test_files/testfile_8.csv',
         encoding='utf-8',
         sep=';',
     )
     literal = Literal('Sex', 'in', 'Male')
     dataset = DictDataset(1, frame)
     p_count, n_count = dataset.count_p_n_literal(literal)
     self.assertEqual(8, p_count)
     self.assertEqual(8, n_count)
Beispiel #9
0
 def test_literal_count_p_n3(self):
     # count_p_n_literal for "Age > 1000" is asserted to return (0, 4).
     frame = pd.read_csv(
         'tests/test_files/testfile_8.csv',
         encoding='utf-8',
         sep=';',
     )
     literal = Literal('Age', '>', 1000)
     dataset = DictDataset(1, frame)
     p_count, n_count = dataset.count_p_n_literal(literal)
     self.assertEqual(0, p_count)
     self.assertEqual(4, n_count)
Beispiel #10
0
 def test_literal_count_p_n2(self):
     # count_p_n_literal for a 'ClassOfSeat' literal listing the values
     # 'xab' and 'asdas' is asserted to return (0, 0).
     frame = pd.read_csv(
         'tests/test_files/testfile_8.csv',
         encoding='utf-8',
         sep=';',
     )
     literal = Literal('ClassOfSeat', 'in', ['xab', 'asdas'])
     dataset = DictDataset(1, frame)
     p_count, n_count = dataset.count_p_n_literal(literal)
     self.assertEqual(0, p_count)
     self.assertEqual(0, n_count)
 def test_rule_count_p_n3(self):
     # The rule "ClassOfSeat in 1st" is converted with unmake_rule and
     # count_p_n_rule is asserted to return (4, 4) for it.
     frame = pd.read_csv(
         'tests/test_files/testfile_8.csv',
         encoding='utf-8',
         sep=';',
     )
     literal = Literal('ClassOfSeat', 'in', '1st')
     candidate = Rule()
     candidate.add_literal(literal)
     dataset = BitmapDataset(1, frame)
     unmade = dataset.unmake_rule(candidate)
     p_count, n_count = dataset.count_p_n_rule(unmade)
     self.assertEqual(4, p_count)
     self.assertEqual(4, n_count)
Beispiel #12
0
 def choose_best_literal(self, atr_col_name, best_foil, best_l, df, n0, p0, unique_values, values_to_literal):
     """Greedily grow an ``'in'`` literal while its score keeps improving.

     For each row index from 0 up to ``len(unique_values) - 1``, the value
     ``df.at[row, 'value']`` is appended to ``values_to_literal`` and a
     literal over the accumulated values is scored with
     ``count_foil_grow``.  The search stops at the first score that is not
     strictly greater than the best so far.

     Note:
         ``values_to_literal`` is mutated in place.  The value that caused
         the search to stop remains appended to it.  The best literal is
         deep-copied, so later appends do not alter it.

     Returns:
         ``(best_foil, best_l)`` after the search.
     """
     for row in range(len(unique_values)):
         values_to_literal.append(df.at[row, 'value'])
         candidate = Literal(atr_col_name, 'in', values_to_literal)
         p, n = self.count_p_n_literal(candidate)
         gain = count_foil_grow(p0, n0, p, n)
         if not gain > best_foil:
             # First non-improving extension ends the search.
             break
         best_foil, best_l = gain, copy.deepcopy(candidate)
     return best_foil, best_l
 def test_delete_not_covered_3(self):
     # delete_not_covered with the unmade rule "ClassOfSeat in 1st" is
     # asserted to shrink the dataset length by 24.
     frame = pd.read_csv(
         'tests/test_files/testfile_8.csv',
         encoding='utf-8',
         sep=';',
     )
     literal = Literal('ClassOfSeat', 'in', '1st')
     candidate = Rule()
     candidate.add_literal(literal)
     dataset = BitmapDataset(1, frame)
     size_before = dataset.length()
     unmade = dataset.unmake_rule(candidate)
     dataset.delete_not_covered(unmade)
     size_after = dataset.length()
     self.assertEqual(size_before - 24, size_after)
 def test_rule_count_p_n5(self):
     # After delete_covered removes what "Sex in Female" covers,
     # count_p_n_rule for the same rule is asserted to return (0, 0).
     frame = pd.read_csv(
         'tests/test_files/testfile_4.csv',
         encoding='utf-8',
         sep=';',
     )
     literal = Literal('Sex', 'in', 'Female')
     candidate = Rule()
     candidate.add_literal(literal)
     dataset = BitmapDataset(1, frame)
     unmade_for_delete = dataset.unmake_rule(candidate)
     dataset.delete_covered(unmade_for_delete)
     # The rule is unmade again after the deletion, as a separate call.
     unmade_for_count = dataset.unmake_rule(candidate)
     p_count, n_count = dataset.count_p_n_rule(unmade_for_count)
     self.assertEqual(0, p_count)
     self.assertEqual(0, n_count)
Beispiel #15
0
 def count_p_n_for_every_value_and_sort(self, atr_col_name, unique_values):
     """Rank the values of one attribute by their p/n ratio.

     For every entry of ``unique_values`` a single-value ``'in'`` literal
     is counted with ``self.count_p_n_literal``.  The ratio is ``p / n``,
     or ``math.inf`` when ``n`` is zero.

     Returns:
         A ``pandas.DataFrame`` with columns ``'value'`` and ``'p_to_n'``,
         sorted by ``'p_to_n'`` in descending order and re-indexed from 0.
     """
     ratios = []
     for idx in range(len(unique_values)):
         single = Literal(atr_col_name, 'in', unique_values[idx])
         p, n = self.count_p_n_literal(single)
         ratios.append(math.inf if n == 0 else p / n)
     ranked = pd.DataFrame({'value': unique_values, 'p_to_n': ratios})
     ranked = ranked.sort_values(by='p_to_n', ascending=False)
     return ranked.reset_index(drop=True)
 def test_delete_not_covered_4(self):
     # delete_covered with "Sex in Male" is asserted to leave a length of
     # 36; after a following delete_not_covered with the same rule, the
     # length is asserted to be 64 less than the initial length.
     frame = pd.read_csv(
         'tests/test_files/testfile_4.csv',
         encoding='utf-8',
         sep=';',
     )
     literal = Literal('Sex', 'in', 'Male')
     candidate = Rule()
     candidate.add_literal(literal)
     dataset = BitmapDataset(1, frame)
     size_before = dataset.length()
     unmade_first = dataset.unmake_rule(candidate)
     dataset.delete_covered(unmade_first)
     self.assertEqual(36, dataset.length())
     # The rule is unmade again after the first deletion, as a separate call.
     unmade_second = dataset.unmake_rule(candidate)
     dataset.delete_not_covered(unmade_second)
     size_after = dataset.length()
     self.assertEqual(size_before - 64, size_after)
Beispiel #17
0
 def test_value_covered_by_literal_less_true(self):
     # A '<' literal with threshold 15 is asserted to cover the value 14.
     literal = Literal('test', '<', 15)
     covered = literal.value_covered_by_literal(14)
     self.assertEqual(True, covered)
Beispiel #18
0
 def test_value_covered_by_literal_in_false(self):
     # An 'in' literal over ['1st', '2nd'] is asserted not to cover '3rd'.
     literal = Literal('test', 'in', ['1st', '2nd'])
     covered = literal.value_covered_by_literal('3rd')
     self.assertEqual(False, covered)
Beispiel #19
0
 def test_value_covered_by_literal_in_true(self):
     # An 'in' literal over ['1st', '2nd'] is asserted to cover '1st'.
     literal = Literal('test', 'in', ['1st', '2nd'])
     covered = literal.value_covered_by_literal('1st')
     self.assertEqual(True, covered)
Beispiel #20
0
 def test_value_covered_by_literal_more_false(self):
     # A '>' literal with threshold -15 is asserted not to cover the
     # threshold value -15 itself.
     literal = Literal('test', '>', -15)
     covered = literal.value_covered_by_literal(-15)
     self.assertEqual(False, covered)
Beispiel #21
0
 def test_value_covered_by_literal_more_true(self):
     # A '>' literal with threshold -15 is asserted to cover the value -14.
     literal = Literal('test', '>', -15)
     covered = literal.value_covered_by_literal(-14)
     self.assertEqual(True, covered)
Beispiel #22
0
 def test_value_covered_by_literal_less_false(self):
     # A '<' literal with threshold 15 is asserted not to cover the
     # threshold value 15 itself.
     literal = Literal('test', '<', 15)
     covered = literal.value_covered_by_literal(15)
     self.assertEqual(False, covered)