def test_prune_rule_1(self):
    """Pruning a rule built from 'Sex in Female' and 'Sex in Male' keeps only 'Sex in Female'."""
    frame = pd.read_csv('tests/test_files/testfile_3.csv', encoding='utf-8', delimiter=';')
    rule = Rule()
    # Literals are added in the same order as before: Female first, then Male.
    for sex in ('Female', 'Male'):
        rule.add_literal(Literal('Sex', 'in', sex))
    dataset = DictDataset(1, frame)
    pruned = dataset.prune_rule(rule)
    self.assertEqual("Sex in Female", pruned.to_string())
def test_prune_rule_3(self):
    """Pruning drops the 'a3 in e' literal and keeps the multi-valued 'a5' literal."""
    frame = pd.read_csv('tests/test_files/mushroom.csv', encoding='utf-8', delimiter=';')
    dataset = DictDataset(0, frame)
    odor_literal = Literal('a5', 'in', ['c', 'f', 'm', 'p', 's', 'y'])
    surface_literal = Literal('a3', 'in', 'e')
    rule = Rule()
    for literal in (odor_literal, surface_literal):
        rule.add_literal(literal)
    pruned = dataset.prune_rule(rule)
    self.assertEqual("a5 in ['c', 'f', 'm', 'p', 's', 'y']", pruned.to_string())
def test_rule_count_p_n1(self):
    """count_p_n_rule yields p == 1 and n == 1 for the two-literal seat/age rule."""
    frame = pd.read_csv('tests/test_files/testfile_8.csv', encoding='utf-8', delimiter=';')
    rule = Rule()
    rule.add_literal(Literal('ClassOfSeat', 'in', '1st'))
    rule.add_literal(Literal('Age', '<', 20))
    dataset = DictDataset(1, frame)
    counts = dataset.count_p_n_rule(rule)
    p, n = counts
    self.assertEqual(1, p)
    self.assertEqual(1, n)
def test_delete_not_covered_3(self):
    """delete_not_covered with the seat/age rule shrinks the dataset length by 30."""
    frame = pd.read_csv('tests/test_files/testfile_8.csv', encoding='utf-8', delimiter=';')
    rule = Rule()
    rule.add_literal(Literal('ClassOfSeat', 'in', '1st'))
    rule.add_literal(Literal('Age', '<', 20))
    dataset = DictDataset(1, frame)
    size_before = dataset.length()
    dataset.delete_not_covered(rule)
    size_after = dataset.length()
    self.assertEqual(size_before - 30, size_after)
def test_unmake_rule_1(self):
    """unmake_rule turns a three-literal Rule into the expected list of index pairs."""
    frame = pd.read_csv('tests/test_files/mushroom.csv', encoding='utf-8', delimiter=';')
    dataset = BitmapDataset(0, frame)
    rule = Rule()
    literal_specs = (
        ('a1', 'x'),
        ('a3', 'g'),
        ('a6', ['a', 'f']),
    )
    for column, allowed in literal_specs:
        rule.add_literal(Literal(column, 'in', allowed))
    encoded = dataset.unmake_rule(rule)
    self.assertEqual("[[0, 0], [2, 3], [5, 0], [5, 1]]", str(encoded))
def check_literal(self, atr_col_name, best_foil, best_l, i, n0, p0, unique_values, op):
    """Score one candidate literal and return the better of it and the current best.

    Builds ``Literal(atr_col_name, op, unique_values[i])``, counts its p/n via
    ``self.count_p_n_literal`` and scores it with ``count_foil_grow(p0, n0, p, n)``.

    Returns:
        ``(score, deep copy of the candidate)`` when the candidate's score is
        strictly greater than ``best_foil``; otherwise the incoming
        ``(best_foil, best_l)`` unchanged.
    """
    candidate = Literal(atr_col_name, op, unique_values[i])
    p, n = self.count_p_n_literal(candidate)
    candidate_foil = count_foil_grow(p0, n0, p, n)
    if candidate_foil > best_foil:
        return candidate_foil, copy.deepcopy(candidate)
    return best_foil, best_l
def make_rule(self, rule):
    """Convert a sequence of ``(column index, value index)`` pairs into a Rule.

    Pairs that share a column index are merged into a single ``'in'`` literal
    whose column name is ``self.col_names[i]`` and whose values are looked up
    in ``self.col_unique_values[i][j]``. Literals are added in ascending
    column-index order.

    Args:
        rule: Sequence of ``(i, j)`` pairs, or ``None``.

    Returns:
        A ``Rule`` with one literal per distinct column index; an empty
        ``Rule`` when ``rule`` is ``None`` or has length 0.
    """
    if rule is None or len(rule) == 0:
        return Rule()

    # Group values per column. ``sorted`` is stable, so pairs of one column
    # keep their input order, and dicts preserve insertion order, so columns
    # come out in ascending index order.
    values_by_column = {}
    for col_idx, val_idx in sorted(rule, key=lambda pair: pair[0]):
        value = self.col_unique_values[col_idx][val_idx]
        values_by_column.setdefault(col_idx, []).append(value)

    new_rule = Rule()
    for col_idx, values in values_by_column.items():
        # Sort the values of every literal. Previously only the last column's
        # values were sorted, so the output depended on column position.
        new_rule.add_literal(Literal(self.col_names[col_idx], 'in', sorted(values)))
    return new_rule
def test_literal_count_p_n4(self):
    """count_p_n_literal yields p == 8 and n == 8 for 'Sex in Male'."""
    frame = pd.read_csv('tests/test_files/testfile_8.csv', encoding='utf-8', delimiter=';')
    literal = Literal('Sex', 'in', 'Male')
    dataset = DictDataset(1, frame)
    counts = dataset.count_p_n_literal(literal)
    p, n = counts
    self.assertEqual(8, p)
    self.assertEqual(8, n)
def test_literal_count_p_n3(self):
    """count_p_n_literal yields p == 0 and n == 4 for 'Age > 1000'."""
    frame = pd.read_csv('tests/test_files/testfile_8.csv', encoding='utf-8', delimiter=';')
    literal = Literal('Age', '>', 1000)
    dataset = DictDataset(1, frame)
    counts = dataset.count_p_n_literal(literal)
    p, n = counts
    self.assertEqual(0, p)
    self.assertEqual(4, n)
def test_literal_count_p_n2(self):
    """count_p_n_literal yields p == 0 and n == 0 for the listed ClassOfSeat values."""
    frame = pd.read_csv('tests/test_files/testfile_8.csv', encoding='utf-8', delimiter=';')
    literal = Literal('ClassOfSeat', 'in', ['xab', 'asdas'])
    dataset = DictDataset(1, frame)
    counts = dataset.count_p_n_literal(literal)
    p, n = counts
    self.assertEqual(0, p)
    self.assertEqual(0, n)
def test_rule_count_p_n3(self):
    """count_p_n_rule on the unmade 'ClassOfSeat in 1st' rule yields p == 4 and n == 4."""
    frame = pd.read_csv('tests/test_files/testfile_8.csv', encoding='utf-8', delimiter=';')
    rule = Rule()
    rule.add_literal(Literal('ClassOfSeat', 'in', '1st'))
    dataset = BitmapDataset(1, frame)
    encoded = dataset.unmake_rule(rule)
    p, n = dataset.count_p_n_rule(encoded)
    self.assertEqual(4, p)
    self.assertEqual(4, n)
def choose_best_literal(self, atr_col_name, best_foil, best_l, df, n0, p0, unique_values, values_to_literal):
    """Grow an 'in' literal one value at a time while its score keeps improving.

    For row positions ``0 .. len(unique_values) - 1`` the ``'value'`` cell of
    ``df`` is appended to ``values_to_literal`` and a literal over the
    accumulated values is scored with ``count_foil_grow``. The loop stops at
    the first score that is not strictly greater than the best so far.

    Note:
        ``values_to_literal`` is mutated in place; the value whose score
        triggered the stop remains appended to it.

    Returns:
        ``(best_foil, best_l)`` where ``best_l`` is a deep copy of the best
        literal found, or the incoming value if none improved.
    """
    for row in range(len(unique_values)):
        values_to_literal.append(df.at[row, 'value'])
        candidate = Literal(atr_col_name, 'in', values_to_literal)
        p, n = self.count_p_n_literal(candidate)
        candidate_foil = count_foil_grow(p0, n0, p, n)
        if not candidate_foil > best_foil:
            break
        # Snapshot the literal: it shares the list that keeps growing.
        best_foil, best_l = candidate_foil, copy.deepcopy(candidate)
    return best_foil, best_l
def test_delete_not_covered_3(self):
    """delete_not_covered with the unmade 'ClassOfSeat in 1st' rule shrinks the length by 24."""
    frame = pd.read_csv('tests/test_files/testfile_8.csv', encoding='utf-8', delimiter=';')
    rule = Rule()
    rule.add_literal(Literal('ClassOfSeat', 'in', '1st'))
    dataset = BitmapDataset(1, frame)
    size_before = dataset.length()
    encoded = dataset.unmake_rule(rule)
    dataset.delete_not_covered(encoded)
    size_after = dataset.length()
    self.assertEqual(size_before - 24, size_after)
def test_rule_count_p_n5(self):
    """After delete_covered for 'Sex in Female', the same rule counts p == 0 and n == 0."""
    frame = pd.read_csv('tests/test_files/testfile_4.csv', encoding='utf-8', delimiter=';')
    rule = Rule()
    rule.add_literal(Literal('Sex', 'in', 'Female'))
    dataset = BitmapDataset(1, frame)
    dataset.delete_covered(dataset.unmake_rule(rule))
    # The rule is unmade again after the deletion, exactly as the counting call needs it.
    encoded = dataset.unmake_rule(rule)
    p, n = dataset.count_p_n_rule(encoded)
    self.assertEqual(0, p)
    self.assertEqual(0, n)
def count_p_n_for_every_value_and_sort(self, atr_col_name, unique_values):
    """Rank the values of one attribute by their p/n ratio.

    For each entry of ``unique_values`` an ``'in'`` literal is built and its
    p and n are obtained from ``self.count_p_n_literal``. The ratio is
    ``p / n``, or ``math.inf`` when ``n == 0``.

    Returns:
        A DataFrame with columns ``'value'`` and ``'p_to_n'``, sorted by
        ``'p_to_n'`` in descending order and re-indexed from 0.
    """
    def ratio_for(value):
        p, n = self.count_p_n_literal(Literal(atr_col_name, 'in', value))
        return math.inf if n == 0 else p / n

    ratios = [ratio_for(unique_values[pos]) for pos in range(len(unique_values))]
    table = pd.DataFrame({'value': unique_values, 'p_to_n': ratios})
    ranked = table.sort_values(by='p_to_n', ascending=False)
    return ranked.reset_index(drop=True)
def test_delete_not_covered_4(self):
    """delete_covered leaves length 36; a following delete_not_covered totals 64 fewer than the start."""
    frame = pd.read_csv('tests/test_files/testfile_4.csv', encoding='utf-8', delimiter=';')
    rule = Rule()
    rule.add_literal(Literal('Sex', 'in', 'Male'))
    dataset = BitmapDataset(1, frame)
    size_before = dataset.length()

    dataset.delete_covered(dataset.unmake_rule(rule))
    self.assertEqual(36, dataset.length())

    dataset.delete_not_covered(dataset.unmake_rule(rule))
    size_after = dataset.length()
    self.assertEqual(size_before - 64, size_after)
def test_value_covered_by_literal_less_true(self):
    """A '<' literal with bound 15 reports the value 14 as covered."""
    literal = Literal('test', '<', 15)
    covered = literal.value_covered_by_literal(14)
    self.assertEqual(True, covered)
def test_value_covered_by_literal_in_false(self):
    """An 'in' literal over ['1st', '2nd'] reports '3rd' as not covered."""
    literal = Literal('test', 'in', ['1st', '2nd'])
    covered = literal.value_covered_by_literal('3rd')
    self.assertEqual(False, covered)
def test_value_covered_by_literal_in_true(self):
    """An 'in' literal over ['1st', '2nd'] reports '1st' as covered."""
    literal = Literal('test', 'in', ['1st', '2nd'])
    covered = literal.value_covered_by_literal('1st')
    self.assertEqual(True, covered)
def test_value_covered_by_literal_more_false(self):
    """A '>' literal with bound -15 reports the bound itself (-15) as not covered."""
    literal = Literal('test', '>', -15)
    covered = literal.value_covered_by_literal(-15)
    self.assertEqual(False, covered)
def test_value_covered_by_literal_more_true(self):
    """A '>' literal with bound -15 reports the value -14 as covered."""
    literal = Literal('test', '>', -15)
    covered = literal.value_covered_by_literal(-14)
    self.assertEqual(True, covered)
def test_value_covered_by_literal_less_false(self):
    """A '<' literal with bound 15 reports the bound itself (15) as not covered."""
    literal = Literal('test', '<', 15)
    covered = literal.value_covered_by_literal(15)
    self.assertEqual(False, covered)