def test_confidence(self):
    """Check ``confidence`` for two item sets against the CSV fixture.

    The first set is {age=young, spectacle-prescrip=myope}, the second is
    {astigmatism=yes}; the database at ``self.path`` must report 2 / 4.
    """
    database = CsvDatabase(self.path)

    young = AttributeValue('age', 'young')
    myope = AttributeValue('spectacle-prescrip', 'myope')
    astigmatic = AttributeValue('astigmatism', 'yes')

    first_set = ItemSet.create_itemset(young, myope)
    second_set = ItemSet.create_itemset(astigmatic)

    expected = 2 / 4
    self.assertEqual(database.confidence(first_set, second_set), expected)
def test_support_count(self):
    """Check ``support_count`` results and the database access counter."""
    database = CsvDatabase(self.path)
    young = AttributeValue('age', 'young')
    myope = AttributeValue('spectacle-prescrip', 'myope')

    # One item: eight matching rows are expected in the fixture.
    single = ItemSet.create_itemset(young)
    self.assertEqual(database.support_count(single), 8)

    # Two items: four matching rows are expected.
    pair = ItemSet.create_itemset(young, myope)
    self.assertEqual(database.support_count(pair), 4)

    # Both calls together must leave the counter at 5 -- presumably one
    # tick per call plus one per item filtered (2 + 3); confirm against
    # the database implementation.
    self.assertEqual(database.counter, 5)
def test_join(self):
    """Joining two item sets must keep every distinct item exactly once."""
    young = AttributeValue('age', 'young')
    pre_presbyopic = AttributeValue('age', 'pre-presbyopic')
    presbyopic = AttributeValue('age', 'presbyopic')
    no_lenses = AttributeValue('contact-lenses', 'none')

    ages = ItemSet.create_itemset(young, pre_presbyopic, presbyopic)
    mixed = ItemSet.create_itemset(young, no_lenses)
    joined = mixed.join(ages)

    # ``young`` appears in both operands, so four distinct items remain.
    self.assertEqual(len(joined.items), 4)

    # assertIn reports which item is missing; assertTrue(x in y) would only
    # say "False is not true".
    for expected_item in (young, pre_presbyopic, presbyopic, no_lenses):
        self.assertIn(expected_item, joined.items)
def generate_confident_rules(self, global_itemset=None):
    """Derive every rule of ``global_itemset`` that meets ``self.min_confidence``.

    Clauses are searched top-down. The first level holds the subsets obtained
    by dropping one item from ``global_itemset``. A clause is kept when it is
    non-empty and ``global_itemset.support / clause.support`` reaches
    ``self.min_confidence``. Only kept clauses are shrunk by one more item to
    form the next level, and the search stops at the first level that keeps
    nothing.

    Args:
        global_itemset: Item set to split into rules. When ``None``, one is
            built from ``self.database.get_distinct_attr_values()``.

    Returns:
        A list holding ``self._generate_rule(global_itemset, clause)`` for
        each confident clause, in the order the clauses were found.
    """
    if global_itemset is None:
        distinct_values = self.database.get_distinct_attr_values()
        global_itemset = ItemSet.create_itemset(*distinct_values)

    whole_support = global_itemset.support
    candidates = [
        global_itemset.new_remove(item) for item in global_itemset.items
    ]

    confident_clauses = []
    while True:
        confident = []
        for clause in candidates:
            if clause.size() == 0:
                continue
            clause_support = clause.support
            # Confidence is undefined for a clause that never occurs; treat
            # it as not confident instead of dividing by zero.
            if (clause_support > 0
                    and whole_support / clause_support >= self.min_confidence):
                confident.append(clause)

        if not confident:
            break
        confident_clauses.extend(confident)

        # Next level: every distinct clause one item smaller than a kept one.
        candidates = []
        for clause in confident:
            if clause.size() == 1:
                # Shrinking further would only produce the empty clause.
                continue
            for item in clause.items:
                smaller = clause.new_remove(item)
                if smaller not in candidates:
                    candidates.append(smaller)

    return [
        self._generate_rule(global_itemset, clause)
        for clause in confident_clauses
    ]
def support_count(self, itemset: ItemSet):
    """Return how many rows of ``self.df`` match every item of ``itemset``.

    The parent's access counter is increased once for the call and once more
    for each item that is filtered on. An empty item set matches every row.
    ``self.df`` is presumably a pandas DataFrame -- confirm against the
    class constructor.
    """
    super().increase_counter()
    if itemset.isEmpty():
        return len(self.df)

    matching_rows = self.df
    for attr_value in itemset.items:
        super().increase_counter()
        # Narrow the remaining rows to those holding this attribute's value.
        keep = matching_rows[attr_value.attr] == attr_value.value
        matching_rows = matching_rows[keep]
    return len(matching_rows)
def test_generate_rule_confidence(self):
    """A four-item set must yield exactly three confident rules."""
    database = CsvDatabase(self.path)

    pairs = (
        ('age', 'young'),
        ('spectacle-prescrip', 'myope'),
        ('astigmatism', 'yes'),
        ('tear-prod-rate', 'reduced'),
    )
    attribute_values = [AttributeValue(attr, value) for attr, value in pairs]
    whole = ItemSet.create_itemset(*attribute_values)

    miner = Apriori(database, 0, 0.5, 100)
    found_rules = miner.generate_confident_rules(whole)

    self.assertEqual(len(found_rules), 3)
def test_diff(self):
    """Check ``ItemSet.diff`` on overlapping, partly shared and empty sets.

    The expected values equal the number of items that belong to exactly one
    of the two sets being compared.
    """
    young = AttributeValue('age', 'young')
    pre_presbyopic = AttributeValue('age', 'pre-presbyopic')
    presbyopic = AttributeValue('age', 'presbyopic')
    no_lenses = AttributeValue('contact-lenses', 'none')

    set1 = ItemSet.create_itemset(young, pre_presbyopic, presbyopic)
    set2 = ItemSet.create_itemset(pre_presbyopic, presbyopic, no_lenses)
    set3 = ItemSet.create_itemset(young, no_lenses)
    set4 = ItemSet.create_itemset()

    # One shared template keeps the failure messages consistent (the third
    # message used to carry a stray "n" before the second set).
    message = '{0} and {1} does not have correct diff'
    cases = (
        (set1, set2, 2),
        (set2, set3, 3),
        (set3, set4, 2),
        (set4, set1, 3),
    )
    for left, right, expected in cases:
        self.assertEqual(
            left.diff(right), expected, message.format(left, right))
def generate_frequent_itemset(self, global_itemset=None):
    """Collect frequent item sets level by level.

    Args:
        global_itemset: Iterable of items that seed the search; each one
            becomes a single-item candidate. When ``None``, the items come
            from ``self.database.get_distinct_attr_values()``.

    Returns:
        A dict keyed by level number, starting at 1. Each value is the list
        of that level's candidates whose ``support`` is at least
        ``self.min_support``. The search ends at the first level with no such
        candidate, and that level gets no key. The level number presumably
        equals the item-set length -- that depends on
        ``self._extend_itemset_by_join``.
    """
    if global_itemset is None:
        global_itemset = self.database.get_distinct_attr_values()

    candidates = [ItemSet.create_itemset(item) for item in global_itemset]

    frequent_by_level = {}
    current_length = 1
    while True:
        # Pruning: drop candidates below the minimum support.
        frequent = [
            candidate for candidate in candidates
            if candidate.support >= self.min_support
        ]
        if not frequent:
            break
        frequent_by_level[current_length] = frequent
        current_length += 1

        # New candidates for the next level.
        # TODO: make new candidate more efficient to check/insert
        candidates = self._extend_itemset_by_join(frequent)
    return frequent_by_level
def test_equality(self):
    """Item sets built from the same values in another order compare equal."""
    young = AttributeValue('age', 'young')
    pre_presbyopic = AttributeValue('age', 'pre-presbyopic')

    forward = ItemSet.create_itemset(young, pre_presbyopic)
    backward = ItemSet.create_itemset(pre_presbyopic, young)

    self.assertEqual(forward, backward)
def _generate_rule(self, global_itemset: ItemSet, clause: ItemSet):
    """Build the rule whose clause is ``clause`` and whose result is the rest.

    The result side is ``global_itemset`` with every item of ``clause``
    removed; the pair is handed to ``Rule.create_fule`` (the factory's name is
    spelled that way).
    """
    remainder = global_itemset.new_remove(*clause.items)
    return Rule.create_fule(clause, remainder)
def create_fule(cls, clause: ItemSet, result: ItemSet):
    """Create a ``Rule`` from ``clause`` and ``result`` and set its confidence.

    The confidence is the support of ``clause`` joined with ``result`` divided
    by the support of ``clause`` alone.

    NOTE(review): ``cls`` is not used -- the instance is always a plain
    ``Rule``. A clause whose support is zero would make the division raise;
    confirm callers never pass one.
    """
    combined = clause.join(result)
    new_rule = Rule(clause, result)
    combined_support = combined.support
    clause_support = clause.support
    new_rule.confidence = combined_support / clause_support
    return new_rule