Beispiel #1
0
 def test_confidence(self):
     """Check ``db.confidence`` for (age=young, myope) against astigmatism=yes."""
     database = CsvDatabase(self.path)
     first_set = ItemSet.create_itemset(
         AttributeValue('age', 'young'),
         AttributeValue('spectacle-prescrip', 'myope'))
     second_set = ItemSet.create_itemset(
         AttributeValue('astigmatism', 'yes'))
     expected = 2 / 4
     self.assertEqual(database.confidence(first_set, second_set), expected)
Beispiel #2
0
 def test_support_count(self):
     """Check support counts for a 1-item and a 2-item set, and the db counter."""
     database = CsvDatabase(self.path)
     young = AttributeValue('age', 'young')
     myope = AttributeValue('spectacle-prescrip', 'myope')

     # Single-item query first ...
     young_only = ItemSet.create_itemset(young)
     young_count = database.support_count(young_only)
     self.assertEqual(young_count, 8)

     # ... then the two-item query on the same database instance.
     young_and_myope = ItemSet.create_itemset(young, myope)
     pair_count = database.support_count(young_and_myope)
     self.assertEqual(pair_count, 4)

     # The counter is read only after both queries have run.
     self.assertEqual(database.counter, 5)
Beispiel #3
0
 def test_join(self):
     """Joining two overlapping item sets yields every distinct item once."""
     a1 = AttributeValue('age', 'young')
     a2 = AttributeValue('age', 'pre-presbyopic')
     a3 = AttributeValue('age', 'presbyopic')
     a4 = AttributeValue('contact-lenses', 'none')
     set1 = ItemSet.create_itemset(a1, a2, a3)
     set2 = ItemSet.create_itemset(a1, a4)
     set3 = set2.join(set1)
     # a1 is shared by both inputs, so the result holds 4 items, not 5.
     self.assertEqual(len(set3.items), 4)
     # assertIn reports the missing member and the container on failure,
     # which assertTrue(x in y) cannot do.
     for expected_item in (a1, a2, a3, a4):
         self.assertIn(expected_item, set3.items)
Beispiel #4
0
 def generate_confident_rules(self, global_itemset=None):
   """Build the rules derived from ``global_itemset`` that are confident enough.

   Candidate clauses start as ``global_itemset`` with one item removed and are
   shrunk by one further item per round. A clause is kept when
   ``global_itemset.support / clause.support >= self.min_confidence``; only
   kept clauses are shrunk further. The loop ends as soon as a round keeps
   nothing.

   Args:
     global_itemset: item set to derive rules from. When ``None``, it is
       built from ``self.database.get_distinct_attr_values()``.

   Returns:
     A list with one ``self._generate_rule(global_itemset, clause)`` result
     per kept clause, in the order the clauses were found.
   """
   if global_itemset is None:
     global_attr = self.database.get_distinct_attr_values()
     global_itemset = ItemSet.create_itemset(*global_attr)
   whole_support = global_itemset.support
   candidate_itemsets = [
       global_itemset.new_remove(item) for item in global_itemset.items]
   confident_rules = []
   while True:
     confident_itemsets = []
     for itemset in candidate_itemsets:
       if itemset.size() > 0:
         # Read the support once per candidate, as the original expression did.
         clause_support = itemset.support
         # A clause with zero support has no defined confidence; skip it
         # instead of raising ZeroDivisionError.
         if clause_support != 0 and (
             whole_support / clause_support >= self.min_confidence):
           confident_itemsets.append(itemset)
     if not confident_itemsets:
       break
     confident_rules.extend(confident_itemsets)
     # Next round: every kept clause with one more item removed, no duplicates.
     candidate_itemsets = []
     for itemset in confident_itemsets:
       if itemset.size() == 1:
         # Removing the last item would leave an empty clause.
         continue
       for item in itemset.items:
         new_itemset = itemset.new_remove(item)
         if new_itemset not in candidate_itemsets:
           candidate_itemsets.append(new_itemset)
   return [self._generate_rule(global_itemset, rule) for rule in confident_rules]
Beispiel #5
0
 def support_count(self, itemset: ItemSet):
     """Return how many rows of ``self.df`` match every item in ``itemset``.

     An empty item set matches all rows. ``increase_counter`` is called once
     per invocation and once more for each item that is applied as a filter.
     """
     super().increase_counter()
     matching_rows = self.df
     if not itemset.isEmpty():
         for entry in itemset.items:
             super().increase_counter()
             # Narrow the frame to the rows where this attribute has the value.
             keep = matching_rows[entry.attr] == entry.value
             matching_rows = matching_rows[keep]
     return len(matching_rows)
Beispiel #6
0
 def test_generate_rule_confidence(self):
     """A four-item set is expected to produce exactly three confident rules."""
     database = CsvDatabase(self.path)
     attribute_values = [
         AttributeValue('age', 'young'),
         AttributeValue('spectacle-prescrip', 'myope'),
         AttributeValue('astigmatism', 'yes'),
         AttributeValue('tear-prod-rate', 'reduced'),
     ]
     full_itemset = ItemSet.create_itemset(*attribute_values)
     miner = Apriori(database, 0, 0.5, 100)
     generated_rules = miner.generate_confident_rules(full_itemset)
     self.assertEqual(len(generated_rules), 3)
Beispiel #7
0
 def test_diff(self):
     """Check ``ItemSet.diff`` for overlapping, disjoint and empty operands."""
     a1 = AttributeValue('age', 'young')
     a2 = AttributeValue('age', 'pre-presbyopic')
     a3 = AttributeValue('age', 'presbyopic')
     a4 = AttributeValue('contact-lenses', 'none')
     set1 = ItemSet.create_itemset(a1, a2, a3)
     set2 = ItemSet.create_itemset(a2, a3, a4)
     set3 = ItemSet.create_itemset(a1, a4)
     set4 = ItemSet.create_itemset()
     # (left operand, right operand, expected diff). Keeping the message in
     # one place removes the stray "n" the third assertion used to print.
     cases = (
         (set1, set2, 2),
         (set2, set3, 3),
         (set3, set4, 2),
         (set4, set1, 3),
     )
     for left, right, expected in cases:
         self.assertEqual(
             left.diff(right), expected,
             '{0} and {1} does not have correct diff'.format(left, right))
Beispiel #8
0
 def generate_frequent_itemset(self, global_itemset=None):
   """Collect the frequent item sets level by level.

   Starts with one single-item set per element of ``global_itemset``. Each
   round keeps the candidates whose ``support`` is at least
   ``self.min_support`` and derives the next round's candidates from them
   with ``self._extend_itemset_by_join``. The loop ends as soon as a round
   keeps nothing.

   Args:
     global_itemset: iterable of items to seed the search with. When
       ``None``, ``self.database.get_distinct_attr_values()`` is used.

   Returns:
     A dict mapping the round number (starting at 1) to the list of item
     sets kept in that round. Rounds that keep nothing get no entry.
   """
   if global_itemset is None:
     global_itemset = self.database.get_distinct_attr_values()
   candidate_itemset = [
       ItemSet.create_itemset(item) for item in global_itemset]
   frequent_itemsets = {}
   current_length = 1
   while True:
     # pruning
     frequent_itemset = [
         itemset for itemset in candidate_itemset
         if itemset.support >= self.min_support]
     if not frequent_itemset:
       break
     frequent_itemsets[current_length] = frequent_itemset
     current_length += 1
     # new candidate
     # TODO: make new candidate more efficient to check/insert
     candidate_itemset = self._extend_itemset_by_join(frequent_itemset)
   return frequent_itemsets
Beispiel #9
0
 def test_equality(self):
     """Item sets built from the same values in a different order are equal."""
     young = AttributeValue('age', 'young')
     pre_presbyopic = AttributeValue('age', 'pre-presbyopic')
     forward = ItemSet.create_itemset(young, pre_presbyopic)
     backward = ItemSet.create_itemset(pre_presbyopic, young)
     self.assertEqual(forward, backward)
Beispiel #10
0
 def _generate_rule(self, global_itemset: ItemSet, clause: ItemSet):
   """Build the rule whose clause is ``clause`` within ``global_itemset``.

   The result side is ``global_itemset`` with every item of ``clause``
   removed; both sides are handed to ``Rule.create_fule``.
   """
   remainder = global_itemset.new_remove(*clause.items)
   return Rule.create_fule(clause, remainder)
Beispiel #11
0
 def create_fule(cls, clause: ItemSet, result: ItemSet):
     """Create a ``Rule`` from ``clause`` and ``result`` with its confidence set.

     The confidence is the support of ``clause`` joined with ``result``
     divided by the support of ``clause`` alone.
     """
     combined = clause.join(result)
     new_rule = Rule(clause, result)
     new_rule.confidence = combined.support / clause.support
     return new_rule