def test_apriori(self):
    """Apriori at minsupport=0.5 yields the 9 expected frequent itemsets
    and the expected support value for every candidate itemset."""
    analyzer = APrioriAnalyzer(self.dataset)
    L, support_data = analyzer.apriori(self.dataset, minsupport=0.5)
    # Flatten the per-size levels of L into one list of frequent itemsets.
    frequent = [itemset for level in L for itemset in level]
    self.assertEqual(len(frequent), 9)
    expected_frequent = [
        {1}, {3}, {2}, {5},
        {1, 3}, {2, 3}, {3, 5}, {2, 5},
        {2, 3, 5},
    ]
    for items in expected_frequent:
        self.assertIn(frozenset(items), frequent)
    # Supports are recorded even for itemsets below the threshold (e.g. {4}).
    expected_support = {
        frozenset({1}): 0.5,
        frozenset({2}): 0.75,
        frozenset({3}): 0.75,
        frozenset({4}): 0.25,
        frozenset({5}): 0.75,
        frozenset({1, 2}): 0.25,
        frozenset({1, 3}): 0.5,
        frozenset({1, 5}): 0.25,
        frozenset({2, 3, 5}): 0.5,
        frozenset({2, 3}): 0.5,
        frozenset({2, 5}): 0.75,
        frozenset({3, 5}): 0.5,
    }
    for itemset, support in expected_support.items():
        self.assertEqual(support_data[itemset], support)
def test_aprioriGen(self):
    """aprioriGen builds exactly the 6 expected size-2 candidates from L1."""
    analyzer = APrioriAnalyzer(self.dataset)
    candidates_1 = analyzer.createC1(analyzer.dataset)
    level_1, _ = analyzer.scanD(analyzer.data_into_set, candidates_1, 0.5)
    candidates_2 = analyzer.aprioriGen(level_1, 2)
    self.assertEqual(len(candidates_2), 6)
    for pair in ({1, 3}, {1, 2}, {1, 5}, {2, 3}, {3, 5}, {2, 5}):
        self.assertIn(frozenset(pair), candidates_2)
def find_frequent_items_apriori():
    """Mine frequent card itemsets with Apriori on a small sample of decks,
    generate high-confidence association rules, and export them.

    Side effects: reads card/deck data from ./../data, may build and cache
    the item-count file, and writes rules to 'generated_rules'.
    """
    card_loader = MagicLoader()
    card_loader.load('./../data/AllCards-x.json')

    # Build full paths for every deck file in the export directory.
    deck_dir = './../data/decks_mtgdeck_net'
    list_files = [os.path.join(deck_dir, name) for name in os.listdir(deck_dir)]

    deck_loader = DeckManager()
    deck_loader.load_from_mtgdeck_csv(list_files, card_loader)
    print('Data loaded, creating cache')

    # Only the first 10 decks are analyzed — presumably to keep runtime
    # manageable for plain Apriori; TODO confirm this sampling is intended.
    analyzer = APrioriAnalyzer(deck_loader.decks[0:10])
    cache_count = APrioriAnalyzer.load_cache_count()
    if cache_count is None:
        cache_count = analyzer.create_cache_count(analyzer.dataset)

    print('Cache created or loaded, start Apriori')
    L, support_data = analyzer.apriori(analyzer.dataset[0:10], minsupport=0.5,
                                       cache_count=cache_count)
    print('Apriori done, rules generating')
    rules = analyzer.generateRules(L, support_data, min_confidence=0.9)
    print('Rules generation done')
    analyzer.export_rules('generated_rules', rules)
def test_scanD(self):
    """scanD at support 0.5 keeps the 4 frequent singletons and records
    the support of every scanned candidate, including infrequent {4}."""
    analyzer = APrioriAnalyzer(self.dataset)
    candidates = analyzer.createC1(analyzer.dataset)
    level_1, support_data = analyzer.scanD(analyzer.data_into_set, candidates, 0.5)
    self.assertEqual(len(list(level_1)), 4)
    for item in (1, 2, 3, 5):
        self.assertIn(frozenset({item}), level_1)
    expected_support = {1: 0.5, 2: 0.75, 3: 0.75, 4: 0.25, 5: 0.75}
    for item, support in expected_support.items():
        self.assertEqual(support_data[frozenset({item})], support)
def find_frequent_items_fpgrowth():
    """Mine frequent card itemsets with FP-Growth, then reuse the Apriori
    machinery (scanD/generateRules) to derive and export association rules.

    Side effects: reads card/deck data from ./../data, writes frequent
    itemsets to 'generated_freq_items' and rules to 'generated_rules'.
    """
    print('Load deck')
    card_loader = MagicLoader()
    card_loader.load('./../data/AllCards-x.json')

    print('Clean deck')
    deck_dir = './../data/decks_mtgdeck_net'
    list_files = [os.path.join(deck_dir, name) for name in os.listdir(deck_dir)]

    deck_loader = DeckManager()
    deck_loader.load_from_mtgdeck_csv(list_files, card_loader)

    cards_usage = APrioriAnalyzer.load_cache_count()
    # Drop staple cards appearing in more than 1% of decks so the tree
    # focuses on discriminating combinations.
    deck_loader.extract_high_used_cards(cards_usage, len(deck_loader.decks) * 0.01)

    print('Prepare deck for FPGrowth')
    minSupport = len(deck_loader.decks) * 0.001
    initSet = FPGrowthAnalyzer.createInitSet(deck_loader.decks)
    print('Create FPGrowth tree')
    myFPtree, myHeaderTab = FPGrowthAnalyzer.createTree(initSet, minSupport)
    freqItems = []
    print('Mine FPGrowth tree')
    FPGrowthAnalyzer.mineTree(myFPtree, myHeaderTab, minSupport, set([]), freqItems)
    FPGrowthAnalyzer.export_frequent_items('generated_freq_items', freqItems)

    print('Convert frequent sets for generating rules')
    # Group itemsets by size, as apriori's L structure expects.
    # BUG FIX: the previous version created an empty bucket on first sight of
    # a new size but never appended the item, silently dropping the first
    # itemset of every length.
    L_hash = {}
    for item in freqItems:
        L_hash.setdefault(len(item), []).append(item)

    # Materialize as lists: in Python 3, map() returns a one-shot iterator
    # that would be exhausted after a single pass over L / the candidates.
    L = [[frozenset(item) for item in L_hash[size]] for size in sorted(L_hash)]

    analyzer = APrioriAnalyzer(deck_loader.decks)
    _, support_data = analyzer.scanD(analyzer.data_into_set,
                                     [frozenset(item) for item in freqItems],
                                     minSupport)
    print('Generating rules')
    rules = analyzer.generateRules(L, support_data, min_confidence=0.7)
    print('Writing rules')
    analyzer.export_rules('generated_rules', rules)
def test_generate_rules(self):
    """generateRules at 0.5 confidence yields exactly 11 rules for the sample data."""
    analyzer = APrioriAnalyzer(self.dataset)
    frequent_sets, support_data = analyzer.apriori(self.dataset, minsupport=0.5)
    generated = analyzer.generateRules(frequent_sets, support_data, min_confidence=0.5)
    self.assertEqual(len(generated), 11)
def test_c1(self):
    """createC1 produces the 5 singleton candidate itemsets of the sample data.

    BUG FIX: the original used assertTrue(len(list(c1)), 5), where 5 is the
    failure *message* argument — the assertion passed for any non-empty c1
    and never checked the count. assertEqual performs the intended check.
    """
    analyzer = APrioriAnalyzer(self.dataset)
    c1 = analyzer.createC1(analyzer.dataset)
    self.assertEqual(len(list(c1)), 5)