def test_MAR_MaxSC_OneClass_4(self):
        """MAR_MaxSC_OneClass on publication example 3.a must derive exactly 2 rules."""
        # From publication example 3.a
        analyzer = GCA(self.db_rules,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        rule_miner = RAMCM(analyzer.lcg_into_list())
        # Restrict the lattice to S = {c, e, a, g, i} with the support /
        # confidence bounds used in the publication example.
        lcg_S = rule_miner.MFCS_FromLattice(rule_miner.lcg,
                                            set(['c', 'e', 'a', 'g', 'i']),
                                            2 / 7, 1 / 7, 5 / 7)

        # Generate rules for S_star_S1 = set(['c', 'e', 'a', 'g', 'i'])
        L_C1 = set(['c', 'e', 'g'])  # left-hand-side closure
        match = analyzer.search_node_with_closure(L_C1, lcg_S)
        gen_L_C1 = match.generators
        R1 = set(['a', 'i'])
        S_star_S1 = set(['a', 'c', 'i'])
        match = analyzer.search_node_with_closure(S_star_S1, lcg_S)
        gen_S_star_S1 = match.generators
        # NOTE(review): S1 is assigned but never used below — confirm intent.
        S1 = set(['c', 'e', 'a', 'g', 'i'])

        rules = rule_miner.MAR_MaxSC_OneClass(L_C1, gen_L_C1, R1, S_star_S1,
                                              gen_S_star_S1, S_star_S1)

        self.assertEqual(len(rules), 2)
        # NOTE(review): expected_rules is built but never compared against
        # `rules` — the per-rule assertions appear truncated; confirm.
        expected_rules = []
    # Beispiel #2 (0)
    def test_mine_CAR(self):
        """mine_CAR2 on the RAR sample DB must yield the 13 expected rules, in order.

        Mines the lattice at min-support 0.25, derives the representative
        association rules (RAR), then expands them into class association
        rules (CAR2) and compares each rule's sides against the expected list.
        """
        analyzer = GCA(self.db_RAR, 0.25)
        analyzer.clean_database()
        analyzer.mine()

        L = set([3, 5, 7])
        S = set([1, 3, 5, 7])
        L_node = analyzer.search_node_with_closure(L)
        S_node = analyzer.search_node_with_closure(S)

        rule_miner = RAMMax(analyzer.lcg_into_list())
        RAR = rule_miner.mine_RAR(L_node, S_node, 0.25, 1.0, 0.0, 1.0)
        CAR2 = rule_miner.mine_CAR2(L_node, S_node, RAR, analyzer)

        # BUG FIX: was `assertTrue(len(CAR2), 13)`, which always passes
        # because 13 is treated as the failure *message* argument;
        # assertEqual actually verifies the rule count.
        self.assertEqual(len(CAR2), 13)
        rules = []
        rules.append(Rule(set([5]), set([1, 7])))
        rules.append(Rule(set([5]), set([1, 3])))
        rules.append(Rule(set([5]), set([1])))
        rules.append(Rule(set([7]), set([1, 5])))
        rules.append(Rule(set([7]), set([1, 3])))
        rules.append(Rule(set([7]), set([1])))
        rules.append(Rule(set([3, 5]), set([1, 7])))
        rules.append(Rule(set([5, 7]), set([1, 3])))
        rules.append(Rule(set([3, 5, 7]), set([1])))
        rules.append(Rule(set([5, 7]), set([1])))
        rules.append(Rule(set([3, 5]), set([1])))
        rules.append(Rule(set([3, 7]), set([1, 5])))
        rules.append(Rule(set([3, 7]), set([1])))
        for i in range(len(CAR2)):
            self.assertEqual(frozenset(CAR2[i].left), frozenset(rules[i].left))
            self.assertEqual(frozenset(CAR2[i].right),
                             frozenset(rules[i].right))
    def test_MFS_RestrictMaxSC_1(self):
        """Enumerate restricted frequent subsets (publication example 3.a).

        First enumerates the candidate left-hand sides inside {c, e, g},
        then the right-hand sides compatible with the left-hand side {e}.
        """
        # 1/7 is the minimum-support percentage given in the publication.
        analyzer = GCA(self.db_rules, 1 / 7)
        analyzer.clean_database()
        analyzer.mine()

        rule_miner = RAMCM(analyzer.lcg_into_list())
        lcg_S = rule_miner.MFCS_FromLattice(rule_miner.lcg,
                                            {'c', 'e', 'a', 'g', 'i'},
                                            2 / 7, 1 / 7, 1)

        # --- enumerate left-hand sides ---
        Y = {'c', 'e', 'g'}
        X = set()
        Z1 = {'c', 'e', 'g'}
        gen_X_Y = analyzer.search_node_with_closure(Y, lcg_S).generators
        fs_star_Y = rule_miner.MFS_RestrictMaxSC(Y, X, Z1, gen_X_Y)

        self.assertEqual(len(fs_star_Y), 6)
        for wanted in ({'e'}, {'e', 'c'}, {'e', 'g'},
                       {'e', 'c', 'g'}, {'g'}, {'g', 'c'}):
            self.assertIn(wanted, fs_star_Y)

        # --- enumerate right-hand sides matching the left-hand side {'e'} ---
        Y = frozenset(['c', 'e', 'a', 'g', 'i']).difference(frozenset('e'))
        X = {'e'}
        Z1 = {'a', 'i'}
        gen_X_Y = analyzer.search_node_with_closure(Y, lcg_S).generators
        fs_star_Y = rule_miner.MFS_RestrictMaxSC(Y, X, Z1, gen_X_Y)

        self.assertEqual(len(fs_star_Y), 2)
        for wanted in ({'a'}, {'a', 'i'}):
            self.assertIn(wanted, fs_star_Y)
    # Beispiel #4 (0)
    def test_mine_consequent_LS_2(self):
        """Smoke test: mine_cars_L_S completes without raising for this (L, S) pair."""
        analyzer = GCA(self.db, 0.0)
        analyzer.clean_database()
        analyzer.mine()

        L_node = analyzer.search_node_with_closure({'c', 'd'})
        S_node = analyzer.search_node_with_closure({'a', 'c', 'd', 't', 'w'})

        rule_miner = RAMM(analyzer.lcg_into_list())
        rule_miner.mine_cars_L_S(L_node, S_node, 0, 1, 0, 1, analyzer)
        # No result assertion yet: reaching this point without an exception passes.
        self.assertTrue(True)
    # Beispiel #5 (0)
    def test_mine_RAR(self):
        """Smoke test: mine_RAR completes without raising on the RAR sample DB."""
        analyzer = GCA(self.db_RAR, 0.0)
        analyzer.clean_database()
        analyzer.mine()

        L_node = analyzer.search_node_with_closure({3, 5, 7})
        S_node = analyzer.search_node_with_closure({1, 3, 5, 7})

        rule_miner = RAMMax(analyzer.lcg_into_list())
        rule_miner.mine_RAR(L_node, S_node)
        # No result assertion yet: completing without an exception passes.
        self.assertTrue(True)
    # Beispiel #6 (0)
    def test_mine_basic_rules_LS_1(self):
        """mine_LS must produce the two expected basic rules, in order."""
        analyzer = GCA(self.db, 0.0)
        analyzer.clean_database()
        analyzer.mine()

        L_node = analyzer.search_node_with_closure({'a', 'c', 't', 'w'})
        S_node = analyzer.search_node_with_closure({'a', 'c', 'd', 't', 'w'})

        rule_miner = RAMM(analyzer.lcg_into_list())
        B_LS = rule_miner.mine_LS(L_node, S_node, 0.0, 1.0, 0.0, 1.0)

        expected = [
            Rule({'a', 't'}, {'d'}),
            Rule({'t', 'w'}, {'d'}),
        ]
        # Explicit indexing (not zip) so a missing rule raises, as before.
        for idx, wanted in enumerate(expected):
            self.assertEqual(frozenset(B_LS[idx].left), frozenset(wanted.left))
            self.assertEqual(frozenset(B_LS[idx].right),
                             frozenset(wanted.right))
    def test_mine_db_rules(self):
        """Mining db_rules must reproduce the publication's 10-node closed-itemset lattice."""
        # 1/7 is the minimum-support percentage given in the publication.
        analyzer = GCA(self.db_rules, 1 / 7)
        analyzer.clean_database()
        analyzer.mine()

        self.assertEqual(len(analyzer.lcg_into_list()), 10)

        # Expected (support, closure, generators) nodes of the lattice.
        expected_LGC = [
            GCA.Node(2 / 7, {'a', 'c', 'e', 'g', 'i'},
                     [['a', 'e'], ['a', 'g']], None),
            GCA.Node(2 / 7, {'b', 'c', 'e', 'g', 'i'}, [['b']], None),
            GCA.Node(2 / 7, {'a', 'c', 'f', 'h', 'i'},
                     [['c', 'f'], ['c', 'h']], None),
            GCA.Node(1 / 7, {'a', 'd', 'f', 'h', 'i'}, [['d']], None),
            GCA.Node(4 / 7, {'c', 'e', 'g', 'i'}, [['e'], ['g']], None),
            GCA.Node(4 / 7, {'a', 'c', 'i'}, [['a', 'c']], None),
            GCA.Node(3 / 7, {'a', 'f', 'h', 'i'}, [['f'], ['h']], None),
            GCA.Node(6 / 7, {'c', 'i'}, [['c']], None),
            GCA.Node(5 / 7, {'a', 'i'}, [['a']], None),
            GCA.Node(7 / 7, {'i'}, [['i']], None),
        ]

        for expected in expected_LGC:
            # The closure must be present in the mined lattice...
            match = analyzer.search_node_with_closure(expected.closure)
            self.assertSequenceEqual(expected.closure, match.closure)

            # ...carry the expected support...
            self.assertEqual(expected.support, match.support)

            # ...and each expected generator must be found in the lattice.
            for generator in expected.generators:
                self.assertIsNotNone(
                    analyzer.search_node_with_generator(None, generator))
    def test_MFCS_FromLattice(self):
        """Restricting the lattice to {a, c, f, h, i} must yield a 6-node sub-lattice."""
        # 1/7 is the minimum-support percentage given in the publication.
        analyzer = GCA(self.db_rules, 1 / 7)
        analyzer.clean_database()
        analyzer.mine()

        rule_miner = RAMCM(analyzer.lcg_into_list())
        lcg_S = rule_miner.MFCS_FromLattice(
            rule_miner.lcg, {'a', 'c', 'f', 'h', 'i'},
            rule_miner._get_support({'a', 'c', 'f', 'h', 'i'}), 1 / 7, 1)

        self.assertEqual(len(lcg_S), 6)

        # Expected (support, closure, generators) nodes of the sub-lattice.
        expected_LGC = [
            GCA.Node(2 / 7, {'a', 'c', 'f', 'h', 'i'},
                     [['c', 'f'], ['c', 'h']], None),
            GCA.Node(4 / 7, {'a', 'c', 'i'}, [['a', 'c']], None),
            GCA.Node(3 / 7, {'a', 'f', 'h', 'i'}, [['f'], ['h']], None),
            GCA.Node(6 / 7, {'c', 'i'}, [['c']], None),
            GCA.Node(5 / 7, {'a', 'i'}, [['a']], None),
            GCA.Node(7 / 7, {'i'}, [['i']], None),
        ]

        for expected in expected_LGC:
            # The closure must appear in the restricted lattice...
            match = analyzer.search_node_with_closure(expected.closure, lcg_S)
            self.assertSequenceEqual(expected.closure, match.closure)

            # ...with the expected support...
            self.assertEqual(expected.support, match.support)

            # ...and every expected generator present on the matching node.
            for generator in expected.generators:
                self.assertTrue(
                    is_in_generators(generator, match.generators, True))
    def test_mine(self):
        """Mining self.db must reproduce the publication's full lattice of closed itemsets."""
        analyzer = GCA(
            self.db,
            0.16)  #percentage to get a min_supp of 1 matching the publication
        analyzer.clean_database()
        analyzer.mine()
        #closed_items = analyzer.lcg_into_list() for double hash

        # NOTE(review): db_size is computed but never used — confirm intent.
        db_size = len(self.db)

        # Expected (support, closure, generators) nodes from the publication.
        expected_LGC = []
        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['a', 'd', 'f', 'h']),
                     [['d'], ['a', 'f']], None))
        expected_LGC.append(
            GCA.Node(3 / analyzer.db_length, set(['a', 'h']), [['a']], None))
        expected_LGC.append(
            GCA.Node(3 / analyzer.db_length, set(['f', 'h']), [['f']], None))
        expected_LGC.append(
            GCA.Node(3 / analyzer.db_length, set(['e', 'g', 'h']),
                     [['g'], ['e', 'h']], None))
        expected_LGC.append(
            GCA.Node(4 / analyzer.db_length, set(['b', 'c']), [['b']], None))
        expected_LGC.append(
            GCA.Node(4 / analyzer.db_length, set(['e']), [['e']], None))
        expected_LGC.append(
            GCA.Node(4 / analyzer.db_length, set(['h']), [['h']], None))
        expected_LGC.append(
            GCA.Node(5 / analyzer.db_length, set(['c']), [['c']], None))
        expected_LGC.append(
            GCA.Node(1 / analyzer.db_length, set([
                'a', 'd', 'f', 'h', 'e', 'g'
            ]), [['d', 'g'], ['d', 'e'], ['a', 'f', 'g'], ['a', 'f', 'e']],
                     None))
        expected_LGC.append(
            GCA.Node(1 / analyzer.db_length, set(['a', 'd', 'f', 'h', 'c']),
                     [['d', 'c'], ['a', 'f', 'c']], None))

        #TODO: check with publication's authors since aheg appears in two transactions in the database.
        #TODO: the example illustration shows an error with support of 1 but two transactions 1 and 3
        #expected_LGC.append(GCA.Node(1/analyzer.db_length,set(['a','h','e','g']),[['a','g'],['a','e']],None))

        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['a', 'h', 'e', 'g']),
                     [['a', 'g'], ['a', 'e']], None))
        expected_LGC.append(
            GCA.Node(1 / analyzer.db_length,
                     set(['a', 'h', 'b', 'c', 'e', 'g']),
                     [['a', 'b'], ['a', 'g', 'c'], ['a', 'e', 'c']], None))
        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['a', 'h', 'c']),
                     [['a', 'c']], None))
        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['f', 'h', 'e', 'g']),
                     [['f', 'g'], ['f', 'e']], None))
        expected_LGC.append(
            GCA.Node(1 / analyzer.db_length,
                     set(['f', 'h', 'b', 'c', 'e', 'g']),
                     [['f', 'b'], ['f', 'g', 'c'], ['f', 'e', 'c']], None))
        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['f', 'h', 'c']),
                     [['f', 'c']], None))
        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['e', 'g', 'h', 'b', 'c']),
                     [['g', 'b'], ['g', 'c'], ['b', 'h'], ['c', 'e', 'h']],
                     None))
        expected_LGC.append(
            GCA.Node(3 / analyzer.db_length, set(['b', 'c', 'e']),
                     [['b', 'e'], ['c', 'e']], None))
        expected_LGC.append(
            GCA.Node(3 / analyzer.db_length, set(['h', 'c']), [['h', 'c']],
                     None))

        for index, expected in enumerate(expected_LGC):
            #check closure
            match = analyzer.search_node_with_closure(expected.closure)
            self.assertSequenceEqual(expected.closure, match.closure)

            #check support
            self.assertEqual(expected.support, match.support)

            #check generators
            # NOTE(review): the real generator lookup is commented out below,
            # so this loop re-asserts the closure `match` (already known to be
            # non-None) and does not actually verify generators — confirm
            # whether the lookup was disabled deliberately.
            for generator in expected.generators:
                #match = analyzer.search_node_with_generator(None, generator)
                self.assertIsNotNone(match)

        # Both lattices must contain exactly the same number of nodes.
        self.assertEqual(len(expected_LGC), len(analyzer.lcg_into_list()))