    def test_mine_CAR(self):
        analyzer = GCA(self.db_RAR, 0.25)
        analyzer.clean_database()
        analyzer.mine()

        L = set([3, 5, 7])
        S = set([1, 3, 5, 7])
        L_node = analyzer.search_node_with_closure(L)
        S_node = analyzer.search_node_with_closure(S)

        rule_miner = RAMMax(analyzer.lcg_into_list())
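        # The four numeric arguments of mine_RAR are, as inferred from usage
        # across these examples: min support, max support, min confidence,
        # max confidence.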
        RAR = rule_miner.mine_RAR(L_node, S_node, 0.25, 1.0, 0.0, 1.0)
        CAR2 = rule_miner.mine_CAR2(L_node, S_node, RAR, analyzer)

        self.assertEqual(len(CAR2), 13)
        rules = []
        rules.append(Rule(set([5]), set([1, 7])))
        rules.append(Rule(set([5]), set([1, 3])))
        rules.append(Rule(set([5]), set([1])))
        rules.append(Rule(set([7]), set([1, 5])))
        rules.append(Rule(set([7]), set([1, 3])))
        rules.append(Rule(set([7]), set([1])))
        rules.append(Rule(set([3, 5]), set([1, 7])))
        rules.append(Rule(set([5, 7]), set([1, 3])))
        rules.append(Rule(set([3, 5, 7]), set([1])))
        rules.append(Rule(set([5, 7]), set([1])))
        rules.append(Rule(set([3, 5]), set([1])))
        rules.append(Rule(set([3, 7]), set([1, 5])))
        rules.append(Rule(set([3, 7]), set([1])))
        for i in range(len(CAR2)):
            self.assertEqual(frozenset(CAR2[i].left), frozenset(rules[i].left))
            self.assertEqual(frozenset(CAR2[i].right),
                             frozenset(rules[i].right))
    def test_mine_consequent_LS_2(self):
        analyzer = GCA(self.db, 0.0)
        analyzer.clean_database()
        analyzer.mine()

        L = set(['c', 'd'])
        S = set(['a', 'c', 'd', 't', 'w'])
        L_node = analyzer.search_node_with_closure(L)
        S_node = analyzer.search_node_with_closure(S)

        rule_miner = RAMM(analyzer.lcg_into_list())
        C_LS = rule_miner.mine_cars_L_S(L_node, S_node, 0, 1, 0, 1, analyzer)
        # Smoke test: only checks that mining completes and returns a result.
        self.assertIsNotNone(C_LS)
    def test_mine_RAR(self):
        analyzer = GCA(self.db_RAR, 0.0)
        analyzer.clean_database()
        analyzer.mine()

        L = set([3, 5, 7])
        S = set([1, 3, 5, 7])
        L_node = analyzer.search_node_with_closure(L)
        S_node = analyzer.search_node_with_closure(S)

        rule_miner = RAMMax(analyzer.lcg_into_list())
        RAR = rule_miner.mine_RAR(L_node, S_node)
        # Smoke test: only checks that mining completes and returns a result.
        self.assertIsNotNone(RAR)
    def test_all_rules(self):
        analyzer = GCA(self.db_RAR, 0.25)
        analyzer.clean_database()
        analyzer.mine()

        lattice = analyzer.lcg_into_lattice()
        rule_miner = RAMMax(analyzer.lcg_into_list())

        nb_rules = 0
        nb_basic_rules = 0
        for node in lattice.values():
            S = node.fci
            print('S: ' + str(S.closure))

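            # Breadth-first walk of the sub-lattice below this closed set S:
            # each dequeued node supplies an antecedent closure L for
            # basic-rule (RAR) and consequent-rule (CAR2) mining.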
            to_extract = deque()
            to_extract.append(node)
            to_extract.extend(node.children)
            visited = deque()
            while len(to_extract) > 0:
                current = to_extract.popleft()
                visited.append(current)
                L = current.fci

                RAR = rule_miner.mine_RAR(L, S, 0.95, 1.0, 0.95, 1.0)
                nb_consequent = len(rule_miner.mine_CAR2(L, S, RAR, analyzer))
                nb_basic_rules += len(RAR)
                nb_rules += nb_consequent

                print('  - L:' + str(L.closure) + ',gen: ' +
                      str(L.generators) + ', nb BR min/max: ' + str(len(RAR)) +
                      ', nb CR: ' + str(nb_consequent) + ', TBR: ' +
                      str(nb_basic_rules) + ', TBC: ' + str(nb_rules))

                for child in current.children:
                    for grandchild in child.children:
                        if grandchild not in to_extract and grandchild not in visited:
                            to_extract.append(grandchild)
                        else:
                            print('Child: ' + str(grandchild.fci.closure) +
                                  ', gen: ' + str(grandchild.fci.generators) +
                                  ' already waiting for extraction or visited')

        print('nb rules: ' + str(nb_rules))
        self.assertGreater(nb_rules, 0)
    def test_mine_basic_rules_LS_1(self):
        analyzer = GCA(self.db, 0.0)
        analyzer.clean_database()
        analyzer.mine()

        L = set(['a', 'c', 't', 'w'])
        S = set(['a', 'c', 'd', 't', 'w'])

        L_node = analyzer.search_node_with_closure(L)
        S_node = analyzer.search_node_with_closure(S)

        rule_miner = RAMM(analyzer.lcg_into_list())
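        # mine_LS bounds below are, as inferred from usage: min/max support,
        # then min/max confidence.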
        B_LS = rule_miner.mine_LS(L_node, S_node, 0.0, 1.0, 0.0, 1.0)
        rules = []
        rules.append(Rule(set(['a', 't']), set(['d'])))
        rules.append(Rule(set(['t', 'w']), set(['d'])))
        self.assertEqual(frozenset(B_LS[0].left), frozenset(rules[0].left))
        self.assertEqual(frozenset(B_LS[0].right), frozenset(rules[0].right))
        self.assertEqual(frozenset(B_LS[1].left), frozenset(rules[1].left))
        self.assertEqual(frozenset(B_LS[1].right), frozenset(rules[1].right))
class TestGenCloseAnalyzer(unittest.TestCase):
    def setUp(self):
        """
        Validate the implementation against the examples published here:
        https://pdfs.semanticscholar.org/56a4/ec156b26225b5922182bacc4c5b26fd5a555.pdf
        """

        self.db = []
        self.db.append(['a', 'b', 'c', 'e', 'g', 'h'])
        self.db.append(['a', 'c', 'd', 'f', 'h'])
        self.db.append(['a', 'd', 'e', 'f', 'g', 'h'])
        self.db.append(['b', 'c', 'e', 'f', 'g', 'h'])
        self.db.append(['b', 'c', 'e'])
        self.db.append(['b', 'c'])

        self.db_rules = []  # database used for association rule mining
        self.db_rules.append(['a', 'c', 'e', 'g', 'i'])
        self.db_rules.append(['a', 'c', 'f', 'h', 'i'])
        self.db_rules.append(['a', 'd', 'f', 'h', 'i'])
        self.db_rules.append(['b', 'c', 'e', 'g', 'i'])
        self.db_rules.append(['a', 'c', 'e', 'g', 'i'])
        self.db_rules.append(['b', 'c', 'e', 'g', 'i'])
        self.db_rules.append(['a', 'c', 'f', 'h', 'i'])

        self.db_rules_integer = [
        ]  # integer version of the database used for association rule mining
        self.db_rules_integer.append([1, 3, 5, 7, 9])
        self.db_rules_integer.append([1, 3, 6, 8, 9])
        self.db_rules_integer.append([1, 4, 6, 8, 9])
        self.db_rules_integer.append([2, 3, 5, 7, 9])
        self.db_rules_integer.append([1, 3, 5, 7, 9])
        self.db_rules_integer.append([2, 3, 5, 7, 9])
        self.db_rules_integer.append([1, 3, 6, 8, 9])

        self.analyzer = GCA([], 1)
        root = None

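        # Hand-built level-1 lattice nodes. The Node arguments, as inferred
        # from usage, are: support, closure, generators, transactions, parent.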
        a = GCA.Node(3, ('a',), ['a'], (1, 2, 3), root)
        b = GCA.Node(4, ('b',), ['b'], (1, 4, 5, 6), root)
        c = GCA.Node(5, ('c',), ['c'], (1, 2, 4, 5, 6), root)
        d = GCA.Node(2, ('d',), ['d'], (2, 3), root)
        e = GCA.Node(4, ('e',), ['e'], (1, 3, 4, 5), root)
        f = GCA.Node(3, ('f',), ['f'], (2, 3, 4), root)
        g = GCA.Node(3, ('g',), ['g'], (1, 3, 4), root)
        h = GCA.Node(4, ('h',), ['h'], (1, 2, 3, 4), root)
        self.L1 = [d, a, f, g, b, e, h, c]

        dc = GCA.Node(1, ('a', 'd', 'f', 'h', 'c'), ['d', 'c'], set([2]), d)
        de = GCA.Node(1, ('a', 'd', 'f', 'h', 'e'), ['d', 'e'], set([3]), d)
        dg = GCA.Node(1, ('a', 'd', 'f', 'h', 'e', 'g'), ['d', 'g'], set([3]),
                      d)
        af = GCA.Node(2, ('a', 'f', 'h'), ['a', 'f'], (2, 3), a)
        ag = GCA.Node(2, ('a', 'h', 'e', 'g'), ['a', 'g'], (1, 3), a)
        ab = GCA.Node(1, ('a', 'h', 'b', 'c'), ['a', 'b'], set([1]), a)
        ac = GCA.Node(2, ('a', 'h', 'c'), ['a', 'c'], (1, 2), a)
        ae = GCA.Node(2, ('a', 'e'), ['a', 'e'], (1, 3), a)
        gb = GCA.Node(2, ('e', 'g', 'h', 'b', 'c'), ['g', 'b'], (1, 4), g)
        gc = GCA.Node(2, ('g', 'c'), ['g', 'c'], (1, 4), g)
        be = GCA.Node(3, ('b', 'c', 'e'), ['b', 'e'], (1, 4, 5), b)
        bh = GCA.Node(2, ('b', 'c', 'h'), ['b', 'h'], (1, 4), b)
        eh = GCA.Node(3, ('e', 'h'), ['e', 'h'], (1, 3, 4), e)
        ec = GCA.Node(3, ('e', 'c'), ['e', 'c'], (1, 4, 5), e)
        fg = GCA.Node(2, ('f', 'h', 'e', 'g'), ['f', 'g'], (3, 4), f)
        fb = GCA.Node(1, ('f', 'h', 'b', 'c'), ['f', 'b'], set([4]), f)
        fc = GCA.Node(2, ('f', 'h', 'c'), ['f', 'c'], (2, 4), f)
        fe = GCA.Node(2, ('f', 'h', 'e'), ['f', 'e'], (3, 4), f)
        hc = GCA.Node(3, ('h', 'c'), ['h', 'c'], (1, 2, 4), h)

        # Order here is important: it depends on the order of L1.
        self.L2 = [
            dg, de, dc, af, ag, ab, ae, ac, fg, fb, fe, fc, gb, gc, be, bh, eh,
            ec, hc
        ]

    def test_attribute_folders_L1(self):
        self.analyzer.attribute_folders(self.L1, 1)
        self.assertEqual(len(self.analyzer.L_folders), len(self.L1))

    def test_attribute_folders_L2(self):
        self.analyzer.attribute_folders(self.L2, 2)
        folders = self.analyzer.L_folders
        self.assertEqual(len(folders[GCA.key_folder(['a'])]), 5)
        self.assertEqual(len(folders[GCA.key_folder(['b'])]), 2)
        self.assertEqual(len(folders[GCA.key_folder(['d'])]), 3)
        self.assertEqual(len(folders[GCA.key_folder(['e'])]), 2)
        self.assertEqual(len(folders[GCA.key_folder(['f'])]), 4)
        self.assertEqual(len(folders[GCA.key_folder(['g'])]), 2)
        self.assertEqual(len(folders[GCA.key_folder(['h'])]), 1)
        self.assertNotIn(GCA.key_folder(['c']), folders)

    def test_EOB_L1(self):
        self.analyzer.attribute_folders(self.L1, 1)
        self.analyzer.extend_merge(self.L1, 1)
        self.assertEqual(len(self.analyzer.L_folders), len(self.L1))

    def test_EOB_L2(self):
        self.analyzer.attribute_folders(self.L2, 2)
        self.analyzer.extend_merge(self.L2, 2)
        folders = self.analyzer.L_folders

        self.assertEqual(len(self.analyzer.L_folders), 5)

        self.assertEqual(len(folders[GCA.key_folder(['d'])]), 2)
        self.assertEqual(len(folders[GCA.key_folder(['a'])]), 4)
        self.assertEqual(len(folders[GCA.key_folder(['g'])]), 3)
        self.assertEqual(len(folders[GCA.key_folder(['f'])]), 3)
        self.assertEqual(len(folders[GCA.key_folder(['h'])]), 1)

        folder = folders[GCA.key_folder(['d'])]
        self.assertEqual(folder[0].closure,
                         frozenset(('a', 'd', 'e', 'f', 'g', 'h')))
        self.assertEqual(folder[0].generators, [['d', 'g'], ['d', 'e']])
        self.assertEqual(folder[0].transactions, set([3]))
        self.assertEqual(folder[0].support, 1)
        self.assertEqual(folder[1].closure, frozenset(
            ('a', 'd', 'f', 'h', 'c')))
        self.assertEqual(folder[1].generators, [['d', 'c']])
        self.assertEqual(folder[1].transactions, set([2]))
        self.assertEqual(folder[1].support, 1)

        folder = folders[GCA.key_folder(['a'])]
        self.assertEqual(folder[0].closure, frozenset(('a', 'h', 'f')))
        self.assertEqual(folder[0].generators, [['a', 'f']])
        self.assertEqual(folder[0].transactions, set([2, 3]))
        self.assertEqual(folder[0].support, 2)
        self.assertEqual(folder[1].closure, frozenset(('a', 'h', 'e', 'g')))
        self.assertEqual(folder[1].generators, [['a', 'g'], ['a', 'e']])
        self.assertEqual(folder[1].transactions, set([1, 3]))
        self.assertEqual(folder[1].support, 2)
        self.assertEqual(folder[2].closure,
                         frozenset(('a', 'h', 'b', 'c', 'e', 'g')))
        self.assertEqual(folder[2].generators, [['a', 'b']])
        self.assertEqual(folder[2].transactions, set([1]))
        self.assertEqual(folder[2].support, 1)
        self.assertEqual(folder[3].closure, frozenset(('a', 'h', 'c')))
        self.assertEqual(folder[3].generators, [['a', 'c']])
        self.assertEqual(folder[3].transactions, set([1, 2]))
        self.assertEqual(folder[3].support, 2)

        folder = folders[GCA.key_folder(['g'])]
        self.assertEqual(folder[0].closure, frozenset(
            ('e', 'g', 'h', 'b', 'c')))
        self.assertEqual(folder[0].generators,
                         [['g', 'b'], ['g', 'c'], ['b', 'h']])
        self.assertEqual(folder[0].transactions, set([1, 4]))
        self.assertEqual(folder[0].support, 2)
        self.assertEqual(folder[1].closure, frozenset(('b', 'c', 'e')))
        self.assertEqual(folder[1].generators, [['b', 'e'], ['e', 'c']])
        self.assertEqual(folder[1].transactions, set([1, 4, 5]))
        self.assertEqual(folder[1].support, 3)
        self.assertEqual(folder[2].closure, frozenset(('e', 'h')))
        self.assertEqual(folder[2].generators, [['e', 'h']])
        self.assertEqual(folder[2].transactions, set([1, 3, 4]))
        self.assertEqual(folder[2].support, 3)

        folder = folders[GCA.key_folder(['f'])]
        self.assertEqual(folder[0].closure, frozenset(('f', 'h', 'e', 'g')))
        self.assertEqual(folder[0].generators, [['f', 'g'], ['f', 'e']])
        self.assertEqual(folder[0].transactions, set([3, 4]))
        self.assertEqual(folder[0].support, 2)
        self.assertEqual(folder[1].closure,
                         frozenset(('f', 'h', 'b', 'c', 'e', 'g')))
        self.assertEqual(folder[1].generators, [['f', 'b']])
        self.assertEqual(folder[1].transactions, set([4]))
        self.assertEqual(folder[1].support, 1)
        self.assertEqual(folder[2].closure, frozenset(('f', 'h', 'c')))
        self.assertEqual(folder[2].generators, [['f', 'c']])
        self.assertEqual(folder[2].transactions, set([2, 4]))
        self.assertEqual(folder[2].support, 2)

    def test_mine(self):
        analyzer = GCA(
            self.db,
            0.16)  #percentage to get a min_supp of 1 matching the publication
        analyzer.clean_database()
        analyzer.mine()
        #closed_items = analyzer.lcg_into_list() for double hash

        db_size = len(self.db)

        expected_LGC = []
        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['a', 'd', 'f', 'h']),
                     [['d'], ['a', 'f']], None))
        expected_LGC.append(
            GCA.Node(3 / analyzer.db_length, set(['a', 'h']), [['a']], None))
        expected_LGC.append(
            GCA.Node(3 / analyzer.db_length, set(['f', 'h']), [['f']], None))
        expected_LGC.append(
            GCA.Node(3 / analyzer.db_length, set(['e', 'g', 'h']),
                     [['g'], ['e', 'h']], None))
        expected_LGC.append(
            GCA.Node(4 / analyzer.db_length, set(['b', 'c']), [['b']], None))
        expected_LGC.append(
            GCA.Node(4 / analyzer.db_length, set(['e']), [['e']], None))
        expected_LGC.append(
            GCA.Node(4 / analyzer.db_length, set(['h']), [['h']], None))
        expected_LGC.append(
            GCA.Node(5 / analyzer.db_length, set(['c']), [['c']], None))
        expected_LGC.append(
            GCA.Node(1 / analyzer.db_length, set([
                'a', 'd', 'f', 'h', 'e', 'g'
            ]), [['d', 'g'], ['d', 'e'], ['a', 'f', 'g'], ['a', 'f', 'e']],
                     None))
        expected_LGC.append(
            GCA.Node(1 / analyzer.db_length, set(['a', 'd', 'f', 'h', 'c']),
                     [['d', 'c'], ['a', 'f', 'c']], None))

        #TODO: check with the publication's authors: 'aheg' appears in two transactions of the database,
        #TODO: yet the example illustration shows a support of 1 despite transactions 1 and 3 matching.
        #expected_LGC.append(GCA.Node(1/analyzer.db_length,set(['a','h','e','g']),[['a','g'],['a','e']],None))

        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['a', 'h', 'e', 'g']),
                     [['a', 'g'], ['a', 'e']], None))
        expected_LGC.append(
            GCA.Node(1 / analyzer.db_length,
                     set(['a', 'h', 'b', 'c', 'e', 'g']),
                     [['a', 'b'], ['a', 'g', 'c'], ['a', 'e', 'c']], None))
        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['a', 'h', 'c']),
                     [['a', 'c']], None))
        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['f', 'h', 'e', 'g']),
                     [['f', 'g'], ['f', 'e']], None))
        expected_LGC.append(
            GCA.Node(1 / analyzer.db_length,
                     set(['f', 'h', 'b', 'c', 'e', 'g']),
                     [['f', 'b'], ['f', 'g', 'c'], ['f', 'e', 'c']], None))
        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['f', 'h', 'c']),
                     [['f', 'c']], None))
        expected_LGC.append(
            GCA.Node(2 / analyzer.db_length, set(['e', 'g', 'h', 'b', 'c']),
                     [['g', 'b'], ['g', 'c'], ['b', 'h'], ['c', 'e', 'h']],
                     None))
        expected_LGC.append(
            GCA.Node(3 / analyzer.db_length, set(['b', 'c', 'e']),
                     [['b', 'e'], ['c', 'e']], None))
        expected_LGC.append(
            GCA.Node(3 / analyzer.db_length, set(['h', 'c']), [['h', 'c']],
                     None))

        for index, expected in enumerate(expected_LGC):
            #check closure
            match = analyzer.search_node_with_closure(expected.closure)
            self.assertSequenceEqual(expected.closure, match.closure)

            #check support
            self.assertEqual(expected.support, match.support)

            #check generators
            for generator in expected.generators:
                # Generator lookup is disabled here; only the closure match
                # found above is re-asserted.
                #match = analyzer.search_node_with_generator(None, generator)
                self.assertIsNotNone(match)

        self.assertEqual(len(expected_LGC), len(analyzer.lcg_into_list()))

    def test_mine_db_rules(self):
        analyzer = GCA(self.db_rules,
                       1 / 7)  #percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        self.assertEqual(len(analyzer.lcg_into_list()), 10)

        expected_LGC = []
        expected_LGC.append(
            GCA.Node(2 / 7, set(['a', 'c', 'e', 'g', 'i']),
                     [['a', 'e'], ['a', 'g']], None))
        expected_LGC.append(
            GCA.Node(2 / 7, set(['b', 'c', 'e', 'g', 'i']), [['b']], None))
        expected_LGC.append(
            GCA.Node(2 / 7, set(['a', 'c', 'f', 'h', 'i']),
                     [['c', 'f'], ['c', 'h']], None))
        expected_LGC.append(
            GCA.Node(1 / 7, set(['a', 'd', 'f', 'h', 'i']), [['d']], None))
        expected_LGC.append(
            GCA.Node(4 / 7, set(['c', 'e', 'g', 'i']), [['e'], ['g']], None))
        expected_LGC.append(
            GCA.Node(4 / 7, set(['a', 'c', 'i']), [['a', 'c']], None))
        expected_LGC.append(
            GCA.Node(3 / 7, set(['a', 'f', 'h', 'i']), [['f'], ['h']], None))
        expected_LGC.append(GCA.Node(6 / 7, set(['c', 'i']), [['c']], None))
        expected_LGC.append(GCA.Node(5 / 7, set(['a', 'i']), [['a']], None))
        expected_LGC.append(GCA.Node(7 / 7, set(['i']), [['i']], None))

        for index, expected in enumerate(expected_LGC):
            #check closure
            match = analyzer.search_node_with_closure(expected.closure)
            self.assertSequenceEqual(expected.closure, match.closure)

            #check support
            self.assertEqual(expected.support, match.support)

            #check generators
            for generator in expected.generators:
                match = analyzer.search_node_with_generator(None, generator)
                self.assertIsNotNone(match)

    def test_MFCS_FromLattice(self):
        analyzer = GCA(self.db_rules,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        rule_miner = RAMCM(analyzer.lcg_into_list())
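        # MFCS_FromLattice arguments, as inferred from its uses in this file:
        # the full LCG, the target closed itemset S, the support of S, and the
        # lower/upper support bounds restricting the extracted sub-lattice.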
        lcg_S = rule_miner.MFCS_FromLattice(
            rule_miner.lcg, set(['a', 'c', 'f', 'h', 'i']),
            rule_miner._get_support(set(['a', 'c', 'f', 'h', 'i'])), 1 / 7, 1)

        self.assertEqual(len(lcg_S), 6)

        expected_LGC = []
        expected_LGC.append(
            GCA.Node(2 / 7, set(['a', 'c', 'f', 'h', 'i']),
                     [['c', 'f'], ['c', 'h']], None))
        expected_LGC.append(
            GCA.Node(4 / 7, set(['a', 'c', 'i']), [['a', 'c']], None))
        expected_LGC.append(
            GCA.Node(3 / 7, set(['a', 'f', 'h', 'i']), [['f'], ['h']], None))
        expected_LGC.append(GCA.Node(6 / 7, set(['c', 'i']), [['c']], None))
        expected_LGC.append(GCA.Node(5 / 7, set(['a', 'i']), [['a']], None))
        expected_LGC.append(GCA.Node(7 / 7, set(['i']), [['i']], None))

        for index, expected in enumerate(expected_LGC):
            # check closure
            match = analyzer.search_node_with_closure(expected.closure, lcg_S)
            self.assertSequenceEqual(expected.closure, match.closure)

            # check support
            self.assertEqual(expected.support, match.support)

            # check generators
            for generator in expected.generators:
                self.assertTrue(
                    is_in_generators(generator, match.generators, True))

    def test_MFS_RestrictMaxSC_1(self):
        # From publication example 3.a
        analyzer = GCA(self.db_rules,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        rule_miner = RAMCM(analyzer.lcg_into_list())
        lcg_S = rule_miner.MFCS_FromLattice(rule_miner.lcg,
                                            set(['c', 'e', 'a', 'g', 'i']),
                                            2 / 7, 1 / 7, 1)

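        # MFS_RestrictMaxSC (as used here) enumerates the frequent sub-itemsets
        # of Y that extend the fixed part X while staying inside the
        # restriction Z1, driven by the generators gen_X_Y.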
        # Enumerate left-hand sides
        Y = set(['c', 'e', 'g'])
        X = set([])
        Z1 = set(['c', 'e', 'g'])
        match = analyzer.search_node_with_closure(Y, lcg_S)
        gen_X_Y = match.generators
        fs_star_Y = rule_miner.MFS_RestrictMaxSC(Y, X, Z1, gen_X_Y)

        self.assertEqual(len(fs_star_Y), 6)
        expected_itemsets = []
        expected_itemsets.append(set(['e']))
        expected_itemsets.append(set(['e', 'c']))
        expected_itemsets.append(set(['e', 'g']))
        expected_itemsets.append(set(['e', 'c', 'g']))
        expected_itemsets.append(set(['g']))
        expected_itemsets.append(set(['g', 'c']))
        for itemset in expected_itemsets:
            self.assertIn(itemset, fs_star_Y)

        # Enumerate right-hand sides for the left-hand side 'e'
        Y = frozenset(['c', 'e', 'a', 'g', 'i']).difference(frozenset('e'))
        X = set(['e'])
        Z1 = set(['a', 'i'])
        match = analyzer.search_node_with_closure(Y, lcg_S)
        gen_X_Y = match.generators
        fs_star_Y = rule_miner.MFS_RestrictMaxSC(Y, X, Z1, gen_X_Y)

        self.assertEqual(len(fs_star_Y), 2)
        expected_itemsets = []
        expected_itemsets.append(set(['a']))
        expected_itemsets.append(set(['a', 'i']))
        for itemset in expected_itemsets:
            self.assertIn(itemset, fs_star_Y)

    def test_MFS_RestrictMaxSC_2(self):
        # From publication example 3.b
        analyzer = GCA(self.db_rules,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        rule_miner = RAMCM(analyzer.lcg_into_list())
        lcg_S = rule_miner.MFCS_FromLattice(rule_miner.lcg,
                                            set(['a', 'c', 'f', 'h', 'i']),
                                            2 / 7, 1 / 7, 1)

        # Enumerate left side
        Y = set(['a'])
        X = set([])
        Z1 = set(['a'])
        match = analyzer.search_node_with_closure(Y, lcg_S)
        gen_X_Y = match.generators
        fs_star_Y = rule_miner.MFS_RestrictMaxSC(Y, X, Z1, gen_X_Y)

        self.assertEqual(len(fs_star_Y), 1)
        expected_itemsets = []
        expected_itemsets.append(set(['a']))
        for itemset in expected_itemsets:
            self.assertIn(itemset, fs_star_Y)

        # Enumerate right-hand sides for the left-hand side 'a'
        Y = frozenset(['a', 'c', 'f', 'h', 'i']).difference(frozenset('a'))
        X = set(['a'])
        Z1 = set(['c', 'f', 'h', 'i'])
        match = analyzer.search_node_with_closure(Y, lcg_S)
        gen_X_Y = match.generators
        fs_star_Y = rule_miner.MFS_RestrictMaxSC(Y, X, Z1, gen_X_Y)

        self.assertEqual(len(fs_star_Y), 6)
        expected_itemsets = []
        expected_itemsets.append(set(['c', 'f']))
        expected_itemsets.append(set(['c', 'f', 'i']))
        expected_itemsets.append(set(['c', 'f', 'h']))
        expected_itemsets.append(set(['c', 'f', 'h', 'i']))
        expected_itemsets.append(set(['c', 'h']))
        expected_itemsets.append(set(['c', 'h', 'i']))
        for itemset in expected_itemsets:
            self.assertIn(itemset, fs_star_Y)

    def test_MAR_MaxSC_OneClass(self):

        # From publication example 3.a
        analyzer = GCA(self.db_rules,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        rule_miner = RAMCM(analyzer.lcg_into_list())
        lcg_S = rule_miner.MFCS_FromLattice(rule_miner.lcg,
                                            set(['c', 'e', 'a', 'g', 'i']),
                                            2 / 7, 1 / 7, 1)

        # Generate rules for S_star_S1 = set(['c', 'e', 'a', 'g', 'i'])
        L_C1 = set(['c', 'e', 'g'])
        match = analyzer.search_node_with_closure(L_C1, lcg_S)

        gen_L_C1 = match.generators
        R1 = set(['a', 'i'])
        S_star_S1 = set(['c', 'e', 'a', 'g', 'i'])
        match = analyzer.search_node_with_closure(S_star_S1, lcg_S)
        gen_S_star_S1 = match.generators
        S1 = set(['c', 'e', 'a', 'g', 'i'])

        rules = rule_miner.MAR_MaxSC_OneClass(L_C1, gen_L_C1, R1, S_star_S1,
                                              gen_S_star_S1, S_star_S1)

        self.assertEqual(len(rules), 12)
        expected_rules = []
        expected_rules.append(Rule(set(['e']), set(['a'])))
        expected_rules.append(Rule(set(['e']), set(['a', 'i'])))
        expected_rules.append(Rule(set(['c', 'e']), set(['a'])))
        expected_rules.append(Rule(set(['c', 'e']), set(['a', 'i'])))
        expected_rules.append(Rule(set(['e', 'g']), set(['a'])))
        expected_rules.append(Rule(set(['e', 'g']), set(['a', 'i'])))
        expected_rules.append(Rule(set(['c', 'e', 'g']), set(['a'])))
        expected_rules.append(Rule(set(['c', 'e', 'g']), set(['a', 'i'])))
        expected_rules.append(Rule(set(['g']), set(['a'])))
        expected_rules.append(Rule(set(['g']), set(['a', 'i'])))
        expected_rules.append(Rule(set(['c', 'g']), set(['a'])))
        expected_rules.append(Rule(set(['c', 'g']), set(['a', 'i'])))

        # Every expected rule must be produced.
        for expected in expected_rules:
            self.assertTrue(
                any(
                    frozenset(expected.left) == frozenset(rule.left)
                    and frozenset(expected.right) == frozenset(rule.right)
                    for rule in rules))

    def test_MAR_MaxSC_OneClass_2(self):

        # From publication example 3.b
        analyzer = GCA(self.db_rules,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        rule_miner = RAMCM(analyzer.lcg_into_list())
        lcg_S = rule_miner.MFCS_FromLattice(rule_miner.lcg,
                                            set(['a', 'c', 'f', 'h', 'i']),
                                            2 / 7, 1 / 7, 1)

        # Generate rules for S_star_S1 = set(['a', 'c', 'f', 'h', 'i'])
        L_C1 = set(['a'])
        match = analyzer.search_node_with_closure(L_C1, lcg_S)
        gen_L_C1 = match.generators
        R1 = set(['c', 'f', 'h', 'i'])
        S_star_S1 = set(['a', 'c', 'f', 'h', 'i'])
        match = analyzer.search_node_with_closure(S_star_S1, lcg_S)
        gen_S_star_S1 = match.generators
        S1 = set(['a', 'c', 'f', 'h', 'i'])

        rules = rule_miner.MAR_MaxSC_OneClass(L_C1, gen_L_C1, R1, S_star_S1,
                                              gen_S_star_S1, S_star_S1)

        self.assertEqual(len(rules), 6)
        expected_rules = []
        expected_rules.append(Rule(set(['a']), set(['c', 'f'])))
        expected_rules.append(Rule(set(['a']), set(['c', 'f', 'i'])))
        expected_rules.append(Rule(set(['a']), set(['c', 'f', 'h'])))
        expected_rules.append(Rule(set(['a']), set(['c', 'f', 'h', 'i'])))
        expected_rules.append(Rule(set(['a']), set(['c', 'h'])))
        expected_rules.append(Rule(set(['a']), set(['c', 'h', 'i'])))

        # Every expected rule must be produced.
        for expected in expected_rules:
            self.assertTrue(
                any(
                    frozenset(expected.left) == frozenset(rule.left)
                    and frozenset(expected.right) == frozenset(rule.right)
                    for rule in rules))

    def test_MAR_MaxSC_OneClass_3(self):

        # From publication example 1.c
        analyzer = GCA(self.db_rules,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        rule_miner = RAMCM(analyzer.lcg_into_list())
        lcg_S = rule_miner.MFCS_FromLattice(rule_miner.lcg,
                                            set(['a', 'c', 'f', 'h', 'i']),
                                            2 / 7, 1 / 7, 1)

        # Generate rules for S_star_S1 = set(['a', 'f', 'h', 'i'])
        L_C1 = set(['a'])
        match = analyzer.search_node_with_closure(L_C1, lcg_S)
        gen_L_C1 = match.generators
        R1 = set(['c', 'f', 'h', 'i'])
        S_star_S1 = set(['a', 'f', 'h', 'i'])
        match = analyzer.search_node_with_closure(S_star_S1, lcg_S)
        gen_S_star_S1 = match.generators
        S1 = set(['a', 'c', 'f', 'h', 'i'])

        rules = rule_miner.MAR_MaxSC_OneClass(L_C1, gen_L_C1, R1, S_star_S1,
                                              gen_S_star_S1, S_star_S1)

        self.assertEqual(len(rules), 6)
        expected_rules = []
        expected_rules.append(Rule(set(['a']), set(['f'])))
        expected_rules.append(Rule(set(['a']), set(['f', 'i'])))
        expected_rules.append(Rule(set(['a']), set(['h'])))
        expected_rules.append(Rule(set(['a']), set(['h', 'i'])))
        expected_rules.append(Rule(set(['a']), set(['f', 'h'])))
        expected_rules.append(Rule(set(['a']), set(['f', 'h', 'i'])))

        # Every expected rule must be produced.
        for expected in expected_rules:
            self.assertTrue(
                any(
                    frozenset(expected.left) == frozenset(rule.left)
                    and frozenset(expected.right) == frozenset(rule.right)
                    for rule in rules))

    def test_MAR_MaxSC_OneClass_4(self):

        # From publication example 3.a
        analyzer = GCA(self.db_rules,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        rule_miner = RAMCM(analyzer.lcg_into_list())
        lcg_S = rule_miner.MFCS_FromLattice(rule_miner.lcg,
                                            set(['c', 'e', 'a', 'g', 'i']),
                                            2 / 7, 1 / 7, 5 / 7)

        # Generate rules for S_star_S1 = set(['a', 'c', 'i'])
        L_C1 = set(['c', 'e', 'g'])
        match = analyzer.search_node_with_closure(L_C1, lcg_S)
        gen_L_C1 = match.generators
        R1 = set(['a', 'i'])
        S_star_S1 = set(['a', 'c', 'i'])
        match = analyzer.search_node_with_closure(S_star_S1, lcg_S)
        gen_S_star_S1 = match.generators
        S1 = set(['c', 'e', 'a', 'g', 'i'])

        rules = rule_miner.MAR_MaxSC_OneClass(L_C1, gen_L_C1, R1, S_star_S1,
                                              gen_S_star_S1, S_star_S1)

        self.assertEqual(len(rules), 2)
        expected_rules = []

    def test_mine_rules_1(self):
        # From publication example 3.a
        analyzer = GCA(self.db_rules,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        L1 = set({'c', 'e', 'g'})
        R1 = set(['a', 'i'])
        rule_miner = RAMCM(analyzer.lcg_into_list())
        rule_miner.mine(1 / 7, 5 / 7, 1 / 3, 0.9, L1, R1)

        self.assertEqual(len(rule_miner.ars), 14)

    def test_mine_rules_2(self):
        # From publication example 3.b
        analyzer = GCA(self.db_rules,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        L1 = set({'a'})
        R1 = set(['c', 'f', 'h', 'i'])
        rule_miner = RAMCM(analyzer.lcg_into_list())
        rule_miner.mine(1 / 7, 5 / 7, 1 / 3, 0.9, L1, R1)

        self.assertEqual(len(rule_miner.ars), 12)

    def test_mine_rules_1_integer(self):
        analyzer = GCA(self.db_rules_integer,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        L1 = set({3, 5, 7})
        R1 = set([1, 9])
        rule_miner = RAMCM(analyzer.lcg_into_list())
        rule_miner.mine(1 / 7, 5 / 7, 1 / 3, 0.9, L1, R1)

        self.assertEqual(len(rule_miner.ars), 14)

    def test_mine_rules_2_integer(self):
        # From publication example 3.b
        analyzer = GCA(self.db_rules_integer,
                       1 / 7)  # percentage indicated in publication
        analyzer.clean_database()
        analyzer.mine()

        L1 = set({1})
        R1 = set([3, 6, 8, 9])
        rule_miner = RAMCM(analyzer.lcg_into_list())
        rule_miner.mine(1 / 7, 5 / 7, 1 / 3, 0.9, L1, R1)

        self.assertEqual(len(rule_miner.ars), 12)
def use_reference(file):
    db = []
    with open('./../data/' + file + '.dat') as csvfile:
        rows = csvfile.read().split('\n')
        for row in rows:
            if row:  # skip the empty row produced by a trailing newline
                db.append(row.split(' '))

    min_support = 0.95
    analyzer = GCA(db, min_support)
    analyzer.mine()
    frequent_items = analyzer.lcg_into_list()

    lattice = analyzer.lcg_into_lattice()
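    # lcg_into_list() yields a flat list of closed itemsets;
    # lcg_into_lattice() (as inferred) yields the parent/child structure
    # that is traversed below.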

    nb_frequent_items = len(frequent_items)
    print('Nb frequent items with min_support = ' + str(min_support) + ': ' +
          str(nb_frequent_items))

    rule_miner = RAMMax(analyzer.lcg_into_list())
    #rule_miner = RAMin(analyzer.lcg_into_list())

    nb_rules = 0
    nb_basic_rules = 0
    print('Extract rules from frequent items: ')

    rules = deque()
    for node in lattice.values():
        S = node.fci
        print('S: ' + str(S.closure))

        to_extract = deque()
        to_extract.append(node)
        visited = deque()
        while len(to_extract) > 0:
            current = to_extract.popleft()
            visited.append(current)
            L = current.fci

            #RAR = rule_miner.mine_basic(L, S)
            RAR = rule_miner.mine_RAR(L, S, 0.95, 1.0, 0.95, 1.0)
            ne_rules = rule_miner.mine_CAR2(L, S, RAR, analyzer)

            for new_rule in ne_rules:
                # Reset per rule; otherwise a single duplicate would suppress
                # every later rule in ne_rules.
                is_new_rule = True
                for saved_rule in rules:
                    if new_rule.left == saved_rule.left and new_rule.right == saved_rule.right:
                        is_new_rule = False
                        break
                if is_new_rule:
                    rules.append(new_rule)
                    nb_rules += 1

            #nb_basic_rules += len(RAR)

            print('  - L:' + str(L.closure) + ',gen: ' + str(L.generators) +
                  ', nb BR min/max: ' + str(len(RAR)) + ', TBR: ' +
                  str(nb_basic_rules) + ', TBC: ' + str(nb_rules))
            #print(' - L:' + str(L.closure) + ',gen: ' + str(L.generators) + ', nb BR min/max: ' + str(len(RAR)) + ', TBR: ' + str(nb_basic_rules))
            for rule in RAR:
                print('  - ' + rule.to_str())

            for child in current.children:
                for grandchild in child.children:
                    if grandchild not in to_extract and grandchild not in visited:
                        to_extract.append(grandchild)

    print('nb rules: ' + str(nb_rules))
def find_closed_items():
    print('Load deck')
    card_loader = MagicLoader()
    card_loader.load('./../data/magic_cards/AllCards-x.json')

    print('Clean deck')
    deck_loader = DeckManager()

    list_files = os.listdir("./../data/decks_mtgdeck_net")  # returns a list of file names
    for i in range(len(list_files)):
        list_files[i] = './../data/decks_mtgdeck_net/' + list_files[i]

    deck_loader.load_from_mtgdeck_csv(list_files, card_loader)
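    # Presumably strips the land cards before mining, so that ubiquitous
    # lands do not dominate the frequent itemsets.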
    deck_loader.extract_lands(card_loader.lands, card_loader)

    analyzer = GCA(deck_loader.decks, 0.05)

    print('Start mining ' + str(len(deck_loader.decks)) + ' decks')
    analyzer.mine()
    print('nb closed items = ' + str(len(analyzer.lcg_into_list())))
    #deck_loader.write_frequent_items_into_csv('genclose_results', analyzer.get_closed_items_closures(), card_loader)

    frequent_items = analyzer.lcg_into_list()
    lattice = analyzer.lcg_into_lattice()

    generated_rules = []
    '''
    rule_miner = RAMCM(frequent_items)
    for pair in combinations(list(range(nb_frequent_items)), 2):
        L1 = frequent_items[pair[0]].closure
        R1 = frequent_items[pair[1]].closure
        rule_miner.mine(0.3, 1.0, 0.33, 1.0, L1, R1)
        generated_rules.extend(rule_miner.ars)
    '''

    rule_miner = RAMMax(analyzer.lcg_into_list())

    nb_rules = 0
    nb_basic_rules = 0
    print('Extract rules from frequent items: ')

    for node in lattice.values():
        S = node.fci
        #print('S: ' + str(S.closure))

        to_extract = deque()
        to_extract.append(node)
        to_extract.extend(node.children)
        visited = deque()
        while len(to_extract) > 0:
            current = to_extract.popleft()
            visited.append(current)
            L = current.fci

            RAR = rule_miner.mine_RAR(L, S, 0.05, 0.08, 0.7, 0.9)
            '''
            nb_consequent = len(rule_miner.mine_CAR2(L, S, RAR, analyzer))

            nb_basic_rules += len(RAR)
            nb_rules += nb_consequent

            print('  - L:' + str(L.closure) + ',gen: ' + str(L.generators) + ', nb BR min/max: ' + str(
                len(RAR)) + ', nb CR: ' + str(nb_consequent) + ', TBR: ' + str(nb_basic_rules) + ', TBC: ' + str(
                nb_rules))

            for child in current.children:
                for grandchild in child.children:
                    if grandchild not in to_extract and grandchild not in visited:
                        to_extract.append(grandchild)
            '''

            for rule in RAR:
                text = str(round(rule.support, 2)) + ' - ' + str(
                    round(rule.confidence, 2)) + ': '
                for l in rule.left:
                    text += card_loader.hash_id_name[l] + ' + '
                text += ' ----> '
                for r in rule.right:
                    text += card_loader.hash_id_name[r] + ' + '
                print(text)

    print('nb rules: ' + str(nb_rules))