def test_InvertedIndex():
    """Check ``InvertedIndex.support`` against hand-counted fractions.

    Loads a six-line comma-separated data set and asserts the support
    reported for individual items and for a few multi-item queries.
    Every expected value is expressed as ``occurrences / 6``.
    """
    data = (
        "a,b,c,d,e,f\n"
        "g,h,i,j,k,l\n"
        "z,x\n"
        "z,x\n"
        "z,x,y\n"
        "z,x,y,i\n"
    )
    index = InvertedIndex()
    index.load(data)

    # (item name, number of lines in `data` that contain it)
    single_counts = [
        ("a", 1),
        ("b", 1),
        ("c", 1),
        ("d", 1),
        ("e", 1),
        ("f", 1),
        ("h", 1),
        ("i", 2),
        ("j", 1),
        ("k", 1),
        ("l", 1),
        ("z", 4),
        ("x", 4),
        ("y", 2),
    ]
    for name, occurrences in single_counts:
        assert index.support({Item(name)}) == occurrences / 6

    # (item names queried together, number of lines containing all of them)
    combined_counts = [
        ("zx", 4),
        ("zxy", 2),
        ("zxyi", 1),
    ]
    for names, occurrences in combined_counts:
        query = {Item(name) for name in names}
        assert index.support(query) == occurrences / 6
def test_apriori():
    """Check the itemsets from ``apriori`` and the rules built from them.

    Runs ``apriori(index, 2 / 6)`` on a six-line data set, compares the
    returned itemsets and their supports with a hand-written table, then
    compares ``generate_rules(itemsets, 0, 0, index)`` with the expected
    set of rule tuples.
    """
    # NOTE(review): if another ``test_apriori`` is defined later in the same
    # module, that later definition shadows this one -- confirm this test is
    # actually collected.
    data = (
        "a,b,c,d,e,f\n"
        "g,h,i,j,k,l\n"
        "z,x\n"
        "z,x\n"
        "z,x,y\n"
        "z,x,y,i\n"
    )
    expected_supports = {
        ItemSet("i"): 2 / 6,
        ItemSet("z"): 4 / 6,
        ItemSet("x"): 4 / 6,
        ItemSet("y"): 2 / 6,
        ItemSet("xz"): 4 / 6,
        ItemSet("yz"): 2 / 6,
        ItemSet("xy"): 2 / 6,
        ItemSet("xyz"): 2 / 6,
    }

    index = InvertedIndex()
    index.load(data)
    itemsets = apriori(index, 2 / 6)

    assert set(expected_supports) == set(itemsets)
    for found in itemsets:
        assert expected_supports[found] == index.support(found)

    multi_item = [found for found in itemsets if len(found) > 1]
    print("Itemsets={}".format(multi_item))

    def items_of(names):
        # One Item per character, e.g. "xy" -> frozenset({Item("x"), Item("y")}).
        return frozenset(Item(name) for name in names)

    # (antecedent, consequent, confidence, lift, support)
    rule_table = [
        ("xy", "z", 1, 1.5, 1 / 3),
        ("x", "y", 0.5, 1.5, 1 / 3),
        ("x", "zy", 0.5, 1.5, 1 / 3),
        ("x", "z", 1, 1.5, 2 / 3),
        ("y", "x", 1, 1.5, 1 / 3),
        ("y", "zx", 1, 1.5, 1 / 3),
        ("y", "z", 1, 1.5, 1 / 3),
        ("zx", "y", 0.5, 1.5, 1 / 3),
        ("zy", "x", 1, 1.5, 1 / 3),
        ("z", "xy", 0.5, 1.5, 1 / 3),
        ("z", "x", 1, 1.5, 2 / 3),
        ("z", "y", 0.5, 1.5, 1 / 3),
    ]
    expected_rules = {
        (items_of(lhs), items_of(rhs), conf, lift, sup)
        for lhs, rhs, conf, lift, sup in rule_table
    }

    rules = set(generate_rules(itemsets, 0, 0, index))
    for lhs, rhs, conf, lift, sup in rules:
        print("{}, {} conf={:.4f}, {:.4f}, {:.4f}".format(
            lhs, rhs, conf, lift, sup))

    assert rules == expected_rules
def test_apriori():
    """Check the itemsets from ``apriori`` and the rules built from them.

    Runs ``apriori(index, 2 / 6)`` on a six-line data set and checks the
    number of itemsets, their membership in the expected table, and their
    supports. It then calls ``generate_rules`` with per-itemset counts and
    ``index.num_transactions`` and checks that every expected rule
    (with item names mapped through ``item_id``) is present in the result.
    """
    data = (
        "a,b,c,d,e,f\n"
        "g,h,i,j,k,l\n"
        "z,x\n"
        "z,x\n"
        "z,x,y\n"
        "z,x,y,i\n"
    )
    expected_supports = {
        ItemSet("i"): 2 / 6,
        ItemSet("z"): 4 / 6,
        ItemSet("x"): 4 / 6,
        ItemSet("y"): 2 / 6,
        ItemSet("xz"): 4 / 6,
        ItemSet("yz"): 2 / 6,
        ItemSet("xy"): 2 / 6,
        ItemSet("xyz"): 2 / 6,
    }

    index = InvertedIndex()
    index.load(data)
    itemsets = apriori(index, 2 / 6)

    assert len(itemsets) == len(expected_supports)
    assert all(frozenset(found) in expected_supports for found in itemsets)
    for found in itemsets:
        assert expected_supports[frozenset(found)] == index.support(found)

    multi_item = [found for found in itemsets if len(found) > 1]
    print("Itemsets={}".format(multi_item))

    # (antecedent, consequent, confidence, lift, support)
    rule_table = [
        (['y'], ['x'], 1.0, 1.5, 0.3333333333333333),
        (['x'], ['y'], 0.5, 1.5, 0.3333333333333333),
        (['y'], ['z'], 1.0, 1.5, 0.3333333333333333),
        (['z'], ['y'], 0.5, 1.5, 0.3333333333333333),
        (['x'], ['z'], 1.0, 1.5, 0.6666666666666666),
        (['z'], ['x'], 1.0, 1.5, 0.6666666666666666),
        (['x', 'y'], ['z'], 1.0, 1.5, 0.3333333333333333),
        (['z', 'y'], ['x'], 1.0, 1.5, 0.3333333333333333),
        (['z', 'x'], ['y'], 0.5, 1.5, 0.3333333333333333),
        (['y'], ['z', 'x'], 1.0, 1.5, 0.3333333333333333),
        (['x'], ['z', 'y'], 0.5, 1.5, 0.3333333333333333),
        (['z'], ['x', 'y'], 0.5, 1.5, 0.3333333333333333),
    ]
    # Map the readable item names through item_id so the tuples can be
    # compared with what generate_rules returns.
    expected_rules = [
        (
            [item_id(name) for name in lhs],
            [item_id(name) for name in rhs],
            conf,
            lift,
            sup,
        )
        for lhs, rhs, conf, lift, sup in rule_table
    ]

    itemset_counts = {tuple(found): index.count(found) for found in itemsets}
    rules = generate_rules(
        itemsets, itemset_counts, index.num_transactions, 0, 0)

    # Printable view of the generated rules, with items passed through item_str.
    printable = [
        (
            [item_str(item) for item in lhs],
            [item_str(item) for item in rhs],
            conf,
            lift,
            sup,
        )
        for lhs, rhs, conf, lift, sup in rules
    ]
    print("rules")
    print(printable)
    for lhs, rhs, conf, lift, sup in rules:
        print("{}, {} conf={:.4f}, {:.4f}, {:.4f}".format(
            lhs, rhs, conf, lift, sup))

    # Equal lengths plus every expected rule being present is the
    # order-independent comparison used here.
    assert len(rules) == len(expected_rules)
    for expected in expected_rules:
        assert expected in rules