Exemple #1
0
def test_InvertedIndex():
    """Check InvertedIndex.support() for single items and for item sets.

    The fixture consists of six newline-terminated, comma-separated lines.
    Every expected support below is expressed as "number of lines that
    contain all of the queried items" divided by 6.
    """
    data = ("a,b,c,d,e,f\n"
            "g,h,i,j,k,l\n"
            "z,x\n"
            "z,x\n"
            "z,x,y\n"
            "z,x,y,i\n")
    index = InvertedIndex()
    index.load(data)

    num_lines = 6

    # Number of fixture lines containing each individual item. Every distinct
    # item in the fixture is listed (including "g") so that no item's support
    # goes unchecked.
    single_item_counts = {
        "a": 1,
        "b": 1,
        "c": 1,
        "d": 1,
        "e": 1,
        "f": 1,
        "g": 1,
        "h": 1,
        "i": 2,
        "j": 1,
        "k": 1,
        "l": 1,
        "z": 4,
        "x": 4,
        "y": 2,
    }
    for name, count in single_item_counts.items():
        # The item name is used as the assertion message so a failure
        # identifies which item's support was wrong.
        assert index.support({Item(name)}) == count / num_lines, name

    # Number of fixture lines containing *all* items of each multi-item set;
    # each character of the key is one item.
    item_set_counts = {
        "zx": 4,
        "zxy": 2,
        "zxyi": 1,
    }
    for names, count in item_set_counts.items():
        query = {Item(name) for name in names}
        assert index.support(query) == count / num_lines, names
Exemple #2
0
def test_apriori():
    """Exercise apriori() and generate_rules() against a six-line fixture.

    First verifies that apriori(index, 2 / 6) yields exactly the expected
    item sets and that each one's support (as reported by the index) matches
    the expected value. Then verifies that generate_rules() produces exactly
    the expected set of (antecedent, consequent, confidence, lift, support)
    tuples.
    """
    data = ("a,b,c,d,e,f\n"
            "g,h,i,j,k,l\n"
            "z,x\n"
            "z,x\n"
            "z,x,y\n"
            "z,x,y,i\n")

    # Expected item sets mapped to their expected support.
    expected_support = {
        ItemSet(names): support
        for names, support in [
            ("i", 2 / 6),
            ("z", 4 / 6),
            ("x", 4 / 6),
            ("y", 2 / 6),
            ("xz", 4 / 6),
            ("yz", 2 / 6),
            ("xy", 2 / 6),
            ("xyz", 2 / 6),
        ]
    }

    index = InvertedIndex()
    index.load(data)
    # NOTE(review): the second argument is presumably the minimum-support
    # threshold - confirm against apriori()'s signature.
    itemsets = apriori(index, 2 / 6)

    assert set(expected_support) == set(itemsets)
    for candidate in itemsets:
        assert expected_support[candidate] == index.support(candidate)

    multi_item = [i for i in itemsets if len(i) > 1]
    print("Itemsets={}".format(multi_item))

    def items(*names):
        # Build a frozenset of Item objects from plain item names.
        return frozenset(Item(name) for name in names)

    # Each rule is (antecedent, consequent, confidence, lift, support).
    expected_rules = {
        (items("x", "y"), items("z"), 1, 1.5, 1 / 3),
        (items("x"), items("y"), 0.5, 1.5, 1 / 3),
        (items("x"), items("z", "y"), 0.5, 1.5, 1 / 3),
        (items("x"), items("z"), 1, 1.5, 2 / 3),
        (items("y"), items("x"), 1, 1.5, 1 / 3),
        (items("y"), items("z", "x"), 1, 1.5, 1 / 3),
        (items("y"), items("z"), 1, 1.5, 1 / 3),
        (items("z", "x"), items("y"), 0.5, 1.5, 1 / 3),
        (items("z", "y"), items("x"), 1, 1.5, 1 / 3),
        (items("z"), items("x", "y"), 0.5, 1.5, 1 / 3),
        (items("z"), items("x"), 1, 1.5, 2 / 3),
        (items("z"), items("y"), 0.5, 1.5, 1 / 3),
    }

    # NOTE(review): the two 0 arguments look like "no filtering" thresholds
    # (e.g. minimum confidence / lift) - confirm against generate_rules().
    actual_rules = set(generate_rules(itemsets, 0, 0, index))

    for lhs, rhs, conf, lift_value, sup in actual_rules:
        line = "{}, {} conf={:.4f}, {:.4f}, {:.4f}".format(
            lhs, rhs, conf, lift_value, sup)
        print(line)

    assert actual_rules == expected_rules
Exemple #3
0
def test_apriori():
    """Exercise apriori() and the count-based generate_rules() variant.

    Checks that apriori(index, 2 / 6) yields the expected number of item
    sets, that each one is an expected item set with the expected support,
    and that generate_rules() returns as many rules as expected with every
    expected rule present in the result.
    """
    data = ("a,b,c,d,e,f\n"
            "g,h,i,j,k,l\n"
            "z,x\n"
            "z,x\n"
            "z,x,y\n"
            "z,x,y,i\n")

    # Expected item sets mapped to their expected support.
    expected_support = {
        ItemSet(names): support
        for names, support in [
            ("i", 2 / 6),
            ("z", 4 / 6),
            ("x", 4 / 6),
            ("y", 2 / 6),
            ("xz", 4 / 6),
            ("yz", 2 / 6),
            ("xy", 2 / 6),
            ("xyz", 2 / 6),
        ]
    }

    index = InvertedIndex()
    index.load(data)
    # NOTE(review): the second argument is presumably the minimum-support
    # threshold - confirm against apriori()'s signature.
    itemsets = apriori(index, 2 / 6)

    assert len(itemsets) == len(expected_support)
    # Membership is checked for every item set before any support lookup.
    for candidate in itemsets:
        assert frozenset(candidate) in expected_support
    for candidate in itemsets:
        assert expected_support[frozenset(candidate)] == index.support(candidate)

    multi_item = [i for i in itemsets if len(i) > 1]
    print("Itemsets={}".format(multi_item))

    def itemize(names):
        # Convert item names to whatever item_id() returns for them.
        return [item_id(name) for name in names]

    def deitemize(ids):
        # Inverse direction: convert via item_str() for readable output.
        return [item_str(value) for value in ids]

    third = 1 / 3        # == 0.3333333333333333
    two_thirds = 2 / 3   # == 0.6666666666666666

    # Each rule is (antecedent, consequent, confidence, lift, support),
    # written with plain item names.
    named_rules = [
        (['y'], ['x'], 1.0, 1.5, third),
        (['x'], ['y'], 0.5, 1.5, third),
        (['y'], ['z'], 1.0, 1.5, third),
        (['z'], ['y'], 0.5, 1.5, third),
        (['x'], ['z'], 1.0, 1.5, two_thirds),
        (['z'], ['x'], 1.0, 1.5, two_thirds),
        (['x', 'y'], ['z'], 1.0, 1.5, third),
        (['z', 'y'], ['x'], 1.0, 1.5, third),
        (['z', 'x'], ['y'], 0.5, 1.5, third),
        (['y'], ['z', 'x'], 1.0, 1.5, third),
        (['x'], ['z', 'y'], 0.5, 1.5, third),
        (['z'], ['x', 'y'], 0.5, 1.5, third),
    ]

    expected_rules = [
        (itemize(lhs), itemize(rhs), conf, lift_value, sup)
        for lhs, rhs, conf, lift_value, sup in named_rules
    ]

    # Per-itemset counts keyed by the itemset as a tuple.
    itemset_counts = {tuple(i): index.count(i) for i in itemsets}
    # NOTE(review): the two trailing 0 arguments look like "no filtering"
    # thresholds - confirm against generate_rules().
    rules = generate_rules(
        itemsets, itemset_counts, index.num_transactions, 0, 0)

    readable = [
        (deitemize(r[0]), deitemize(r[1]), r[2], r[3], r[4])
        for r in rules
    ]
    print("rules")
    print(readable)

    for lhs, rhs, conf, lift_value, sup in rules:
        line = "{}, {} conf={:.4f}, {:.4f}, {:.4f}".format(
            lhs, rhs, conf, lift_value, sup)
        print(line)

    # Lengths are equal, so walking the expected rules visits the same
    # entries an index loop over range(len(rules)) would.
    assert len(rules) == len(expected_rules)
    for wanted in expected_rules:
        assert wanted in rules