예제 #1
0
def main(datafile, min_sup, min_conf, name_file):
    """Mine frequent itemsets and association rules from CSV rows and print them.

    Each row of ``datafile`` is parsed with ``csv.reader``. Column 0 is
    skipped; every remaining cell is converted with ``int`` and the row is
    stored as a ``frozenset`` of those integers (the original note says the
    input is expected to be in the sparse vector format).

    Args:
        datafile: Iterable of CSV lines (for example an open file).
        min_sup: Threshold forwarded to ``apriori.find_frequent_itemsets``.
        min_conf: Threshold forwarded to ``apriori.find_association_rules``.
        name_file: Optional iterable of CSV lines. Column 0 is the integer
            item id; its label is columns 1 and 2 concatenated. When falsy,
            only the rules are printed, using the raw item ids.
    """
    skipped_columns = {0}
    transactions = []
    largest_item = 0
    for record in csv.reader(datafile):
        items = [
            int(cell)
            for column, cell in enumerate(record)
            if column not in skipped_columns
        ]
        if items:
            largest_item = max(largest_item, max(items))
        transactions.append(frozenset(items))

    # The miner receives one past the largest item id seen in the data.
    freq_sets = apriori.find_frequent_itemsets(transactions, largest_item + 1, min_sup)
    rules = apriori.find_association_rules(transactions, freq_sets, min_conf)

    if not name_file:
        # No label table: report the rules with bare item ids, one per line.
        for number, rule in enumerate(rules):
            lhs = ", ".join(str(item) for item in rule[0])
            print("Rule {}:     {}  ---> {}    [sup={} conf={}]".format(
                number, lhs, rule[1], rule[2] * 100, rule[3] * 100))
        return

    lookup = {int(entry[0]): entry[1] + entry[2] for entry in csv.reader(name_file)}

    print("\n##Skyline frequent itemsets\n~~~")
    for number, itemset in enumerate(freq_sets):
        members = ", ".join(lookup[item] for item in itemset)
        print("Itemset {}:\n\tContains: {}\n\tSupport: {}".format(
            number, members, apriori.support(itemset, transactions)))

    print("~~~\n\n##Skyline association rules\n~~~")
    for number, rule in enumerate(rules):
        lhs = ", ".join(lookup[item] for item in rule[0])
        print("Rule {}:\n\tLHS: {}\n\tRHS: {}\n\tSupport: {}\n\tConfidence: {}".format(
            number, lhs, lookup[rule[1]], rule[2] * 100, rule[3] * 100))
    print("~~~")
예제 #2
0
    transactions = []
    for (id, transaction) in dict.items():
        transactions.append(transaction)
    return list(transactions)


## Example of usage
# Data file syntax: userID, websiteName

# NOTE: you may also want to strip trailing whitespace such as `\n` from each line read from the data file
# Load the transactions once, then grow the queried item list one entry at a
# time, printing the support of the accumulated list after each addition.
transactions = loadData('weblog.csv')
items = []
for item, heading in (
    ('Knowledge Base', "Support for Knowledge Base"),
    ('Support Desktop', "Support for Knowledge Base and Support Desktop"),
    ('MS Office', "Support for Knowledge Base,Support Desktop and MS Office"),
):
    items.append(item)
    print(heading)
    print(ap.support(items, transactions))

print("Confidence For Knowledge Base => Support Desktop")
print(ap.confidence('Knowledge Base', 'Support Desktop', transactions))

print("Frequent Items sets")
예제 #3
0
# Use support instead of support count: each entry's count (s[1]) is divided
# by len(datum), and its first element is passed through getNames so the
# course names are included for each sequence.
# float() forces true division; under Python 2, int / int floors, which would
# turn every count smaller than len(datum) into 0.
bestSeqs = map(lambda s: [getNames(s[0], courseNames), s[1] / float(len(datum))], bestSeqs)

# Append the result to popularArches.py as a "seqs = [...]" line.
with open("popularArches.py", "a") as myfile:
	myfile.write("seqs = [")
	myfile.write(str(bestSeqs))
	myfile.write("] \n")


exit(1)
regs = float(len(datumSeq))
print "a"
tmp = map(lambda g: [g[0], g[1]/regs, apriori.support(apriori.supportList(g[0], datumSeq), regs)], graduatingSeqs)
print "b"
graduatingRules = map(lambda g: [getNames(g[0], courseNames), g[1], g[1]/g[2]], tmp)
print "c"
tmp = map(lambda g: [g[0], g[1]/regs, apriori.support(apriori.supportList(g[0], datumSeq), regs)], notGraduatingSeqs)
print "d"
notGraduatingRules = map(lambda g: [getNames(g[0], courseNames), g[1], g[1]/g[2]],tmp)
print "e"
with open("graduatingRules.py", "a") as myfile:
	myfile.write(str(graduatingRules) + "\n")

with open("notGraduatingRules.py", "a") as myfile:
	myfile.write(str(notGraduatingRules) + "\n")