Esempio n. 1
0
def run_main():
    """Load the sample data and print the result of an Apriori run.

    Prints the loaded data set and its length, then the first value returned
    by ``apriori.apriori`` (called with 0.2 as its second argument), a
    separator line, and that value's first element.
    """
    # Data preparation (changeData / handleData) is currently disabled.

    # Exercise the frequent-itemset search.
    transactions = apriori.loadDataSet()
    print(transactions)
    print(len(transactions))

    # Earlier experiments built C1 and the list of transaction sets by hand
    # (apriori.createC1 / list(map(set, ...))); apriori.apriori is called
    # directly instead.
    itemsets_by_size, _support = apriori.apriori(transactions, 0.2)
    print(itemsets_by_size)
    print("========")
    print(itemsets_by_size[0])
Esempio n. 2
0
def test1():
    """Run Apriori and rule generation on the sample data and print the results.

    Prints, in order: the data set, the first and second values returned by
    ``apriori.apriori(dataSet, 0.5)``, and the value returned by
    ``apriori.generateRules`` with ``minConf=0.5``.
    """
    transactions = apriori.loadDataSet()
    # Recorded sample output: [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
    print(transactions)

    # Notes kept from earlier step-by-step experiments (not executed):
    #   C1 = apriori.createC1(dataSet)
    #     set(C1)  -> {frozenset({4}), frozenset({5}), frozenset({2}), frozenset({3}), frozenset({1})}
    #     list(C1) -> [frozenset({1}), frozenset({2}), frozenset({3}), frozenset({4}), frozenset({5})]
    #   D = map(set, dataSet)
    #     list(D)  -> [{1, 3, 4}, {2, 3, 5}, {1, 2, 3, 5}, {2, 5}]
    #     Caution: once list(...) has consumed a map object it is empty;
    #     set(...) appears to drain it in the same way.
    #   L1, suppData0 = apriori.scanD(D, C1, 0.5)
    #     Cannot be called like this any more: D and C1 must be turned into
    #     lists first.
    #     L1        -> [frozenset({1}), frozenset({3}), frozenset({2}), frozenset({5})]
    #     suppData0 -> {frozenset({4}): 0.25, frozenset({5}): 0.75, frozenset({2}): 0.75, frozenset({3}): 0.75, frozenset({1}): 0.5}

    frequent_itemsets, support_by_itemset = apriori.apriori(transactions, 0.5)
    print(frequent_itemsets)
    print(support_by_itemset)
    print(apriori.generateRules(frequent_itemsets, support_by_itemset, minConf=0.5))
Esempio n. 3
0
class Test:
    """Script-style check of ``apriori.createC1`` and ``apriori.scanD``.

    The statements run in the class body, and only when this file is executed
    as a script; importing the module defines an empty class.
    """
    if __name__ == "__main__":
        dataSet = apriori.loadDataSet()
        print(dataSet)

        # Materialise both inputs. On Python 3 ``map`` returns a one-shot
        # iterator, which cannot be scanned more than once and has no len().
        # list() is harmless if createC1 already returns a list.
        C1 = list(apriori.createC1(dataSet))
        D = list(map(set, dataSet))

        # 0.5 is passed as scanD's third argument (presumably the minimum
        # support threshold -- confirm against apriori.scanD).
        L1, supportData = apriori.scanD(D, C1, 0.5)
        print(L1)
        print(supportData)
Esempio n. 4
0
import apriori

# Run Apriori with minSupport=0.1, then generate rules with minConf=1.0.
dataSet = apriori.loadDataSet()
L, supportData = apriori.apriori(dataSet, minSupport=0.1)

# A parenthesised single-argument print produces the same output on
# Python 2 and Python 3; the bare print statement only parses on Python 2.
print("[result]-----------------------------------------")
rules = apriori.generateRules(L, supportData, minConf=1.0)
Esempio n. 5
0
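# Support: the support of an itemset is the fraction of records in the data set that contain it,
# i.e. (number of records containing the itemset) / (total number of records).
# Confidence: defined for an association rule such as {diapers} -> {beer}:
#   confidence({diapers} -> {beer}) = support({diapers, beer}) / support({diapers}) = (3/5) / (4/5) = 3/4,
# which means the rule holds for 3/4 of all records that contain diapers.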

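# Apriori principle: it reduces the number of itemsets that may be of interest. It states that if an
# itemset is frequent, then all of its subsets are frequent as well.
# Conversely, if an itemset is infrequent, then all of its supersets are infrequent too.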


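# Question 1: when association rules are generated from an itemset with three elements, why are only
# rules of the form (1 item) -> (2 items) computed, and not (2 items) -> (1 item)?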

import apriori
from votesmart import votesmart

# Driver: run Apriori on the sample data with 0.5 as the second argument and
# print both returned values.
dataSet = apriori.loadDataSet()

# Step-by-step experiments kept for reference (not executed):
#   C1 = apriori.createC1(dataSet)
#   print("C1 is %s" % C1)
#   D = map(set, dataSet)
#   print("%r" % D)
#   L1, suppData0 = apriori.scanD(list(D), list(C1), 0.5)
#   print(L1)
#   print(suppData0)

L, suppData = apriori.apriori(dataSet, 0.5)
print("L is", L)
print("suppData is", suppData)
# Recorded output:
# L is [[frozenset({1}), frozenset({3}), frozenset({2}), frozenset({5})], [frozenset({3, 5}), frozenset({1, 3}), frozenset({2, 5}), frozenset({2, 3})], [frozenset({2, 3, 5})], []]
# suppData is {frozenset({5}): 0.75, frozenset({3}): 0.75, frozenset({2, 3, 5}): 0.5, frozenset({3, 5}): 0.5, frozenset({2, 3}): 0.5, frozenset({2, 5}): 0.75, frozenset({1}): 0.5, frozenset({1, 3}): 0.5, frozenset({2}): 0.75}

#关联规则挖掘

def aprioriGen(Lk, k):  #creates Ck
    """Build the candidate itemsets Ck by joining pairs of itemsets from Lk.

    Two itemsets are joined (set union) when the first ``k - 2`` items of
    their sorted contents are equal.

    Args:
        Lk: Indexable sequence of sets/frozensets to join. Items inside each
            set must be mutually orderable, because they are sorted.
        k: Selects the compared prefix length, ``k - 2``.

    Returns:
        A list with one union per matching pair ``(i, j)``, ``i < j``, in
        pair order.
    """
    # Sort BEFORE slicing. Set iteration order is arbitrary, so slicing the
    # unsorted items compares prefixes that depend on hash order and can
    # miss valid joins. Each prefix is computed once, not inside the loop.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]

    retList = []
    for i, left_prefix in enumerate(prefixes):
        for j in range(i + 1, len(prefixes)):
            if left_prefix == prefixes[j]:  # first k-2 items are equal
                retList.append(Lk[i] | Lk[j])  # set union
    return retList


# Inlined, module-level version of:  def apriori(dataSet, minSupport = 0.5)
dataSet = loadDataSet()
minSupport = 0.5

# Level 1: scan the transaction sets against the C1 candidates.
C1 = createC1(dataSet)
D = list(map(set, dataSet))
L1, supportData = scanD(D, C1, minSupport)

# L collects one scanD result per level; the most recent one is always the
# last element, so L[-1] is the same entry as L[k - 2].
L = [L1]
k = 2
while len(L[-1]) > 0:
    Ck = aprioriGen(L[-1], k)
    # Scan the data again to get Lk and its support figures.
    Lk, supK = scanD(D, Ck, minSupport)
    supportData.update(supK)
    L.append(Lk)
    k += 1
# The function form would finish with:  return L, supportData
Esempio n. 7
0
def test1():
    """Run createC1, apriori (minSupport=0.5) and generateRules (minConf=0.5).

    Nothing is printed by this function itself and nothing is returned.
    """
    transactions = apriori.loadDataSet()
    # The result is not used; the call is kept so behaviour is unchanged.
    apriori.createC1(transactions)
    frequent_itemsets, support_by_itemset = apriori.apriori(
        transactions, minSupport=0.5
    )
    # print(frequent_itemsets)
    apriori.generateRules(frequent_itemsets, support_by_itemset, minConf=0.5)
Esempio n. 8
0

import apriori as ap

# Walk through the Apriori steps by hand, then run the full algorithm and
# generate rules with minConf=0.4.
dataSet = ap.loadDataSet()

# Materialise both inputs. On Python 3 ``map`` returns a one-shot iterator,
# and D is used twice below (scanD, then apriori). list() is harmless if the
# value is already a list.
C1 = list(ap.createC1(dataSet))
D = list(map(set, dataSet))

L1, suppData0 = ap.scanD(D, C1, 0.5)
L, S = ap.apriori(D, 0.5)

# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(L)

List = ap.generateRules(L, S, minConf=0.4)
print(List)


Esempio n. 9
0
import apriori
# Load the data set from the groceries CSV file.
dataSet = apriori.loadDataSet('Groceries.csv')

# Original tuning note for the support threshold: minSupport<0.05
L, suppData = apriori.apriori(
    dataSet,
    minSupport=0.03,
)

# Original tuning note for the confidence threshold: minConf<4
rules = apriori.generateRules(
    L,
    suppData,
    minConf=0.2,
)
print(rules)





Esempio n. 10
0
def test():
    """Run Apriori with its default arguments and print the generated rules.

    Prints the loaded data set prefixed with ``DataSet:``, then the value
    returned by ``apriori.generateRules`` with ``minConf=0.5``.
    """
    dataSet = apriori.loadDataSet()
    # %-formatting gives "DataSet: <data>" on both Python 2 and Python 3,
    # matching the output of the old statement `print "DataSet:", dataSet`.
    print("DataSet: %s" % (dataSet,))
    L, suppData = apriori.apriori(dataSet)
    rules = apriori.generateRules(L, suppData, minConf=0.5)
    print(rules)