def run_main():
    """Mine frequent itemsets from the sample data set and print the results.

    Prints the loaded transactions and their count, then runs
    ``apriori.apriori`` with a minimum support of 0.2 and prints the whole
    result, a separator line, and the first entry of the result.
    """
    # Data preprocessing (changeData() / handleData()) is currently disabled.

    # Frequent-itemset test.
    transactions = apriori.loadDataSet()
    print(transactions)
    print(len(transactions))

    # The support data comes back with the itemsets but is not used here.
    itemsets_by_level, _support = apriori.apriori(transactions, 0.2)
    print(itemsets_by_level)
    print("========")
    # First entry of the result -- presumably the frequent 1-itemsets; confirm
    # against apriori.apriori.
    print(itemsets_by_level[0])
def test1():
    """Run the Apriori pipeline on the sample data and print every stage.

    Prints the transactions, the frequent itemsets and support data found at
    a minimum support of 0.5, and the rules generated at a minimum confidence
    of 0.5.
    """
    transactions = apriori.loadDataSet()
    # Recorded output: [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
    print(transactions)

    # Notes from an earlier step-by-step experiment (createC1 / scanD):
    #   * D = map(set, dataSet) is emptied once list(D) has been called on it
    #     (set(...) appears to drain it as well).
    #   * scanD could not be fed D and C1 directly; both had to be turned
    #     into lists first.
    #   * Recorded L1 at 0.5 support:
    #     [frozenset({1}), frozenset({3}), frozenset({2}), frozenset({5})]
    #   * Recorded support data: {frozenset({4}): 0.25, frozenset({5}): 0.75,
    #     frozenset({2}): 0.75, frozenset({3}): 0.75, frozenset({1}): 0.5}
    frequent, support = apriori.apriori(transactions, 0.5)
    print(frequent)
    print(support)

    found_rules = apriori.generateRules(frequent, support, minConf=0.5)
    print(found_rules)
class Test:
    """Smoke test for ``apriori.scanD`` that runs only when executed as a script.

    The statements sit in the class body, so when the file is run directly
    the results are bound as class attributes (``dataSet``, ``C1``, ``D``,
    ``L1``, ``supportData``). When the file is imported, nothing runs and the
    class is empty.
    """

    if __name__ == "__main__":
        dataSet = apriori.loadDataSet()
        print(dataSet)

        # On Python 3 map() returns a one-shot iterator that has no len() and
        # is empty after a single pass. Materialise both inputs so scanD can
        # safely iterate them more than once. list() is a harmless copy if
        # createC1 already returns a list.
        C1 = list(apriori.createC1(dataSet))
        D = list(map(set, dataSet))

        # Keep the candidates that reach a minimum support of 0.5.
        L1, supportData = apriori.scanD(D, C1, 0.5)
        print(L1)
        print(supportData)
import apriori

# Mine frequent itemsets from the sample data at a minimum support of 0.1.
dataSet = apriori.loadDataSet()
L, supportData = apriori.apriori(dataSet, minSupport=0.1)

# Single-argument print(...) produces the same output on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print("[result]-----------------------------------------")

# Generate rules at a minimum confidence of 1.0.
rules = apriori.generateRules(L, supportData, minConf=1.0)
# Support: the support of an itemset is the fraction of records in the data
# set that contain the itemset, i.e.
# (number of transactions containing the itemset) / (total number of transactions).
#
# Confidence: defined for an association rule such as {diapers} ---> {beer}.
# The confidence of that rule is
#     support({diapers, beer}) / support({diapers}) = (3/5) / (4/5) = 3/4,
# meaning the rule holds for 3/4 of all records that contain diapers.
#
# Apriori principle: it reduces the number of itemsets worth examining. If an
# itemset is frequent, then all of its subsets are frequent too. Conversely,
# if an itemset is infrequent, then all of its supersets are infrequent.
#
# Open question 1: for an itemset with three elements, why do the association
# rules only consider "1 item -> 2 items" and not "2 items -> 1 item"?
import apriori
from votesmart import votesmart

dataSet = apriori.loadDataSet()

# Earlier step-by-step experiment, kept for reference:
#   C1 = apriori.createC1(dataSet)
#   D = map(set, dataSet)
#   L1, suppData0 = apriori.scanD(list(D), list(C1), 0.5)

# Frequent itemsets and their support at a minimum support of 0.5.
L, suppData = apriori.apriori(dataSet, 0.5)
print("L is", L)
print("suppData is", suppData)
# Recorded output:
# L is [[frozenset({1}), frozenset({3}), frozenset({2}), frozenset({5})],
#       [frozenset({3, 5}), frozenset({1, 3}), frozenset({2, 5}), frozenset({2, 3})],
#       [frozenset({2, 3, 5})], []]
# suppData is {frozenset({5}): 0.75, frozenset({3}): 0.75,
#              frozenset({2, 3, 5}): 0.5, frozenset({3, 5}): 0.5,
#              frozenset({2, 3}): 0.5, frozenset({2, 5}): 0.75,
#              frozenset({1}): 0.5, frozenset({1, 3}): 0.5, frozenset({2}): 0.75}

# Association-rule mining follows.
def aprioriGen(Lk, k):
    """Create the candidate itemsets Ck by joining pairs of itemsets in Lk.

    Two itemsets are joined (set union) when their first ``k - 2`` items, in
    sorted order, are equal.

    Args:
        Lk: sequence of itemsets (sets or frozensets) to join. Items within
            an itemset must be mutually orderable.
        k: size of the candidates to build; each itemset in Lk is expected
            to hold ``k - 1`` items.

    Returns:
        list of unions ``Lk[i] | Lk[j]`` for every pair ``i < j`` with a
        matching prefix, in pair order.
    """
    # Sort BEFORE slicing. Set iteration order is arbitrary, so slicing an
    # unsorted list picks an arbitrary subset rather than the k-2 smallest
    # items, which can both miss valid joins and emit duplicate candidates.
    # The prefixes are computed once per itemset, not once per pair.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    count = len(Lk)
    return [
        Lk[i] | Lk[j]  # set union
        for i in range(count)
        for j in range(i + 1, count)
        if prefixes[i] == prefixes[j]
    ]


# Inlined body of apriori(dataSet, minSupport=0.5), run at module level so
# the intermediate values stay available as globals.
dataSet = loadDataSet()
minSupport = 0.5

C1 = createC1(dataSet)
D = list(map(set, dataSet))
L1, supportData = scanD(D, C1, minSupport)
L = [L1]
k = 2
# L[k - 2] is always the most recently appended level; stop once a level
# comes back empty.
while len(L[k - 2]) > 0:
    Ck = aprioriGen(L[k - 2], k)
    Lk, supK = scanD(D, Ck, minSupport)  # scan the data set to get Lk
    supportData.update(supK)
    L.append(Lk)
    k += 1
# The function form would end with: return L, supportData
def test1():
    """Exercise candidate creation, itemset mining and rule generation.

    Runs ``createC1``, ``apriori`` (minimum support 0.5) and
    ``generateRules`` (minimum confidence 0.5) on the sample data set.
    Nothing is printed by this function itself and nothing is returned.
    """
    transactions = apriori.loadDataSet()

    # The candidates are not used below; the call is kept so the function
    # still exercises createC1.
    apriori.createC1(transactions)

    frequent, support = apriori.apriori(transactions, minSupport=0.5)
    # print(frequent)
    apriori.generateRules(frequent, support, minConf=0.5)
import apriori as ap

dataSet = ap.loadDataSet()

# D is consumed twice below (scanD, then apriori). On Python 3 a bare map()
# is a one-shot iterator that would be empty on the second use, so both
# inputs are materialised. On Python 2 list() is just a copy.
C1 = list(ap.createC1(dataSet))
D = list(map(set, dataSet))

# Frequent 1-itemsets and their support at a minimum support of 0.5.
L1, suppData0 = ap.scanD(D, C1, 0.5)

# Full frequent-itemset search over the same transactions.
L, S = ap.apriori(D, 0.5)
# Single-argument print(...) gives identical output on Python 2 and 3.
print(L)

# Rules with a minimum confidence of 0.4.
List = ap.generateRules(L, S, minConf=0.4)
print(List)
import apriori

# Load the data set.
dataSet = apriori.loadDataSet('Groceries.csv')

# Author's note: minSupport<0.05
L, suppData = apriori.apriori(
    dataSet,
    minSupport=0.03,
)

# Author's note: minConf<4
rules = apriori.generateRules(
    L,
    suppData,
    minConf=0.2,
)
print(rules)
def test():
    """Run Apriori on the sample data and print the data set and the rules.

    Uses the default arguments of ``apriori.apriori`` and a minimum
    confidence of 0.5 for ``apriori.generateRules``.
    """
    dataSet = apriori.loadDataSet()
    # A single pre-formatted argument prints "DataSet: <data>" on both
    # Python 2 and Python 3. print("DataSet:", dataSet) would print a tuple
    # on Python 2, and the bare print statement is a SyntaxError on Python 3.
    print("DataSet: %s" % (dataSet,))

    L, suppData = apriori.apriori(dataSet)
    rules = apriori.generateRules(L, suppData, minConf=0.5)
    print(rules)