def FP_growth(self, headnode, headtable):
    """Mine the tree rooted at ``headnode`` and record results in ``tree.frequent``.

    When ``tree.checkTreeOneWay(headnode)`` is truthy, the result of
    ``unit.generateCombination(headtable, self.a, self.support)`` is added to
    ``tree.frequent`` in one step (only if it is non-empty).

    Otherwise every ``item`` of ``headtable`` is processed separately:
    ``unit.generateSubset`` yields its conditional pattern base, a nested
    ``FP_Grow_tree.FP_Grow_tree`` is built from that base with the prefix
    ``self.a + [item]``, and each entry of the nested tree's ``f.pretable``
    is appended to ``tree.frequent`` as a
    ``(comma-joined pattern string, entry[1])`` tuple.

    Args:
        headnode: Node handed to ``tree.checkTreeOneWay``.
        headtable: Iterable of items; also passed on to the ``unit`` helpers.

    Returns:
        None. The only visible effect is the mutation of ``tree.frequent``.
    """
    base = self.a

    # Single-path case: all combinations are produced by one helper call.
    if tree.checkTreeOneWay(headnode):
        combos = unit.generateCombination(headtable, base, self.support)
        if len(combos) > 0:
            tree.frequent += combos
        return

    for item in headtable:
        # cond_base is the conditional pattern base for this item.
        cond_base = unit.generateSubset(headtable, item, self.a, tree.frequent)
        if not cond_base:
            continue
        if not item:
            continue

        # Prefix for the nested tree: current base extended with this item.
        prefix = base[:]
        prefix.append(item)
        subtree = FP_Grow_tree.FP_Grow_tree(cond_base, prefix, self.support)

        for entry in subtree.f.pretable:
            # Build the pattern from a fresh copy of the base so that nothing
            # the nested tree may have done to ``prefix`` leaks into it.
            pattern = base[:]
            pattern.append(item)
            pattern.append(entry[0])
            # entry[1] is stored unchanged (presumably the support count -- confirm).
            tree.frequent.append((",".join(str(part) for part in pattern), entry[1]))
arr.append(list) list = [] return arr
# NOTE(review): the line above is kept verbatim. It is the tail of a
# definition that starts before this excerpt (presumably add_matix, which is
# called below -- confirm); the nesting of its three statements cannot be
# recovered from here.

# Demo script: builds an FP-growth tree from a transaction data set and prints
# the results.

# Small hand-written transaction sets (each inner list is one transaction).
sample = [['milk', 'eggs', 'bread', 'chips'],
          ['eggs', 'popcorn', 'chips', 'beer'],
          ['eggs', 'bread', 'chips'],
          ['milk', 'eggs', 'bread', 'popcorn', 'chips', 'beer'],
          ['milk', 'bread', 'beer'],
          ['eggs', 'bread', 'beer'],
          ['milk', 'bread', 'chips'],
          ['milk', 'eggs', 'bread', 'butter', 'chips'],
          ['milk', 'eggs', 'butter', 'chips']]
sample2 = [['r', 'z', 'h', 'j', 'p'],
           ['z', 'y', 'x', 'w', 'v', 'u', 't', 's'],
           ['z'],
           ['r', 'x', 'n', 'o', 's'],
           ['y', 'r', 'x', 'z', 'q', 't', 'p'],
           ['y', 'z', 'x', 'e', 'q', 's', 't', 'm']]
# Same transactions as ``sample``, with the item names written in Chinese.
sample1 = [[u'牛奶', u'鸡蛋', u'面包', u'薯片'],
           [u'鸡蛋', u'爆米花', u'薯片', u'啤酒'],
           [u'鸡蛋', u'面包', u'薯片'],
           [u'牛奶', u'鸡蛋', u'面包', u'爆米花', u'薯片', u'啤酒'],
           [u'牛奶', u'面包', u'啤酒'],
           [u'鸡蛋', u'面包', u'啤酒'],
           [u'牛奶', u'面包', u'薯片'],
           [u'牛奶', u'鸡蛋', u'面包', u'黄油', u'薯片'],
           [u'牛奶', u'鸡蛋', u'黄油', u'薯片']]
# Only sample3 is used below; sample, sample1 and sample2 are not referenced
# again in this excerpt.
sample3 = add_matix()
#print(sample1)
## Parameters: the first argument is the transaction data set, [] is the base
## (prefix) used during recursion, support is the minimum support.
support = 200
ff = FP_Grow_tree.FP_Grow_tree(sample3, [], support)
## Print the frequent itemsets.
ff.printfrequent()
# 0.9 is presumably the minimum confidence threshold -- confirm against
# printconfident's definition.
ff.printconfident(0.9)
data.iloc[r,c]=data.iloc[0,c] data.to_excel("./data666.xls") """ data = pd.read_excel("./data666.xls", encoding="utf-8") # 将交易记录变成列表的形式 dataSet = [] for i in range(1, 748): d1 = data.iloc[i, :].tolist() d1 = [x for x in d1 if str(x) != 'nan'] dataSet.append(d1) #print(dataSet) # ========================================================================================= print(">>>>>>>>>>>>>>> 使用FP_Growth_tree >>>>>>>>>>>>>>>>>>>>>") time1 = time.time() support = 300 # 支持度设为300 ff = FP_Grow_tree.FP_Grow_tree(dataSet, [], support) # 打印频繁集 ff.printfrequent() time2 = time.time() print('FP_Growth_tree耗时:', str(time2 - time1)) # ======================================================================================= print(">>>>>>>>>>>>>>> 使用Apriori >>>>>>>>>>>>>>>>>>>>>") time1 = time.time() l, suppdata = apri.apriori(dataSet) # 对数据集使用apriori算法 print(l) rules = apri.generateRules(l, suppdata, minconf=0.7) # 生成关联规则,最小置信度设为0.7 print(rules) time2 = time.time() print('Apriori耗时:', time2 - time1)