예제 #1
0
파일: tree.py 프로젝트: YXWisaboy/MYPYTHON
	def FP_growth(self,headnode,headtable):
		"""Collect frequent itemsets for this tree into ``tree.frequent``.

		If ``tree.checkTreeOneWay(headnode)`` is true, whatever
		``unit.generateCombination`` returns is added to ``tree.frequent``
		(when non-empty) and the method is done.

		Otherwise every item of ``headtable`` is visited: a conditional
		``FP_Grow_tree`` is built from the data returned by
		``unit.generateSubset``, and for each row ``jix`` of that tree's
		``f.pretable`` a ``(label, jix[1])`` tuple is appended, where ``label``
		is ``self.a`` + ``item`` + ``jix[0]`` joined with commas.
		NOTE(review): ``jix[1]`` is presumably a support count -- confirm
		against the code that fills ``pretable``.

		Returns None; results are accumulated in ``tree.frequent``.
		"""
		prefix=self.a
		if tree.checkTreeOneWay(headnode):
			found=unit.generateCombination(headtable,prefix,self.support)
			if len(found)>0:
				tree.frequent+=found
			return
		for item in headtable:
			# datas is the conditional pattern base for the current item.
			datas=unit.generateSubset(headtable,item,self.a,tree.frequent)
			if not datas or not item:
				continue
			grown=prefix[:]
			grown.append(item)
			subtree=FP_Grow_tree.FP_Grow_tree(datas,grown,self.support)
			for jix in subtree.f.pretable:
				# Start from a fresh copy of the prefix for every recorded itemset.
				itemset=prefix[:]
				itemset.extend((item,jix[0]))
				label=",".join(map(str,itemset))
				tree.frequent.append((label,jix[1]))
예제 #2
0
        arr.append(list)
        list = []
    return arr


# Small English-language basket data set, one transaction per row.
sample = [
    ['milk', 'eggs', 'bread', 'chips'],
    ['eggs', 'popcorn', 'chips', 'beer'],
    ['eggs', 'bread', 'chips'],
    ['milk', 'eggs', 'bread', 'popcorn', 'chips', 'beer'],
    ['milk', 'bread', 'beer'],
    ['eggs', 'bread', 'beer'],
    ['milk', 'bread', 'chips'],
    ['milk', 'eggs', 'bread', 'butter', 'chips'],
    ['milk', 'eggs', 'butter', 'chips'],
]

# Single-letter item data set.
sample2 = [
    ['r', 'z', 'h', 'j', 'p'],
    ['z', 'y', 'x', 'w', 'v', 'u', 't', 's'],
    ['z'],
    ['r', 'x', 'n', 'o', 's'],
    ['y', 'r', 'x', 'z', 'q', 't', 'p'],
    ['y', 'z', 'x', 'e', 'q', 's', 't', 'm'],
]

# Basket data set with Chinese item names, same shape as `sample`.
sample1 = [
    [u'牛奶', u'鸡蛋', u'面包', u'薯片'],
    [u'鸡蛋', u'爆米花', u'薯片', u'啤酒'],
    [u'鸡蛋', u'面包', u'薯片'],
    [u'牛奶', u'鸡蛋', u'面包', u'爆米花', u'薯片', u'啤酒'],
    [u'牛奶', u'面包', u'啤酒'],
    [u'鸡蛋', u'面包', u'啤酒'],
    [u'牛奶', u'面包', u'薯片'],
    [u'牛奶', u'鸡蛋', u'面包', u'黄油', u'薯片'],
    [u'牛奶', u'鸡蛋', u'黄油', u'薯片'],
]

# Data set produced by add_matix(); this is the one actually mined below.
sample3 = add_matix()

# Constructor arguments: the transaction data set, [] as the base (prefix)
# used during recursion, and `support` as the minimum support.
support = 200
ff = FP_Grow_tree.FP_Grow_tree(sample3, [], support)

# Print the frequent itemsets.
ff.printfrequent()
# NOTE(review): 0.9 is presumably a minimum confidence threshold -- confirm
# against printconfident.
ff.printconfident(0.9)
예제 #3
0
            data.iloc[r,c]=data.iloc[0,c]
data.to_excel("./data666.xls")
"""
# pandas.read_excel does not accept an `encoding` keyword in current releases
# (passing one raises TypeError), so the argument is omitted.
data = pd.read_excel("./data666.xls")
# Turn the transaction records into a list of lists.
dataSet = []
# NOTE(review): rows 1..747 are read by hard-coded position and row 0 is
# skipped -- presumably row 0 holds the item names; confirm against the
# workbook before changing the range.
for i in range(1, 748):
    d1 = data.iloc[i, :].tolist()
    # Drop values whose string form is 'nan' (presumably empty cells).
    d1 = [x for x in d1 if str(x) != 'nan']
    dataSet.append(d1)

# =========================================================================================
print(">>>>>>>>>>>>>>> 使用FP_Growth_tree >>>>>>>>>>>>>>>>>>>>>")
time1 = time.time()
support = 300  # minimum support set to 300
ff = FP_Grow_tree.FP_Grow_tree(dataSet, [], support)
# Print the frequent itemsets.
ff.printfrequent()
time2 = time.time()
print('FP_Growth_tree耗时:', str(time2 - time1))

# =======================================================================================
print(">>>>>>>>>>>>>>> 使用Apriori >>>>>>>>>>>>>>>>>>>>>")
time1 = time.time()
l, suppdata = apri.apriori(dataSet)  # run the Apriori algorithm on the data set
print(l)
# Generate association rules with the minimum confidence set to 0.7.
rules = apri.generateRules(l, suppdata, minconf=0.7)
print(rules)
time2 = time.time()
print('Apriori耗时:', time2 - time1)