import FPGrowth


def mineTweets(tweetArray, minSup=5):
    """Mine frequent itemsets from 14 pages of 100 tweets each."""
    parsedList = []
    for i in range(14):
        for j in range(100):
            parsedList.append(textParse(tweetArray[i][j].text))
    initDict = FPGrowth.createInitSet(parsedList)
    twitterFPTree, twitterHeaderTab = FPGrowth.createTree(initDict, minSup)
    twitterFreqList = []
    # mineTree takes the minimum support before the prefix set and result list
    FPGrowth.mineTree(twitterFPTree, twitterHeaderTab, minSup, set([]), twitterFreqList)
    return twitterFreqList
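# textParse() is defined elsewhere in the original file. A minimal sketch of a
# tokenizer that would fit here, assuming tweets are split on non-word characters,
# lower-cased, and very short tokens dropped (the exact cleaning rules, e.g. URL
# stripping, are an assumption):
import re

def textParse(bigString):
    tokens = re.split(r'\W+', bigString)
    return [tok.lower() for tok in tokens if len(tok) > 2]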
def main():
    # Connect to the database and fuzzify the training data
    ppsd_data = connect_db('train')
    fuzzified_data = Fuzzification.fuzzify(ppsd_data)
    # print(fuzzified_data)

    # Insert fuzzified data
    insert_db(fuzzified_data, ppsd_data)

    # Load the fuzzified transactions
    fuzzy_csv = pd.read_csv('fuzzified.csv')

    # FP-Growth
    start_time = time.time()
    rules, confi = FPGrowth.mine('fuzzified.csv')
    print("FP: --- %s seconds ---" % (time.time() - start_time))
    insert_fprules(rules, confi)
    # insert_fprulesCSV(rules, confi)

    # Apriori
    start_time = time.time()
    ant, con, conf, lift = Apriori.mine('fuzzified.csv')
    print("Apriori: --- %s seconds ---" % (time.time() - start_time))
    insert_arules(ant, con, conf, lift)
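# The repeated start_time/print pattern above could be factored into a small timing
# helper. A sketch using only the standard library (the helper name `timed` is made
# up here, not part of the original code):
import time
from contextlib import contextmanager

@contextmanager
def timed(label):
    start = time.time()
    try:
        yield
    finally:
        print("%s: --- %s seconds ---" % (label, time.time() - start))

# usage sketch:
# with timed("FP"):
#     rules, confi = FPGrowth.mine('fuzzified.csv')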
import FPGrowth

'''
rootNode = FPGrowth.treeNode('pyramid', 9, None)
rootNode.children['eye'] = FPGrowth.treeNode('eye', 13, None)
rootNode.children['phoenix'] = FPGrowth.treeNode('phoenix', 3, None)
rootNode.disp()
'''

'''
simpDat = FPGrowth.loadSimpDat()
initSet = FPGrowth.createInitSet(simpDat)
myFPtree, myHeaderTab = FPGrowth.createTree(initSet, 3)
# myFPtree.disp()
# print FPGrowth.findPrefixPath('t', myHeaderTab['t'][1])
freqItems = []
FPGrowth.mineTree(myFPtree, myHeaderTab, 3, set([]), freqItems)
print freqItems
'''

FPGrowth.kosarakTest("E:/TestDatas/MachineLearningInAction/Ch12/kosarak.dat")
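# kosarakTest() is not shown in these snippets. Judging from the clickstream example
# later in this collection, it presumably loads kosarak.dat and mines it with
# FP-Growth; a sketch under that assumption (the name and support threshold are
# guesses, not the actual implementation):
def kosarakTestSketch(path, minSup=100000):
    with open(path, 'r') as fr:
        parsedData = [line.split() for line in fr.readlines()]
    initDict = FPGrowth.createInitSet(parsedData)
    tree, headerTab = FPGrowth.createTree(initDict, minSup)
    freqItems = []
    FPGrowth.mineTree(tree, headerTab, minSup, set([]), freqItems)
    print(freqItems)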
try:
    freq_sets, HTs, item_list = getPickledData(filename, min_sup, confidence)
except (OSError, IOError) as e:
    use_pickled = False

if not use_pickled:
    if algo == 'ap':  # '==' (not 'is') for string comparison
        freq_sets, item_list = Apriori.aprioriAlgo(data_folder + filename, min_sup)
    else:
        freq_sets, item_list = FPGrowth.fPGrowthAlgo(data_folder + filename, min_sup)
    HTs = HashTree.generateHTs(data_folder + filename, freq_sets, min_sup)

max_freq_sets = FrequentSetOps.getMaxFreqSets(freq_sets)
closed_freq_sets = FrequentSetOps.getClosedFreqSets(freq_sets, HTs)
print(len(max_freq_sets))
print(len(closed_freq_sets))

rules = FrequentSetOps.findRules(freq_sets, HTs, confidence)
HashTree.documentFreqSets(HTs, item_list, min_sup, filename)
HashTree.documentRules(HTs, item_list, rules, min_sup, confidence,
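# getPickledData() is not shown in this fragment. A sketch of one way it might work,
# assuming earlier runs were cached with pickle.dump() to a file whose name is derived
# from the parameters (the cache naming is purely an assumption):
import pickle

def getPickledData(filename, min_sup, confidence):
    cache = "%s_%s_%s.pickle" % (filename, min_sup, confidence)
    # raises OSError/IOError if no cached result exists, which the caller handles
    with open(cache, 'rb') as f:
        freq_sets, HTs, item_list = pickle.load(f)
    return freq_sets, HTs, item_list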
            str = str + data[i + n]
            if n != k - 1:
                str = str + " "
        final_list.append(str)
    data_list = final_list
    result_list.append(data_list)
    return result_list


result = readFile(k=4)

# Apriori method
# L, supportData = Apriori.apriori(result, minSupport=0.05)
# rules = Apriori.generateRules(L, supportData)
# print(L)
# print(supportData)
# print(rules)
# print("count Apriori: " + str(len(rules)))

# FP-Growth
initSet = FPGrowth.createInitSet(result)
myFPtree, myHeaderTab = FPGrowth.createFPtree(initSet, 8)  # build the FP-tree from the dataset (minimum support 8)
freqItems = []
# mine the FP-tree
FPGrowth.mineFPtree(myFPtree, myHeaderTab, 8, set([]), freqItems)

counta = 0
for x in freqItems:
    if len(x) != 1:
        print(x)
        counta = counta + 1
print("count: " + str(counta))
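# The top of readFile() is missing from this fragment. A complete sketch consistent
# with the visible tail, assuming the input file ('data.txt' is a placeholder) holds
# whitespace-separated tokens, one transaction per line, and that each group of k
# consecutive tokens is joined into one space-separated item (whether the groups
# overlap is not visible in the fragment, so non-overlapping groups are assumed):
def readFileSketch(k=4, path='data.txt'):
    result_list = []
    with open(path, 'r') as f:
        for line in f:
            data = line.split()
            data_list = [" ".join(data[i:i + k]) for i in range(0, len(data) - k + 1, k)]
            result_list.append(data_list)
    return result_list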
# -*- coding: utf-8 -*-
"""
FPGrowth: mining frequent patterns from a news site clickstream

@author: Jerry
"""
import FPGrowth

if __name__ == '__main__':
    fr = open('kosarak.dat', 'r')
    parsedData = [line.split() for line in fr.readlines()]
    fr.close()
    initDict = FPGrowth.createInitSet(parsedData)
    newsFPTree, newsHeaderTab = FPGrowth.createTree(initDict, 100000)
    newsFPTree.disp()
    newsFreqItems = []
    FPGrowth.mineTree(newsFPTree, newsHeaderTab, 100000, set([]), newsFreqItems)
    print('newsFreqItems=', newsFreqItems)
""" import FPGrowth # 创建数据集 def loadSimpDat(): simpDat = [['r', 'z', 'h', 'j', 'p'], ['z', 'y', 'x', 'w', 'v', 'u', 't', 's'], ['z'], ['r', 'x', 'n', 'o', 's'], ['y', 'r', 'x', 'z', 'q', 't', 'p'], ['y', 'z', 'x', 'e', 'q', 's', 't', 'm']] return simpDat if __name__ == '__main__': simpleData = loadSimpDat() initDict = FPGrowth.createInitSet(simpleData) myFPTree, myHeaderTab = FPGrowth.createTree(initDict, 3) myFPTree.disp() print( '-----------------------------------------------------------------------' ) print(FPGrowth.findPrefixPath('x', myHeaderTab['x'][1])) print(FPGrowth.findPrefixPath('z', myHeaderTab['z'][1])) print(FPGrowth.findPrefixPath('r', myHeaderTab['r'][1])) print( '-----------------------------------------------------------------------' )
# -*- coding: utf-8 -*-
import part2
import Apriori
import FPGrowth

if __name__ == '__main__':
    # get the LSH buckets as the transaction dataset
    dataset = part2.LSH()

    # Apriori method
    L, supportData = Apriori.apriori(dataset, minSupport=0.05)
    rules = Apriori.generateRules(L, supportData)
    print(L)
    print(supportData)
    print(rules)
    print("count Apriori: " + str(len(rules)))

    # FP-Growth
    initSet = FPGrowth.createInitSet(dataset)
    myFPtree, myHeaderTab = FPGrowth.createFPtree(initSet, 2)  # build the FP-tree from the dataset (minimum support 2)
    freqItems = []
    # mine the FP-tree
    FPGrowth.mineFPtree(myFPtree, myHeaderTab, 2, set([]), freqItems)

    count = 0
    for x in freqItems:
        if len(x) != 1:
            print(x)
            count = count + 1
    print("count: " + str(count))
import time

import Apriori
import FPGrowth
import GSP
import TestData

if __name__ == '__main__':
    # dic = ReadData.readcsv_notime("../data/new4gtrain.csv")
    # dic_t = ReadData.readcsv_withtime("../data/new4gtrain.csv")
    dic = TestData.gettest()
    dic_t = TestData.getTimeTest()

    start = time.time()
    Afrequent, sup = Apriori.apriori(dataSet=dic, minSupport=2)
    end = time.time()
    ATime = end - start

    start = time.time()
    Ffrequent = FPGrowth.FPgrowth(dataSet=dic, minsupport=2)
    end = time.time()
    FTime = end - start

    start = time.time()
    Gfrequent = GSP.gsp(dataSet=dic_t, minsupport=2)
    end = time.time()
    GTime = end - start

    start = time.time()
    Sfrequent = GSP.gsp(dataSet=dic_t, minsupport=2)
    end = time.time()
    STime = end - start

    print(dic)
    print("Apriori: time %.3fs FrequentSet: %s" % (ATime, Afrequent))
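    # The script is cut off above after printing the Apriori result. A sketch of how
    # the remaining timings might be reported, assuming the same format (the original
    # continuation is not shown, so these lines are an assumption):
    print("FPGrowth: time %.3fs FrequentSet: %s" % (FTime, Ffrequent))
    print("GSP: time %.3fs FrequentSet: %s" % (GTime, Gfrequent))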