def gen_rules(filepath, args):
    """Mine association rules from a transaction file using ``fpgrowth``.

    Every line of the file is split on whitespace and treated as one
    transaction.

    Args:
        filepath: Path of the UTF-8 text file holding the transactions.
        args: Object exposing ``support`` (multiplied by the number of
            transactions to get the minimum count handed to the FP-tree
            routines) and ``confidence`` (forwarded to
            ``fpgrowth.generateRules`` as ``minConf``).

    Returns:
        The generated rules whose element at index 1 has length 1, sorted by
        the element at index 2 in descending order (presumably consequent and
        confidence respectively -- confirm against ``fpgrowth.generateRules``).
        An empty list is returned when the file does not exist or when it
        holds 100 transactions or fewer.
    """
    if not os.path.exists(filepath):
        print(filepath, 'not exists,please set the filepath')
        # Stop here instead of falling through to open(), which would raise
        # FileNotFoundError right after the message above was printed.
        return []

    print('\n\n\n')
    print(
        '------------------------------处理文件%s-----------------------------------'
        % (os.path.basename(filepath)))
    with open(filepath, encoding='utf-8') as f:
        dataSet = [line.split() for line in f]

    if len(dataSet) <= 100:
        # With 100 transactions or fewer, no rules are generated.
        print(
            '-----------------------交易数据小于100条,不生成rules-------------------------'
        )
        return []

    # Convert the relative support into an absolute transaction count.
    n = args.support * len(dataSet)
    initSet = fpgrowth.createInitSet(dataSet)
    myFPtree, myHeaderTab = fpgrowth.createFPtree(initSet, n)
    freqItems = []
    # mineFPtree fills freqItems in place.
    fpgrowth.mineFPtree(myFPtree, myHeaderTab, n, set(), freqItems)

    # Compute support values of the frequent itemsets.
    suppData = fpgrowth.calSuppData(myHeaderTab, freqItems, len(dataSet))
    # The empty itemset is given support 1.0.
    suppData[frozenset([])] = 1.0

    rules = fpgrowth.generateRules(freqItems, suppData,
                                   minConf=args.confidence)
    # Keep only rules whose element at index 1 contains a single item.
    filter_rules = [rule for rule in rules if len(rule[1]) == 1]
    filter_rules = sorted(filter_rules, key=lambda p: p[2], reverse=True)
    print('number of association rules:\n', len(filter_rules))
    return filter_rules
# Disabled debugging snippets kept for reference (Python 2 print syntax):
# myFPtree.disp()
# print fpgrowth.findPrefixPath('z', myHeaderTab)
# print fpgrowth.findPrefixPath('r', myHeaderTab)
# print fpgrowth.findPrefixPath('t', myHeaderTab)
# freqItems = []
# fpgrowth.mineFPtree(myFPtree, myHeaderTab, 3, set([]), freqItems)
# for x in freqItems:
#     print x

# Timed FP-growth run. The original label here read "kosarak data", but the
# file actually opened below is webdocs.dat.
start = time.time()
n = 20000  # absolute minimum count passed to createFPtree / mineFPtree
# Raw string: the Windows path contains "\d" and "\w", which are not valid
# escape sequences in a normal string literal. The resulting value is the same.
with open(r"E:\dvancedos\database\webdocs.dat", "rb") as f:
    # Binary mode: every transaction is a list of bytes tokens.
    parsedDat = [line.split() for line in f]
initSet = fpgrowth.createInitSet(parsedDat)
myFPtree, myHeaderTab = fpgrowth.createFPtree(initSet, n)
freqItems = []
# mineFPtree fills freqItems in place.
fpgrowth.mineFPtree(myFPtree, myHeaderTab, n, set(), freqItems)
for x in freqItems:
    print(x)
print(time.time() - start, 'sec')

# Compute support values of the frequent itemsets.
suppData = fpgrowth.calSuppData(myHeaderTab, freqItems, len(parsedDat))
# The empty itemset is given support 1.0.
suppData[frozenset([])] = 1.0
for x, v in suppData.items():
    print(x, v)

freqItems = [frozenset(x) for x in freqItems]
# NOTE(review): the return value of generateRules is discarded here; any
# visible output would have to come from inside fpgrowth -- confirm.
fpgrowth.generateRules(freqItems, suppData)