Esempio n. 1
0
def mineTweets(inputfile, minSup=5):
    """Mine frequent itemsets from a text file with one record per line.

    Every line of ``inputfile`` is stripped and passed through ``textParse``;
    the parsed records are then handed to the ``fpgrowth`` helpers
    (``createInitSet`` -> ``createTree`` -> ``mineTree``).

    Args:
        inputfile: Path of the text file to read (opened with the platform
            default encoding, as before).
        minSup: Threshold forwarded to ``fpgrowth.createTree`` and
            ``fpgrowth.mineTree``.

    Returns:
        The list that ``fpgrowth.mineTree`` fills with frequent itemsets.
    """
    # Use a context manager so the file handle is closed deterministically,
    # even if textParse raises part-way through the file.
    with open(inputfile) as source:
        parsedList = [textParse(line.strip()) for line in source]
    initSet = fpgrowth.createInitSet(parsedList)
    myFPTree, myHeaderTab = fpgrowth.createTree(initSet, minSup)
    myFreqList = []
    fpgrowth.mineTree(myFPTree, myHeaderTab, minSup, set(), myFreqList)
    return myFreqList
Esempio n. 2
0
def gen_rules(filepath, args):
    """Generate association rules from a whitespace-separated transaction file.

    Each line of ``filepath`` is split on whitespace into one transaction.
    Frequent itemsets are mined with the ``fpgrowth`` helpers and turned into
    rules via ``fpgrowth.generateRules``.

    Args:
        filepath: Path of the UTF-8 encoded transaction file.
        args: Object exposing ``support`` (multiplied by the number of
            transactions to obtain the threshold passed to the FP-tree
            helpers) and ``confidence`` (forwarded as ``minConf``).

    Returns:
        A list of rules whose element at index 1 has exactly one member,
        sorted in descending order of the element at index 2 (presumably the
        consequent and the confidence respectively -- confirm against
        ``fpgrowth.generateRules``). An empty list is returned when the file
        does not exist or when it holds 100 transactions or fewer.
    """
    if not os.path.exists(filepath):
        print(filepath, 'not exists,please set the filepath')
        # Bail out here: previously execution fell through and crashed with
        # FileNotFoundError at the open() call below.
        return []
    print('\n\n\n')
    print(
        '------------------------------处理文件%s-----------------------------------'
        % (os.path.basename(filepath)))
    with open(filepath, encoding='utf-8') as f:
        dataSet = [line.split() for line in f]
    # With 100 transactions or fewer, no rules are generated.
    if len(dataSet) <= 100:
        print(
            '-----------------------交易数据小于100条,不生成rules-------------------------'
        )
        return []
    n = args.support * len(dataSet)
    initSet = fpgrowth.createInitSet(dataSet)
    myFPtree, myHeaderTab = fpgrowth.createFPtree(initSet, n)
    freqItems = []
    fpgrowth.mineFPtree(myFPtree, myHeaderTab, n, set(), freqItems)

    # Compute support values of the frequent itemsets; the empty itemset is
    # given support 1.0 explicitly.
    suppData = fpgrowth.calSuppData(myHeaderTab, freqItems, len(dataSet))
    suppData[frozenset()] = 1.0

    rules = fpgrowth.generateRules(freqItems,
                                   suppData,
                                   minConf=args.confidence)
    # Keep only rules whose index-1 element contains a single item.
    filter_rules = [rule for rule in rules if len(rule[1]) == 1]
    filter_rules.sort(key=lambda p: p[2], reverse=True)
    print('number of association rules:\n', len(filter_rules))
    return filter_rules
Esempio n. 3
0
# myFPtree.disp()

# print fpgrowth.findPrefixPath('z', myHeaderTab)
# print fpgrowth.findPrefixPath('r', myHeaderTab)
# print fpgrowth.findPrefixPath('t', myHeaderTab)

# freqItems = []
# fpgrowth.mineFPtree(myFPtree, myHeaderTab, 3, set([]), freqItems)
# for x in freqItems:
#     print x
# Benchmark: mine frequent itemsets from a large transaction file and time it.
# (This section used to be labelled "kosarak data", but the file that is
# actually opened below is webdocs.dat.)
start = time.time()
n = 20000  # threshold passed to createFPtree / mineFPtree
# Raw string: the previous non-raw literal contained the invalid escape
# sequences "\d" and "\w", which trigger a SyntaxWarning on recent Python
# versions. The resulting path is unchanged.
with open(r"E:\dvancedos\database\webdocs.dat", "rb") as f:
    # Binary mode: every item produced by split() is a bytes object.
    parsedDat = [line.split() for line in f]
initSet = fpgrowth.createInitSet(parsedDat)
myFPtree, myHeaderTab = fpgrowth.createFPtree(initSet, n)
freqItems = []
fpgrowth.mineFPtree(myFPtree, myHeaderTab, n, set(), freqItems)
for x in freqItems:
    print(x)
print(time.time() - start, 'sec')

# Compute support values of freqItems; the empty itemset gets support 1.0.
suppData = fpgrowth.calSuppData(myHeaderTab, freqItems, len(parsedDat))
suppData[frozenset()] = 1.0
for x, v in suppData.items():
    print(x, v)

# generateRules is given the itemsets as frozensets; its return value is
# not used here.
freqItems = [frozenset(x) for x in freqItems]
fpgrowth.generateRules(freqItems, suppData)
Esempio n. 4
0
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 11 20:29:01 2018

@author: Admin
"""

import fpgrowth
import generaterules

# Load the sample data and convert it into the form the tree builder expects.
testdata = fpgrowth.loadSimpData()
print(testdata)
itemset = fpgrowth.createInitSet(testdata)

# Parameters: minSup is the minimum support, itemnumber is the number of
# items in a frequent itemset.
minSup = 0.1
itemnumber = 3

# Build the tree and the header table.
retTree, headerTable = fpgrowth.createTree(
    itemset,
    sum(itemset.values()),
    minSup=minSup,
)
print(itemset)

# Generate the frequent itemsets (collected into freqItems).
freqItems = {}
fpgrowth.mineTree(
    inTree=retTree,
    headerTable=headerTable,
    dataSetLen=sum(itemset.values()),
    itemnumber=itemnumber,
    minSup=minSup,
    freqItemList=freqItems,
)

# Mine association rules from the frequent itemsets.
minConf = 0.8  # minimum confidence
minLift = 1.3  # minimum lift
ruleset = generaterules.generateRules(
    freqItems=freqItems,
    dataSetLen=sum(itemset.values()),
    minConf=minConf,
    minLift=minLift,
)
Esempio n. 5
0
import fpgrowth
import time


# Simple data: build an FP-tree from the sample transactions shipped with
# the fpgrowth module.
simDat = fpgrowth.loadSimpDat()
initSet = fpgrowth.createInitSet(simDat)
myFPtree, myHeaderTab = fpgrowth.createFPtree(initSet, 3)
myFPtree.disp()  # show the FP-tree

# Print what findPrefixPath returns for a few individual items.
for item in ('z', 'r', 'x'):
    print(fpgrowth.findPrefixPath(item, myHeaderTab))

# Mine the tree with threshold 3 and print every frequent itemset found.
freqItems = []
fpgrowth.mineFPtree(myFPtree, myHeaderTab, 3, set(), freqItems)
for x in freqItems:
    print(x)

     



#kosarak data
'''
start = time.time()
n = 100000
with open("H:/data/kosarak.dat/kosarak.dat", "rb") as f:
    parsedDat = [line.split() for line in f.readlines()]
initSet = fpgrowth.createInitSet(parsedDat)
Esempio n. 6
0
# Split `content` into consecutive groups of rows. A group is closed at row i
# whenever the last comma-separated field of row i (parsed as a float) is
# greater than that of row i + 1.
# NOTE(review): the bound 5422 is hard-coded and row i + 1 is always read, so
# `content` must hold at least 5423 rows; rows still in `temp` when the loop
# ends are never added to `ruleset` -- confirm this is intended.
ruleset = []
temp = []
for i in range(5422):
    current_key = float(content[i].split(',')[-1])
    next_key = float(content[i + 1].split(',')[-1])
    temp.append(content[i])
    if current_key > next_key:
        ruleset.append(temp)
        temp = []

# For every group, keep each row's fields except the last one.
fitdata = [
    [row.split(',')[:-1] for row in group]
    for group in ruleset
]

# fitdata contains the data for every week; the group at index 2 is mined.
itemset = fpgrowth.createInitSet(fitdata[2])
minSup = 0.1
itemnumber = 3
retTree, headerTable = fpgrowth.createTree(
    itemset,
    sum(itemset.values()),
    minSup=minSup,
)
freqItems = {}
fpgrowth.mineTree(
    inTree=retTree,
    headerTable=headerTable,
    dataSetLen=sum(itemset.values()),
    itemnumber=itemnumber,
    minSup=minSup,
    freqItemList=freqItems,
)
minConf = 0.7
minLift = 0.9
# `ruleset` is rebound here: from now on it holds the generated rules.
ruleset = generaterules.generateRules(
    freqItems=freqItems,
    dataSetLen=sum(itemset.values()),
    minConf=minConf,
    minLift=minLift,
)
print(ruleset)