def mineTweets(tweetArray, minSup=5, maxPages=14, maxPerPage=100):
    """Mine frequent word sets from pages of tweets with FP-Growth.

    Args:
        tweetArray: Sequence of pages; each page is a sequence of objects
            exposing a ``.text`` attribute.
        minSup: Minimum support passed to ``FPGrowth.createTree``.
        maxPages: Upper bound on the number of pages read (default 14,
            the limit that used to be hard-coded).
        maxPerPage: Upper bound on the number of tweets read per page
            (default 100, the limit that used to be hard-coded).

    Returns:
        The list object handed to ``FPGrowth.mineTree`` as its output
        argument.
    """
    # Slicing caps the amount of data like the former fixed index ranges
    # did, but does not raise IndexError when fewer pages/tweets exist.
    parsedList = [
        textParse(tweet.text)
        for page in tweetArray[:maxPages]
        for tweet in page[:maxPerPage]
    ]

    initDict = FPGrowth.createInitSet(parsedList)
    twitterFPTree, twitterHeaderTab = FPGrowth.createTree(initDict, minSup)

    twitterFreqList = []
    # NOTE(review): minSup is not forwarded to mineTree here. Some FP-Growth
    # implementations expect it as the third positional argument -- confirm
    # against the signature of the FPGrowth module in use.
    FPGrowth.mineTree(twitterFPTree, twitterHeaderTab, set(),
                      twitterFreqList)

    return twitterFreqList
def main():
    """Fuzzify the training data, then mine and store association rules.

    Steps, in order:
      1. Load the 'train' data through ``connect_db``.
      2. Fuzzify it and store the result through ``insert_db``.
      3. Run FP-Growth on 'fuzzified.csv', print the elapsed wall-clock
         time and store the rules through ``insert_fprules``.
      4. Run Apriori on the same file, print the elapsed time and store the
         rules through ``insert_arules``.

    NOTE(review): 'fuzzified.csv' is presumably produced by ``insert_db``;
    confirm, since nothing in this function writes it directly.
    """
    # Connect to database
    ppsd_data = connect_db('train')

    fuzzified_data = Fuzzification.fuzzify(ppsd_data)

    # Insert fuzzified data
    insert_db(fuzzified_data, ppsd_data)

    # The CSV used to be loaded into an unused DataFrame at this point; both
    # miners take the file path themselves, so that extra parse was dropped.

    # FP-Growth
    start_time = time.time()
    rules, confi = FPGrowth.mine('fuzzified.csv')
    print("FP: --- %s seconds ---" % (time.time() - start_time))
    insert_fprules(rules, confi)
    # Alternative sink kept for reference: insert_fprulesCSV(rules, confi)

    # Apriori
    start_time = time.time()
    ant, con, conf, lift = Apriori.mine('fuzzified.csv')
    print("Apriori: --- %s seconds ---" % (time.time() - start_time))
    insert_arules(ant, con, conf, lift)
Example #3
0
import FPGrowth

# Disabled experiment: the code below sits inside a bare string literal, so it
# is never executed. It builds a three-node treeNode hierarchy by hand and
# calls disp() on the root.
'''
rootNode = FPGrowth.treeNode('pyramid', 9, None)
rootNode.children['eye'] = FPGrowth.treeNode('eye', 13, None)
rootNode.children['phoenix'] = FPGrowth.treeNode('phoenix', 3, None)
rootNode.disp()
'''

# Disabled experiment (also a bare string literal, never executed): builds an
# FP-tree from FPGrowth.loadSimpDat() with threshold 3, mines it and prints
# the result. It uses the Python 2 print statement, so it would need updating
# before being re-enabled on Python 3.
'''
simpDat = FPGrowth.loadSimpDat()
initSet = FPGrowth.createInitSet(simpDat)
myFPtree, myHeaderTab = FPGrowth.createTree(initSet, 3)
# myFPtree.disp()

# print FPGrowth.findPrefixPath('t', myHeaderTab['t'][1])

freqItems = []
FPGrowth.mineTree(myFPtree, myHeaderTab, 3, set([]), freqItems)
print freqItems
'''

# The only live statement: runs at module level (no __main__ guard) against a
# hard-coded absolute path on drive E:.
FPGrowth.kosarakTest("E:/TestDatas/MachineLearningInAction/Ch12/kosarak.dat")

            freq_sets, HTs, item_list = getPickledData(filename, min_sup,
                                                       confidence)

        except (OSError, IOError) as e:
            use_pickled = False

    if not use_pickled:

        if algo is 'ap':

            freq_sets, item_list = Apriori.aprioriAlgo(data_folder + filename,
                                                       min_sup)

        else:

            freq_sets, item_list = FPGrowth.fPGrowthAlgo(
                data_folder + filename, min_sup)

        HTs = HashTree.generateHTs(data_folder + filename, freq_sets, min_sup)

    max_freq_sets = FrequentSetOps.getMaxFreqSets(freq_sets)

    closed_freq_sets = FrequentSetOps.getClosedFreqSets(freq_sets, HTs)

    print(len(max_freq_sets))
    print(len(closed_freq_sets))

    rules = FrequentSetOps.findRules(freq_sets, HTs, confidence)

    HashTree.documentFreqSets(HTs, item_list, min_sup, filename)

    HashTree.documentRules(HTs, item_list, rules, min_sup, confidence,
Example #5
0
                        str = str + data[i+n]
                        if n != k-1:
                            str = str + " "
                    final_list.append(str)
                data_list = final_list

        result_list.append(data_list)
    return result_list

# Load the transactions; readFile is called with k=4.
result = readFile(k=4)

# Apriori method (disabled)
# L, supportData = Apriori.apriori(result, minSupport=0.05)
# rules = Apriori.generateRules(L, supportData)
# print(L)
# print(supportData)
# print(rules)
# print("count Apriori: " + str(len(rules)))

# FP-Growth
# The same threshold is used for building and for mining the tree.
min_support = 8
initSet = FPGrowth.createInitSet(result)
# Build the FP-tree from the data set.
myFPtree, myHeaderTab = FPGrowth.createFPtree(initSet, min_support)
# Mine the FP-tree; freqItems is passed in as the output list.
freqItems = []
FPGrowth.mineFPtree(myFPtree, myHeaderTab, min_support, set(), freqItems)

# Print and count every mined item set whose length is not 1.
counta = 0
for x in freqItems:
    if len(x) == 1:
        continue
    print(x)
    counta += 1
print("count: " + str(counta))
Example #6
0
# -*- coding: utf-8 -*-
"""

FPGrowth:从新闻网站点击流中挖掘
@author: Jerry
"""
import FPGrowth

if __name__ == '__main__':
    # Each line of kosarak.dat becomes one transaction: the list of its
    # whitespace-separated tokens. The with-block closes the file handle,
    # which was previously left open.
    with open('kosarak.dat', 'r') as fr:
        parsedData = [line.split() for line in fr]

    initDict = FPGrowth.createInitSet(parsedData)

    # The same threshold (100000) is used to build and to mine the tree.
    newsFPTree, newsHeaderTab = FPGrowth.createTree(initDict, 100000)
    newsFPTree.disp()

    # newsFreqItems is passed in as the output list for the mining step.
    newsFreqItems = []
    FPGrowth.mineTree(newsFPTree, newsHeaderTab, 100000, set([]), newsFreqItems)
    print('newsFreqItems=',newsFreqItems)
Example #7
0
"""
import FPGrowth


# Build the sample data set
def loadSimpDat():
    """Return the hard-coded sample transactions.

    Returns:
        A list of six transactions, each a list of single-character item
        names. A new outer list and new inner lists are built on every call.
    """
    # One string per transaction; every character is one item.
    transactions = (
        'rzhjp',
        'zyxwvuts',
        'z',
        'rxnos',
        'yrxzqtp',
        'yzxeqstm',
    )
    return [list(items) for items in transactions]


if __name__ == '__main__':
    # Build an FP-tree from the sample transactions (threshold 3) and
    # display it.
    simpleData = loadSimpDat()
    initDict = FPGrowth.createInitSet(simpleData)
    myFPTree, myHeaderTab = FPGrowth.createTree(initDict, 3)
    myFPTree.disp()

    # Rule printed before and after the prefix-path listing.
    separator = (
        '-----------------------------------------------------------------------'
    )

    print(separator)
    # Prefix paths for three items, looked up through index 1 of each
    # header-table entry.
    for item in ('x', 'z', 'r'):
        print(FPGrowth.findPrefixPath(item, myHeaderTab[item][1]))
    print(separator)
Example #8
0
#! encoding utf-8
import part2
import Apriori
import FPGrowth

if __name__ == '__main__':
    # Get buckets
    dataset = part2.LSH()

    # Apriori method: mine item sets, derive rules and print all three
    # results followed by the rule count.
    L, supportData = Apriori.apriori(dataset, minSupport=0.05)
    rules = Apriori.generateRules(L, supportData)
    for report in (L, supportData, rules):
        print(report)
    print("count Apriori: " + str(len(rules)))

    # FP-Growth: the same threshold is used for building and mining the tree.
    min_support = 2
    initSet = FPGrowth.createInitSet(dataset)
    # Build the FP-tree from the data set.
    myFPtree, myHeaderTab = FPGrowth.createFPtree(initSet, min_support)
    # Mine the FP-tree; freqItems is passed in as the output list.
    freqItems = []
    FPGrowth.mineFPtree(myFPtree, myHeaderTab, min_support, set(), freqItems)

    # Print and count every mined item set whose length is not 1.
    count = 0
    for x in freqItems:
        if len(x) == 1:
            continue
        print(x)
        count += 1
    print("count: " + str(count))
Example #9
0
import GSP
import time

if __name__ == '__main__':
    # Benchmark script: times each mining call on the test data using
    # wall-clock time.time() differences.

    # dic = ReadData.readcsv_notime("../data/new4gtrain.csv")
    # dic_t = ReadData.readcsv_withtime("../data/new4gtrain.csv")
    dic = TestData.gettest()
    dic_t = TestData.getTimeTest()

    # Apriori
    start = time.time()
    Afrequent, sup = Apriori.apriori(dataSet=dic, minSupport=2)
    end = time.time()
    ATime = end - start

    # FP-Growth
    start = time.time()
    Ffrequent = FPGrowth.FPgrowth(dataSet=dic, minsupport=2)
    end = time.time()
    FTime = end - start

    # GSP on the timed data set
    start = time.time()
    Gfrequent = GSP.gsp(dataSet=dic_t, minsupport=2)
    end = time.time()
    GTime = end - start

    # NOTE(review): this repeats the GSP.gsp call above with identical
    # arguments; confirm whether a different algorithm was meant here.
    start = time.time()
    Sfrequent = GSP.gsp(dataSet=dic_t, minsupport=2)
    end = time.time()
    STime = end - start

    # Parenthesised single-argument prints give the same output under
    # Python 2 and are valid syntax under Python 3.
    print(dic)
    print("Apriori:  time %.3fs    FrequentSet: %s" % (ATime, Afrequent))
Example #10
0
import FPGrowth
# Disabled experiment: the code below sits inside a bare string literal, so it
# is never executed. It builds a three-node treeNode hierarchy by hand and
# calls disp() on the root.
'''
rootNode = FPGrowth.treeNode('pyramid', 9, None)
rootNode.children['eye'] = FPGrowth.treeNode('eye', 13, None)
rootNode.children['phoenix'] = FPGrowth.treeNode('phoenix', 3, None)
rootNode.disp()
'''
# Disabled experiment (also a bare string literal, never executed): builds an
# FP-tree from FPGrowth.loadSimpDat() with threshold 3, mines it and prints
# the result. It uses the Python 2 print statement, so it would need updating
# before being re-enabled on Python 3.
'''
simpDat = FPGrowth.loadSimpDat()
initSet = FPGrowth.createInitSet(simpDat)
myFPtree, myHeaderTab = FPGrowth.createTree(initSet, 3)
# myFPtree.disp()

# print FPGrowth.findPrefixPath('t', myHeaderTab['t'][1])

freqItems = []
FPGrowth.mineTree(myFPtree, myHeaderTab, 3, set([]), freqItems)
print freqItems
'''

# The only live statement: runs at module level (no __main__ guard) against a
# hard-coded absolute path on drive E:.
FPGrowth.kosarakTest("E:/TestDatas/MachineLearningInAction/Ch12/kosarak.dat")