def test():
    """Run the FP-growth pipeline end to end on the sample data.

    Loads the sample transactions from ``fpGrowth``, builds the FP-tree
    with a threshold of 3, displays the tree, and mines it. The mined
    itemsets are collected in a local list and are not returned.
    """
    threshold = 3
    transactions = fpGrowth.loadSimpDat()
    tree, header_table = fpGrowth.createTree(
        fpGrowth.createInitSet(transactions), threshold
    )
    tree.disp()
    mined = []
    # The empty set is the starting prefix handed to mineTree.
    fpGrowth.mineTree(tree, header_table, threshold, set(), mined)
import fpGrowth
# Build a small tree by hand, displaying it after each child is attached.
rootNode = fpGrowth.treeNode('pyramid', 9, None)
for _child_name, _child_count in (('eye', 13), ('phoenix', 3)):
    rootNode.children[_child_name] = fpGrowth.treeNode(_child_name, _child_count, None)
    rootNode.disp()

from importlib import reload
# Re-import fpGrowth so that edits made to the module since the first import are picked up.
reload(fpGrowth)
simpDat = fpGrowth.loadSimpDat()
# Bare name: echoes the value in an interactive session; has no effect when run as a script.
simpDat
initSet = fpGrowth.createInitSet(simpDat)
# Bare name again (interactive echo only).
initSet
# Build the FP-tree; 3 is presumably the minimum-support threshold — TODO confirm against fpGrowth.createTree.
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFPtree.disp()

# Pick up any module changes, then look up the prefix paths of three items.
reload(fpGrowth)
for _item in ('x', 'z', 'r'):
    # The second slot of the item's header-table entry is passed as the starting node.
    fpGrowth.findPrefixPath(_item, myHeaderTab[_item][1])

# Reload once more and mine the tree; freqItems is the list handed to mineTree.
reload(fpGrowth)
freqItems = []
fpGrowth.mineTree(myFPtree, myHeaderTab, 3, set(), freqItems)
# Bare name: interactive echo of the result, no effect in a script.
freqItems

# Example: mining a news-site clickstream.
# Each line of kosarak.dat is split on whitespace into one transaction.
# The file is read inside a `with` block so the handle is closed promptly
# instead of being left to the garbage collector.
with open('kosarak.dat') as kosarak_file:
    parsedDat = [line.split() for line in kosarak_file]
initSet = fpGrowth.createInitSet(parsedDat)
# 100000 is the threshold passed to createTree for this much larger data set.
myFPtree, myHeaderTab = fpGrowth.createTree(initSet, 100000)
import fpGrowth

# Load the sample transactions and convert them into the initial set for tree building.
simpDat = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simpDat)

# Build the FP-tree and its header table with a threshold of 3.
myFpTree, myHeaderTab = fpGrowth.createTree(initSet, 3)

# Prefix paths for item 'x', starting from the second slot of its header-table entry.
myCondPat = fpGrowth.findPrefixPath('x', myHeaderTab['x'][1])

# Mine the tree; freqItems is the list handed to mineTree, and whatever
# mineTree returns is kept in myfpGrowth.
freqItems = []
myfpGrowth = fpGrowth.mineTree(myFpTree, myHeaderTab, 3, set(), freqItems)
# Example #4
# 0
# -*- coding:utf-8 -*-
import fpGrowth

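# Basic steps of the algorithm:
# 1. Create the data structure for the FP-tree.
# 2. The first pass over the dataset counts how often each item occurs; items that do not meet the minimum support are dropped.
# 3. Sort the items within each transaction (i.e. each record), ordering them by absolute item frequency.
# 4. Build the FP-tree. Starting from the empty set, keep adding frequent itemsets: read the itemset of each transaction and add it to an existing path.
#    If the item already exists in the tree, increment its count.
#    If the path does not exist, create a new path.

### Test the FP-tree data structure
#rootNode = fpGrowth.treeNode('pyramid',9,None)
#rootNode.children['eye'] = fpGrowth.treeNode('eye',13,None)
#rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix',3,None)
#rootNode.disp()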


# Build the FP-tree for the sample data with a threshold of 3 and display it.
simData = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simData)
myFpTree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFpTree.disp()

# Prefix paths for item 'r', starting from the second slot of its header-table entry.
myCondPats = fpGrowth.findPrefixPath('r', myHeaderTab['r'][1])
print('myCondPats is', myCondPats)

# Mine the tree; the list passed as the last argument is printed as the result.
freqItems = []
fpGrowth.mineTree(myFpTree, myHeaderTab, 3, set(), freqItems)
print('频繁项集 is', freqItems)

# Example #5
# 0
# -*- coding: utf-8 -*-
'''
Created on 2016年6月22日

@author: xiaoyuan
'''

import fpGrowth

# Disabled experiment kept for reference: builds a tree node structure by hand,
# then builds the FP-tree from the sample data and looks up the prefix paths of 'x'.
# # testing creation of a tree data structure
# rootNode = fpGrowth.treeNode('pyramid',9,None)
# rootNode.children['eye'] = fpGrowth.treeNode('eye',13,None)
# rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix',3,None)
# rootNode.disp()
#
#
# simDat = fpGrowth.loadSimpDat()
# initSet = fpGrowth.createInitSet(simDat)
# myFPtree,myHeaderTab = fpGrowth.createTree(initSet,3)
# myFPtree.disp()
# condPattBases =  fpGrowth.findPrefixPath('x',myHeaderTab['x'][1])

# Load the sample data and hand it to fpGrowth.fpGrowth in a single call.
# NOTE(review): fpGrowth.fpGrowth is not shown here; presumably it wraps the
# createInitSet / createTree / mineTree steps — confirm in the module.
dateSet = fpGrowth.loadSimpDat()
freqItems = fpGrowth.fpGrowth(dateSet)
# Example #6
# 0

# Script entry point: builds a word dictionary for one COG category, mines it
# with FP-growth at min_sup, then starts an entropy / information-gain
# computation. The snippet is cut off after the last comment below.
if __name__ == '__main__':

    # takes all bacteria names from bactTaxa_Habitant that are animal or marine
    bacteria_names()
    category = "Lipid transport and metabolism"
    # list of all cogs that belong to category
    function_cogs_list = build_category_list(category)

    # dict = {bac_name, {words_list, label}}
    # NOTE(review): binding the name `dict` shadows the builtin for the rest of the module.
    dict, animal_counter, marine_counter = write_words(function_cogs_list)

    min_sup = 100
    # NOTE(review): this handle is not closed anywhere in the visible part of the script.
    output = open("output.txt", "w")
    simpDat = fpGrowth.loadSimpDat(dict)
    initSet = fpGrowth.createInitSet(simpDat)
    myFPtree, myHeaderTab = fpGrowth.createTree(initSet, min_sup)
    # An empty/falsy header table means there is nothing to mine: report and stop.
    if not myHeaderTab:
        print("header table empty")
        # `time` and `start_time` are not defined in the visible part; they must come from elsewhere in the file.
        print("--- %s seconds ---" % (time.time() - start_time))
        exit(1)
    freqItems = []
    fpGrowth.mineTree(myFPtree, myHeaderTab, min_sup, set([]), freqItems)

    # Share of each class among all counted samples.
    # NOTE(review): divides by zero if both counters are 0; with integer counters
    # under Python 2 this would floor-divide — confirm the interpreter version.
    class0 = marine_counter / (marine_counter + animal_counter)
    class1 = animal_counter / (marine_counter + animal_counter)
    Hc = entropy(class0, class1)
    dict_IG_freq = {}

    # calculate IG for all the frequent itemsets; also returns how many times each freq_itemset appears in label 1 and 0
# Example #7
# 0
'''
@author: xiaoyuan
'''


import  fpGrowth


# Disabled experiment kept for reference: builds a tree node structure by hand,
# then builds the FP-tree from the sample data and looks up the prefix paths of 'x'.
# # testing creation of a tree data structure
# rootNode = fpGrowth.treeNode('pyramid',9,None)
# rootNode.children['eye'] = fpGrowth.treeNode('eye',13,None)
# rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix',3,None)
# rootNode.disp()
#
#
# simDat = fpGrowth.loadSimpDat()
# initSet = fpGrowth.createInitSet(simDat)
# myFPtree,myHeaderTab = fpGrowth.createTree(initSet,3)
# myFPtree.disp()
# condPattBases =  fpGrowth.findPrefixPath('x',myHeaderTab['x'][1])

# Load the sample data and hand it to fpGrowth.fpGrowth in a single call.
# NOTE(review): fpGrowth.fpGrowth is not shown here; presumably it wraps the
# createInitSet / createTree / mineTree steps — confirm in the module.
dateSet = fpGrowth.loadSimpDat()
freqItems = fpGrowth.fpGrowth(dateSet)








# -*- coding:utf-8 -*-
import fpGrowth

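# Basic steps of the algorithm:
# 1. Create the data structure for the FP-tree.
# 2. The first pass over the dataset counts how often each item occurs; items that do not meet the minimum support are dropped.
# 3. Sort the items within each transaction (i.e. each record), ordering them by absolute item frequency.
# 4. Build the FP-tree. Starting from the empty set, keep adding frequent itemsets: read the itemset of each transaction and add it to an existing path.
#    If the item already exists in the tree, increment its count.
#    If the path does not exist, create a new path.

### Test the FP-tree data structure
#rootNode = fpGrowth.treeNode('pyramid',9,None)
#rootNode.children['eye'] = fpGrowth.treeNode('eye',13,None)
#rootNode.children['phoenix'] = fpGrowth.treeNode('phoenix',3,None)
#rootNode.disp()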

# Build the FP-tree for the sample data with a threshold of 3 and display it.
simData = fpGrowth.loadSimpDat()
initSet = fpGrowth.createInitSet(simData)
myFpTree, myHeaderTab = fpGrowth.createTree(initSet, 3)
myFpTree.disp()

# Prefix paths for item 'r', starting from the second slot of its header-table entry.
myCondPats = fpGrowth.findPrefixPath('r', myHeaderTab['r'][1])
print('myCondPats is', myCondPats)

# Mine the tree; the list passed as the last argument is printed as the result.
freqItems = []
fpGrowth.mineTree(myFpTree, myHeaderTab, 3, set(), freqItems)
print('频繁项集 is', freqItems)