def _get_frequent_features(self):
    """Find frequent features with a single Apriori pass.

    Every entry of the ``noun_and_np`` column of ``self.df`` is flattened
    into one list of terms.  ``apriori.createC1`` builds the initial
    candidates from that list and ``apriori.scanD`` keeps the ones that
    reach a minimum support of 0.01.  The result is stored in
    ``self.frequent_features`` as a list of strings, one per retained
    itemset, each made by concatenating the itemset's members.
    """
    feature_terms = [
        term for terms in self.df['noun_and_np'].values for term in terms
    ]
    candidates = apriori.createC1(feature_terms)
    # Materialise the transactions: on Python 3 ``map`` returns a one-shot
    # iterator that has no length and cannot be traversed twice.
    transactions = list(map(set, feature_terms))
    frequent, _support = apriori.scanD(transactions, candidates, 0.01)  # minimum support 0.01
    # Store a real list so the attribute can be read more than once.
    self.frequent_features = ["".join(itemset) for itemset in frequent]
Ejemplo n.º 2
0
class Test:
    """Demo of the first Apriori pass; the body only runs when executed as a script."""

    if __name__ == "__main__":
        dataSet = apriori.loadDataSet()
        print(dataSet)
        C1 = apriori.createC1(dataSet)
        # Materialise the transactions: on Python 3 ``map`` returns a
        # one-shot iterator that has no length and cannot be re-read.
        D = list(map(set, dataSet))
        (L1, supportData) = apriori.scanD(D, C1, 0.5)
        print(L1)
        print(supportData)
Ejemplo n.º 3
0
# coding:utf-8

import apriori

# Discover frequent itemsets, then derive association rules from them.

dataSet = apriori.loadDataSet()
print(dataSet)

C1 = apriori.createC1(dataSet)
print(C1)

# Materialised as a list: on Python 3 a bare ``map`` is a one-shot iterator,
# so printing it shows nothing useful and it cannot be reused afterwards.
D = list(map(set, dataSet))
print(D)

L1, suppData0 = apriori.scanD(D, C1, 0.5)
print(L1)

# Full run with the library's default minimum support.
L, suppData = apriori.apriori(dataSet)
print(L)

# Same run with an explicit support, followed by rules at two confidence levels.
L, suppData = apriori.apriori(dataSet, minSupport=0.5)
rules = apriori.generateRules(L, suppData, minConf=0.7)
print(rules)

rules = apriori.generateRules(L, suppData, minConf=0.5)
print(rules)
Ejemplo n.º 4
0
#!/usr/bin/env python
import sys
from apriori import createC1
from apriori import scanD
from apriori import aprioriGen

# Read one whitespace-separated transaction per line from standard input.
a = []
for line in sys.stdin:
    a.append(line.strip().split())
c1 = createC1(a)
# Materialised as a list because the transactions are scanned once per pass
# and a Python 3 ``map`` object can only be consumed once.
d = list(map(set, a))

K = 2  # number of scanD passes to run
ps = 0.3  # minimum support handed to scanD

for i in range(K):
    L1, sp = scanD(d, c1, ps)
    # aprioriGen's second argument is the size of the candidates to create.
    # Pass i keeps itemsets of i + 1 members, so the next candidates need
    # i + 2 (the same convention as aprioriGen(L1, 2) after the first pass).
    c1 = aprioriGen(L1, i + 2)

# With K = 2 the surviving itemsets are pairs; print each as two integers.
for itemset in L1:
    x, y = itemset
    x = int(x)
    y = int(y)
    print("%d %d" % (x, y))
Ejemplo n.º 5
0
Created on Tue May 16 10:26:18 2017

@author: 凯风
"""

from imp import reload
import apriori

# Interactive walk-through: the bare expressions below are there to be
# inspected when the file is run cell by cell in a REPL/IDE.
reload(apriori)
dataSet = apriori.loadDataSet()  # load the data
dataSet
C1 = apriori.createC1(dataSet)  # C1: the candidate itemsets of the data set
C1
D = list(map(set, dataSet))  # store every transaction as a set inside a list
D
L1, supportData0 = apriori.scanD(
    D, C1, 0.5)  # score each candidate against a support of 0.5 and return the itemsets that reach it as L1
L1
supportData0

# Generate the frequent itemsets from the support
reload(apriori)
L, supportData = apriori.apriori(dataSet)
L  # the frequent itemsets whose support exceeds 0.5
L[0]  # itemsets with one element
L[1]  # itemsets with two elements
L[2]  # itemsets with three elements
L[3]
apriori.aprioriGen(L[0], 2)  # how the candidates behind L[1] look before they are compared with the support
L, supportData = apriori.apriori(dataSet, minSupport=0.7)  # a higher support yields fewer results

# Generate the association rules from the confidence

def aprioriGen(Lk, k):  #creates Ck
    """Build the candidate itemsets Ck by merging pairs of itemsets from Lk.

    Two itemsets are merged (set union) when their first ``k - 2`` members,
    taken in sorted order, are identical.

    Args:
        Lk: indexable sequence of set-like itemsets supporting ``|``; the
            members of each itemset must be mutually orderable.
        k: size of the candidates to create.

    Returns:
        A list with the union of every matching pair ``(Lk[i], Lk[j])``,
        ``i < j``, in pair order.  Empty when no pair matches.
    """
    # Sort *before* slicing.  A set's iteration order is arbitrary, so
    # slicing the raw order and sorting afterwards can compare unrelated
    # members and silently miss candidates.  Each prefix is computed once.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    retList = []
    for i, prefix in enumerate(prefixes):
        for j in range(i + 1, len(prefixes)):
            if prefix == prefixes[j]:  #if first k-2 elements are equal
                retList.append(Lk[i] | Lk[j])  #set union
    return retList


# Module-level walk-through of apriori(): grow the frequent itemsets one
# size at a time until a level comes back empty.
dataSet = loadDataSet()
minSupport = 0.5
'''def apriori(dataSet, minSupport = 0.5):'''
C1 = createC1(dataSet)
D = [set(transaction) for transaction in dataSet]
L1, supportData = scanD(D, C1, minSupport)
L = [L1]
k = 2
# L always holds k - 1 levels here, so L[-1] is the level at index k - 2.
while len(L[-1]) > 0:
    Ck = aprioriGen(L[-1], k)
    # Scan the transactions to keep the candidates that reach minSupport.
    Lk, supK = scanD(D, Ck, minSupport)
    supportData.update(supK)
    L += [Lk]
    k = k + 1
# return L, supportData
Ejemplo n.º 7
0
Experiments with apriori
'''

import apriori
import random
import loadText

# Minimum support used for both the single scan and the full apriori run.
support = 0.4
loadText.importFromFile('spanish_db.txt')
dataset = loadText.rawPriori

# First pass by hand: initial candidates, then the ones that reach `support`.
C1 = apriori.createC1(dataset)
# Materialised as a list: on Python 3 a bare ``map`` is a one-shot iterator
# with no length, so it cannot be scanned like a collection of transactions.
D = list(map(set, dataset))
L1, support_data = apriori.scanD(D, C1, support)

# Candidates of size k_length generated from the first-pass result.
k_length = 2
transactions = apriori.aprioriGen(L1, k_length)

# Full run; this rebinds support_data to the complete support table.
L, support_data = apriori.apriori(dataset, support)
rules = apriori.generateRules(L, support_data, min_confidence=0.7)

ruleDict = apriori.generateRuleDict(rules)

'''
Ejemplo n.º 8
0
Experiments with apriori
'''

import apriori
import random
import loadText

# Minimum support used for both the single scan and the full apriori run.
support = 0.1
loadText.importFromFile('snowflakes_db.txt')
dataset = loadText.rawPriori

# First pass by hand: initial candidates, then the ones that reach `support`.
C1 = apriori.createC1(dataset)
# Materialised as a list: on Python 3 a bare ``map`` is a one-shot iterator
# with no length, so it cannot be scanned like a collection of transactions.
D = list(map(set, dataset))
L1, support_data = apriori.scanD(D, C1, support)

# Dump the first-pass support table, one "itemset support" pair per line.
# print(...) with a single argument and dict.items() behave the same on
# Python 2 and Python 3, unlike the print statement and iteritems().
print('support_data')
for k, v in support_data.items():
    print("{0} {1}".format(k, v))

# Candidates of size k_length generated from the first-pass result.
k_length = 2
transactions = apriori.aprioriGen(L1, k_length)

# Full run; this rebinds support_data to the complete support table.
L, support_data = apriori.apriori(dataset, support)
rules = apriori.generateRules(L, support_data, min_confidence=0.7)
Ejemplo n.º 9
0
import apriori

# Step-by-step demo: every intermediate result is printed under its name.
# print(...) with a single argument produces the same output on Python 2
# and Python 3, whereas the print statement only parses on Python 2.
dataSet = apriori.loadDataSet()
print("dataSet")
print(dataSet)
C1 = apriori.createC1(dataSet)
print("C1")
print(C1)

# Materialised as a list: on Python 3 a bare ``map`` is a one-shot iterator,
# so printing it would not show the transactions and it could not be reused.
D = list(map(set, dataSet))
print("D")
print(D)

L1, suppData0 = apriori.scanD(D, C1, 0.5)
print("L1")
print(L1)
print("suppData0")
print(suppData0)


L, suppData = apriori.apriori(dataSet, minSupport=0.5)
print("L")
print(L)
print("suppData")
print(suppData)

rules = apriori.generateRules(L, suppData, minConf=0.7)
print("rules")
print(rules)

rules = apriori.generateRules(L, suppData, minConf=0.5)
    'holocaust-history', 'dominican-republic-environmental-citizenship'
]
df = pd.DataFrame(et_tours, columns=tour_names)

# Replace each column's 0/1 flags with NaN / the column's own tour name, so
# the non-null values of a row are exactly the tours in that basket.
for tour_name in tour_names:
    tour_to_name = {0: np.nan, 1: tour_name}
    df[tour_name] = df[tour_name].map(tour_to_name)

# head must be called; printing the bare attribute shows the bound method.
print(df.head())

# NOTE(review): iterating a DataFrame yields its column labels, so these
# three lines operate on the tour names rather than on the rows. Confirm
# that this is intended.
C1 = createC1(df)  #new stuff
# Materialised as a list: on Python 3 a bare ``map`` is a one-shot iterator.
D = list(map(set, df))  #new stuff
L1, support_data = scanD(D, C1, 0.0000005)  #new stuff

# One basket per row: the tour names left after dropping the NaN cells.
my_data = list()

for index in range(df.shape[0]):
    # .iloc selects by position, matching the positional range above;
    # the .ix indexer used previously no longer exists in current pandas.
    basket = list(df.iloc[index].dropna())
    my_data.append(basket)

# NOTE(review): alpha is reported below as the support, but it is not passed
# to apriori() here. Confirm whether it should be.
L, suppData = apriori(my_data)

print('Identified rules with support = ', alpha, 'and confidence= ', beta)
rules = generateRules(L, suppData, minConf=beta)
n_other_items = 1
while n_other_items <= max_other_items:
    print('\nRules with ', n_other_items, 'other item(s)')
    for item in L[n_other_items]:
Ejemplo n.º 11
0

import apriori as ap

dataSet = ap.loadDataSet()
C1 = ap.createC1(dataSet)
# D is used twice below (scanD, then apriori), so it must be a real list:
# on Python 3 a bare ``map`` would already be exhausted for the second use.
D = list(map(set, dataSet))
L1, suppData0 = ap.scanD(D, C1, 0.5)
L, S = ap.apriori(D, 0.5)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(L)

List = ap.generateRules(L, S, minConf=0.4)
print(List)