def getCooccur(ts, groups, reverse, min_s=2, min_c=0.5):
    """Return pairwise co-occurrence confidences mined from transactions.

    Mines association rules from ``ts`` with pymining, keeps only rules
    with a single-item antecedent, and returns a dict mapping each pair
    ``(x, y)`` of keys from ``reverse`` to the confidence of the rule
    ``{x} -> ... y`` (the last mined rule wins if several mention ``y``).

    Parameters:
        ts: iterable of transactions for itemmining.get_relim_input.
        groups: accepted for signature compatibility; not used here.
        reverse: mapping whose keys are the candidate items.
        min_s: minimum absolute support for itemsets and rules.
        min_c: minimum rule confidence.
    """
    crules = {}
    result = {}
    relim_input = itemmining.get_relim_input(ts)
    itemset = itemmining.relim(relim_input, min_support=min_s)
    rules = assocrules.mine_assoc_rules(itemset, min_support=min_s,
                                        min_confidence=min_c)
    # Keep only single-item antecedents; record each consequent item's
    # confidence under that antecedent (later rules overwrite earlier ones).
    for rule in rules:
        if len(rule[0]) > 1:
            continue
        bucket = crules.setdefault(rule[0], {})
        for elem in rule[1]:
            bucket[elem] = rule[3]
    # Project the per-antecedent confidences onto (x, y) key pairs.
    for x in reverse:
        key = frozenset({x})
        if key not in crules:
            continue
        for y in reverse:
            if y in crules[key]:
                result[(x, y)] = crules[key][y]
    return result
def associationRules(transactions, userid, followed=(), c_userid=None):
    """Recommend users to follow via basket analysis of ``transactions``.

    Mines association rules, keeps rules whose antecedent contains
    ``userid`` and whose consequent contains neither an already-followed
    user nor ``c_userid``, filters by lift > 1, and returns the union of
    the recommended consequent user ids as a list.
    """
    relim_input = itemmining.get_relim_input(transactions)
    item_sets = itemmining.relim(relim_input, min_support=2)
    rules = assocrules.mine_assoc_rules(item_sets, min_support=2,
                                        min_confidence=0.5)
    recom_user = {}
    for rule_user in rules:
        if (userid in rule_user[0]
                and not any(u in rule_user[1] for u in followed)
                and c_userid not in rule_user[1]):
            # Support: fraction of transactions containing the itemset.
            support = rule_user[2] / len(transactions)
            # Lift > 1 means the antecedent makes the consequent more likely.
            lift = rule_user[3] / support
            if lift <= 1:
                continue
            recom_user[rule_user[1]] = lift
    recom_user_sorted = sorted(recom_user.items(), key=lambda x: x[1],
                               reverse=True)
    print("*" * 100)
    print("ユーザーレコメンド(バスケット分析)")
    print(recom_user_sorted)
    print("*" * 100)
    # Flatten the consequent frozensets into one set of user ids.
    rcom_userid_list = set()
    for rcom_userid in recom_user_sorted:
        rcom_userid_list = rcom_userid_list.union(rcom_userid[0])
    return list(rcom_userid_list)
def printPyminingResult(transactions, support, confidence): print '\n\nPymining algorithm:' relim_input = itemmining.get_relim_input(transactions) item_sets = itemmining.relim(relim_input, min_support=support * 196) print 'Frequent item set:( size:', len(item_sets), ')' for key in item_sets: print '[', for keys in key: print keys, ',', print '], ', rules = assocrules.mine_assoc_rules(item_sets, min_support=support * 196, min_confidence=confidence) print '\n\nRules:' for rule in rules: print '[', for _ in rule[0]: print _, if (len(rule[0]) > 1): print ',', print '->', for _ in rule[1]: print _, if (len(rule[1]) > 1): print ',', print '], confidence:', rule[3], ', support:', rule[2] / float(196)
def main(transactions, min_sup, min_conf):
    """Mine frequent itemsets and association rules; print the itemsets."""
    miner = freq_mining(transactions, min_sup, min_conf)
    frequent = miner.freq_items()
    # Rules are mined for their side effects / availability but not printed.
    rules = assocrules.mine_assoc_rules(frequent, miner.min_sup,
                                        miner.min_conf)
    print(frequent)
def testConfidence075(self):
    """Rules mined at confidence 0.75 from the default transactions."""
    transactions = perftesting.get_default_transactions()
    prepared = itemmining.get_relim_input(transactions)
    itemsets = itemmining.relim(prepared, 2)
    mined = assocrules.mine_assoc_rules(itemsets, min_support=2,
                                        min_confidence=0.75)
    self.assertEqual(len(mined), 8)
    expected = (frozenset(["b"]), frozenset(["d"]), 6, 0.75)
    self.assertIn(expected, mined)
def testDefaultSupportConf(self):
    """Default-confidence mining on both sample transaction sets."""
    for transactions, expected_count, expected_rule in (
            (perftesting.get_default_transactions(), 23,
             (frozenset(['b', 'e']), frozenset(['d']), 2, 1.0)),
            (perftesting.get_default_transactions_alt(), 29,
             (frozenset(['e']), frozenset(['a', 'd']), 2, 2.0 / 3.0))):
        prepared = itemmining.get_relim_input(transactions)
        itemsets = itemmining.relim(prepared, 2)
        mined = assocrules.mine_assoc_rules(itemsets, min_support=2)
        self.assertEqual(len(mined), expected_count)
        self.assertIn(expected_rule, mined)
def testSupport5(self):
    """Only two rules survive an absolute support threshold of 5."""
    transactions = perftesting.get_default_transactions()
    prepared = itemmining.get_relim_input(transactions)
    itemsets = itemmining.relim(prepared, 5)
    mined = assocrules.mine_assoc_rules(itemsets, min_support=5)
    self.assertEqual(len(mined), 2)
    self.assertIn((frozenset(['d']), frozenset(['b']), 6, 0.75), mined)
def testDefaultSupportConf(self):
    """Default-confidence mining yields 20 rules on both sample sets."""
    for transactions, expected_rule in (
            (perftesting.get_default_transactions(),
             (frozenset(['b', 'e']), frozenset(['d']), 2, 1.0)),
            (perftesting.get_default_transactions_alt(),
             (frozenset(['e']), frozenset(['a', 'd']), 2, 2.0 / 3.0))):
        prepared = itemmining.get_relim_input(transactions)
        itemsets = itemmining.relim(prepared, 2)
        mined = assocrules.mine_assoc_rules(itemsets, min_support=2)
        self.assertEqual(len(mined), 20)
        self.assertIn(expected_rule, mined)
def get_association_rules(seqs, min_support=2, min_confidence=0.5):
    """Return association rules mined from the transactions in ``seqs``.

    Parameters:
        seqs: iterable of transactions (materialized with list() first).
        min_support: minimum absolute support for itemsets and rules.
        min_confidence: minimum rule confidence (previously hard-coded
            to 0.5; kept as the default for backward compatibility).
    """
    transactions = list(seqs)
    relim_input = itemmining.get_relim_input(transactions)
    item_sets = itemmining.relim(relim_input, min_support=min_support)
    rules = assocrules.mine_assoc_rules(item_sets, min_support=min_support,
                                        min_confidence=min_confidence)
    return rules
def testConfidenceForComplexRules(self):
    """A rare joint itemset still produces a confidence-1.0 rule."""
    trans = ((("a", "b"),) * 1000
             + (("a", "c"),) * 1000
             + (("a", "b", "c"),) * 5)
    prepared = itemmining.get_relim_input(trans)
    itemsets = itemmining.relim(prepared, 5)
    mined = assocrules.mine_assoc_rules(itemsets, min_support=5,
                                        min_confidence=0.9)
    self.assertEqual(len(mined), 3)
    self.assertIn((frozenset(["b", "c"]), frozenset(["a"]), 5, 1.0), mined)
def testConfidenceForComplexRules(self):
    """{b, c} -> {a} holds with full confidence despite low support."""
    ab_pairs = (('a', 'b'), ) * 1000
    ac_pairs = (('a', 'c'), ) * 1000
    triples = (('a', 'b', 'c'), ) * 5
    prepared = itemmining.get_relim_input(ab_pairs + ac_pairs + triples)
    itemsets = itemmining.relim(prepared, 5)
    mined = assocrules.mine_assoc_rules(
        itemsets, min_support=5, min_confidence=0.9)
    self.assertEqual(len(mined), 3)
    self.assertIn((frozenset(['b', 'c']), frozenset(['a']), 5, 1.0), mined)
def mine_rules_relim(self, baskets):
    """Mine association rules from `baskets` with the RELIM algorithm,
    storing itemsets in self.item_sets and rules (sorted by descending
    support) in self.rules.

    NOTE(review): this mine_assoc_rules signature (transaction count as a
    positional arg, plus min_lift) is not stock pymining — presumably a
    fork; confirm against the installed library.
    """
    print "preparing itemset"
    relim_input = itemmining.get_relim_input(baskets)
    print "finding frequent itemsets"
    # self.min_support is relative; scale by basket count to get an
    # absolute support threshold.
    self.item_sets = itemmining.relim(relim_input, min_support = len(baskets) * self.min_support)
    print "finding association rules"
    self.rules = assocrules.mine_assoc_rules(self.item_sets, len(baskets), min_support = self.min_support, min_confidence = self.min_confidence, min_lift = self.min_lift)
    # Drop dominated rules, then sort by support (descending).
    self.nonmax_suppression()
    self.rules = sorted(self.rules, key = lambda x: -x[2])
def getAssoc(transactions, min_s=2, min_c=0.5):
    """Return association rules mined per key of ``transactions``.

    Parameters:
        transactions: mapping from a key to an iterable of transactions.
        min_s: minimum absolute support for itemsets and rules.
        min_c: minimum rule confidence.

    Returns:
        dict mapping each key to the list of rules pymining mined for
        that key's transactions.
    """
    result = {}
    # Iterate the mapping directly; .keys() was redundant.
    for key in transactions:
        relim_input = itemmining.get_relim_input(transactions[key])
        itemset = itemmining.relim(relim_input, min_support=min_s)
        result[key] = assocrules.mine_assoc_rules(itemset,
                                                  min_support=min_s,
                                                  min_confidence=min_c)
    return result
def mine_rules_fp(self, baskets):
    """Mine association rules from `baskets` via an FP-tree (fpgrowth),
    storing itemsets in self.item_sets and maximal rules (sorted by
    descending support) in self.max_rules.

    NOTE(review): this mine_assoc_rules signature (transaction count as a
    positional arg, plus min_lift) is not stock pymining — presumably a
    fork; confirm against the installed library.
    """
    print "preparing fptree"
    # self.min_support is relative; scale by basket count.
    fptree = itemmining.get_fptree(baskets, min_support = len(baskets) * self.min_support)
    print "finding itemsets"
    self.item_sets = itemmining.fpgrowth(fptree, min_support = len(baskets) * self.min_support)
    print "found {} frequent sequences".format(len(self.item_sets))
    print "finding association rules"
    self.rules = assocrules.mine_assoc_rules(self.item_sets, len(baskets), min_support = self.min_support, min_confidence = self.min_confidence, min_lift = self.min_lift)
    # Drop dominated rules, then sort by support (descending).
    self.nonmax_suppression()
    # NOTE(review): this sorts self.max_rules while the relim variant sorts
    # self.rules — presumably nonmax_suppression populates max_rules; verify.
    self.max_rules = sorted(self.max_rules, key = lambda x: -x[2])
    print "found {} maximal rules with sufficient lift".format(len(self.max_rules))
def execute(self, data_source):
    """Mine association rules from the CSV at ``data_source``.

    Each CSV row is treated as one transaction; thresholds come from
    self.support and self.confidence. Returns the rules formatted as
    one "lhs => rhs" line per rule.
    """
    import csv
    with open(data_source, 'r') as handle:
        transactions = list(csv.reader(handle))

    prepared = itemmining.get_relim_input(transactions)
    itemsets = itemmining.relim(prepared, min_support=self.support.get())
    mined = assocrules.mine_assoc_rules(
        itemsets,
        min_support=self.support.get(),
        min_confidence=self.confidence.get_float())

    lines = []
    for rule in mined:
        print(rule[0])
        lines.append(", ".join(rule[0]) + " => " + ", ".join(rule[1]) + "\n")
    return "".join(lines)
def association_rules(data, min_support, min_confidence):
    """Generates association rules from crawled data."""
    badges = data['badges']
    # pymining only works if the identifiers are one-character strings,
    # so encode each badge index as a single character.
    encoded = tuple(tuple(chr(b) for b in t) for t in data['transactions'])
    prepared = itemmining.get_relim_input(encoded)
    itemsets = itemmining.relim(prepared, min_support=min_support)
    mined = assocrules.mine_assoc_rules(itemsets,
                                        min_support=min_support,
                                        min_confidence=min_confidence)
    # Translate the one-character identifiers back into badge names.
    return [
        [frozenset(badges[ord(ch)] for ch in lhs),
         frozenset(badges[ord(ch)] for ch in rhs),
         support,
         confidence]
        for lhs, rhs, support, confidence in mined
    ]
def fun2():
    """Interactive demo: mine and show association rules for a tiny
    transaction database, pausing for user input between steps."""
    # Single-item transactions must be 1-tuples: a bare ('b') is just the
    # string 'b'. It only worked before because the item was one character.
    transactions = (('a', 'b', 'c'), ('b',), ('a',), ('a', 'c', 'd'),
                    ('b', 'c'), ('b', 'c'))
    relim_input = itemmining.get_relim_input(transactions)
    item_sets = itemmining.relim(relim_input, min_support=2)
    rules = assocrules.mine_assoc_rules(item_sets, min_support=2,
                                        min_confidence=0.5)
    print("The default transactions data is:")
    print(transactions)
    time.sleep(0.5)
    input("Press any button to continue...")
    print(
        "Here is the association rules we have mined. Frozenset means the pattern in the transactions"
    )
    time.sleep(1)
    print(rules)
    # Fixed a typo in the explanatory note ({e'} -> {'e'}).
    print(
        "\nNote:(frozenset({'e'}), frozenset({'b', 'd'}), 2, 1.0) means:\n # e -> b, d with support 2 and confidence 0.66"
    )
    input("Press Any button to return to CONTENT")
def getAssoc2(ts, groups, reverse, min_s=2, min_c=0.5):
    """Return, per cis key in ``groups``, the best single-antecedent
    rule score mined from ``ts``.

    A rule's score is ``(len(consequent) + 1) * confidence``. For every
    single-item antecedent the maximum score is kept, then mapped back
    to cis keys via ``frozenset({groups[cis]})``.

    Parameters:
        ts: iterable of transactions.
        groups: mapping cis -> antecedent item.
        reverse: accepted for signature compatibility; not used here.
        min_s: minimum absolute support.
        min_c: minimum rule confidence.
    """
    crules = {}
    result = {}
    relim_input = itemmining.get_relim_input(ts)
    itemset = itemmining.relim(relim_input, min_support=min_s)
    rules = assocrules.mine_assoc_rules(itemset, min_support=min_s,
                                        min_confidence=min_c)
    # Keep the best score per single-item antecedent.
    for rule in rules:
        if len(rule[0]) > 1:
            continue
        score = (len(rule[1]) + 1) * rule[3]
        if rule[0] not in crules or score > crules[rule[0]]:
            crules[rule[0]] = score
    # Map the best scores back onto the cis keys.
    for cis in groups:
        key = frozenset({groups[cis]})
        if key in crules:
            result[cis] = crules[key]
    return result
def _execute(self):
    """Fetch hashtag transactions for self.name and mine itemsets/rules."""
    self.transactions = mongoComputeHashTagItemSets(self.name)
    prepared = itemmining.get_relim_input(self.transactions)
    self.item_sets = itemmining.relim(prepared, self.min_support)
    self.rules = assocrules.mine_assoc_rules(self.item_sets,
                                             self.min_support,
                                             self.min_confidence)
# # # print transactions # # relim_input = itemmining.get_relim_input(transactions) # # print 0.1*i # item_sets = itemmining.relim(relim_input, min_support=int(len(transactions)*0.1*3)) ## print len(item_sets) # rules = assocrules.mine_assoc_rules(item_sets, min_support=int(len(transactions)*0.1*3), min_confidence=0.1*i) # print len(rules) transactions = genOriginalActDietTypeDataSetForMoreSleep() relim_input = itemmining.get_relim_input(transactions) item_sets = itemmining.relim(relim_input, min_support=int(len(transactions)*0.1*3)) #print item_sets rules = assocrules.mine_assoc_rules(item_sets, min_support=len(transactions)*0.3, min_confidence=0.70) print rules #df = newDataProcess.newFeatureFrame() #df = df[df['label']==1] #print df.shape[0] #for factor in ['bike','leisure','starchyP','fruitP']: # df_temp1 = df[df[factor]>0] # #print df_temp1.shape[0] # df_temp2 = df[df['workStudy']>0] # #print df_temp2.shape[0] # df_temp3 = df_temp1[df_temp1['workStudy']>0] # #print df_temp3.shape[0] # print (float(df_temp3.shape[0])/(df_temp1.shape[0]*df_temp2.shape[0]))*df.shape[0]
# Exploratory notebook cell: frequent-itemset / association-rule / sequence
# mining over restaurant-inspection violation codes (Python 2, IPython).
import pandas as pd
import numpy as np
from pymining import seqmining, itemmining, assocrules, perftesting
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

sns.set()
studydf = pd.read_csv("studydf.csv")
violationdf = studydf[['INSPECTION DATE','VIOLATION CODE']].reset_index()
violationdf['VIOLATION CODE'] = violationdf['VIOLATION CODE'].astype('str')
# Bar chart of the 20 most frequent violation codes.
plotseries = violationdf['VIOLATION CODE'].value_counts().iloc[0:20]
ax = sns.barplot(y=plotseries.index, x=plotseries.values, palette="Blues_d")
# Group the violation codes of each restaurant into one transaction.
# NOTE(review): 'CAMIS' must have been the original index materialized by
# reset_index() above — confirm studydf is indexed by CAMIS.
testdf = violationdf.groupby(['CAMIS'])['VIOLATION CODE'].apply(list)
# Only the first 10 restaurants are mined here.
minelist = testdf.tolist()[0:10]
minelist = tuple(tuple(x) for x in minelist)
relim_input = itemmining.get_relim_input(minelist)
item_sets = itemmining.relim(relim_input, min_support=2)
rules = assocrules.mine_assoc_rules(item_sets, min_support=2, min_confidence=0.5)
print rules
# Frequent sequences (order-sensitive) over the same transactions.
freq_seqs = seqmining.freq_seq_enum(minelist, 2)
print freq_seqs
# NOTE(review): item_sets was mined at min_support=2, so lowering
# min_support to 1 here cannot surface anything below support 2 —
# confirm the intent of this second call.
rules2 = assocrules.mine_assoc_rules(item_sets, min_support=1, min_confidence=0.5)
print rules2
from pymining import itemmining,assocrules,perftesting #support #s(X→Y)=σ(X∪Y)/N #confidence #c(X→Y)=σ(X∪Y)/σ(X) #transactions = perftesting.get_default_transactions() transactions =\ [['bread','milk'], ['bread','diaper','beer','eggs'], ['milk','diaper','beer','cola'], ['bread','milk','diaper','beer'], ['bread','milk','diaper','cola']] print('**************** transactions ****************') for transaction in transactions : print(transaction) relim_input = itemmining.get_relim_input(transactions) item_sets = itemmining.relim(relim_input,min_support = 2) print('**************** item sets ****************') for item in item_sets : print(item) rules =\ assocrules.mine_assoc_rules(item_sets,min_support = 3,min_confidence = 0.8) print('**************** rules ****************') for rule in rules : print(str(rule[0]) + ' --> ' + str(rule[1]))
# NOTE(review): this chunk begins mid-function — the enclosing
# transaction_list(...) definition starts above this excerpt, so the
# indentation of the first lines is a best-effort reconstruction.
        # Keep only the shops with a positive count for this client.
        index = data[data > 0]
        for element in index.index:
            list_internal.append(element)
        list_external.append(list_internal)
    return list_external


# Build one transaction per client.
transactions = transaction_list(dd)
# The list of shops visited by client number [0].
print(transactions[0])
# Prepare the pymining input structure.
relim_input = itemmining.get_relim_input(transactions)
item_sets = itemmining.relim(relim_input, min_support=1)
# Thresholds: minimum rule support and minimum confidence.
rules = assocrules.mine_assoc_rules(item_sets, min_support=10, min_confidence=0.3)


def write_rules2(rul):
    """Format mined association rules as [lhs, rhs, support, confidence]
    rows, joining each side's items with '-' separators."""
    retMass = []
    for el in rul:
        basis = ''
        for iterator in iter(el[0]):
            basis = basis + iterator + '-'
        conclusion = ''
        for iterator in iter(el[1]):
            conclusion = conclusion + iterator + '-'
        retMass.append([basis, conclusion, str(el[2]), str(el[3])])
    return retMass
#!/usr/bin/env python
from pymining import itemmining, assocrules

# Small hand-written transaction database.
data = (('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'),
        ('a', 'f', 'g'),
        ('b', 'd', 'e', 'f', 'j'),
        ('a', 'b', 'd', 'i', 'k'),
        ('a', 'b', 'e', 'g'))

min_sup = 3
min_conf = 0.5

# Frequent itemsets via RELIM.
relim_input = itemmining.get_relim_input(data)
frequent_itemsets = itemmining.relim(relim_input, min_sup)

# Association rules mined over those itemsets.
results = assocrules.mine_assoc_rules(frequent_itemsets, min_sup, min_conf)

for itemset, count in frequent_itemsets.items():
    print(str(itemset) + " : " + str(count))
for rule in results:
    print(str(rule))
# NOTE(review): this chunk begins mid-function — the enclosing
# get_product_names(...) definition starts above this excerpt.
    return tuple([df_fruits.loc[id, 'product_name'] for id in list_ids])


# Resolve product ids in the top rules back to product names.
top_rules_names = [tuple(list(map(get_product_names, rule[:2]))+ [rule[2]])
                   for rule in top_rules]

#%% Fast implemented
#This takes long
from pymining import itemmining, assocrules
transactions=aisle_orders['products']
# NOTE: `item_sets` is reused — first as the relim input, then (below)
# as the mined itemsets.
item_sets = itemmining.get_relim_input(transactions)
#%%
# Absolute support threshold for this aisle.
min_supp= SUPPORT_THRESHOLD * NUMBER_ORDERS_AISLE
item_sets = itemmining.relim(item_sets, min_support=min_supp)
#%%
# Sweep confidence thresholds, timing rule mining and recording the rule
# count and the longest consequent at each threshold.
thresholds = [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]
times = []
max_lengths = []
numbers = []
for t in thresholds:
    start = time.time()
    rules = assocrules.mine_assoc_rules(item_sets, min_support=min_supp,
                                        min_confidence=t)
    execution_time = time.time() - start
    times.append(execution_time)
    max_lengths.append(max([len(i[1]) for i in rules]))
    numbers.append(len(rules))
def frequency_item_set(self, columns = None, support = 0.1, rules = False, confidence = 0.8, engine = 'pymining'):
    """
    Use frequency item set mining to find subgroups where data goes
    missing together.

    Parameters:
    ----------
    columns: list, default None
        Subset of the columns you want to use.
    support: float, default 0.1
        Minimum support to use while item set mining. Too small values
        can break memory. Support should be greater than zero and less
        than 1.
    rules: bool, default False
        Whether association rules should be mined. If True, method
        returns two dataframes instead of one.
        NOTE(review): this flag is currently ignored — rules are always
        mined and both frames are always returned; confirm intended
        behavior before relying on it.
    confidence: float, default 0.8
        Minimum confidence for rules being mined. Should be between 0
        and 1.
    engine: {'pymining'}
        Only one engine is being supported right now.

    Returns:
    -------
    item_sets_df, rules_df : DataFrame, DataFrame
        Tabulated results for itemsets and association rules mined.
    """
    from pymining import itemmining, assocrules
    if support<=0 or support>1: #support should be between one and zero.
        print('Support has to be between 0 and 1')
        return
    if confidence<0 or confidence>1: #confidence can be zero.
        print('Confidence has to be between 0 and 1')
        return
    mf_ = self._masked_missframe(where = None, columns = columns, how = 'any')
    # Converting all missing values to 1, and non-missing to nan.
    bench = pd.DataFrame(np.where(mf_, 1, np.nan), columns = mf_.columns)
    # Replacing 1's with the index of the column they belong to.
    # Converting to numbers instead of column names for supposed performance boost.
    bench = bench * list(range(0, mf_.columns.shape[0]))
    rows = bench.values
    transactions = []
    for row in rows:
        # Removing the nans in each row and compressing the rows.
        # (nan, 1, nan, 3) --> (1, 3)
        transactions.append(tuple(row[~np.isnan(row)]))
    # Converting float threshold to represent number of rows.
    support = int(support*mf_.shape[0])
    relim_input = itemmining.get_relim_input(transactions)
    item_sets = itemmining.relim(relim_input, min_support=support)
    # Converting to DataFrames and getting columns names back.
    item_sets_df = pd.DataFrame({'Itemset':list(item_sets.keys()), 'Support': list(item_sets.values())})
    item_sets_df.Itemset = item_sets_df.Itemset.apply(lambda x: mf_.columns[list(x)].tolist())
    # For now the same supports being used in FIM and Association Rules.
    rules = assocrules.mine_assoc_rules(item_sets, min_support=support, min_confidence=confidence)
    rules_df = pd.DataFrame(rules, columns = ['X =>', 'Y', 'Support', 'Confidence'])
    # Converting rules to DataFrame and getting columns names back.
    rules_df['X =>'] = rules_df['X =>'].apply(lambda x: mf_.columns[list(x)].tolist())
    rules_df['Y'] = rules_df['Y'].apply(lambda x: mf_.columns[list(x)].tolist())
    return item_sets_df, rules_df
def frequency_item_set(self,
                       columns=None,
                       support=0.1,
                       rules=False,
                       confidence=0.8,
                       engine='pymining'):
    """
    Use frequency item set mining to find subgroups where data goes
    missing together.

    Parameters:
    ----------
    columns: list, default None
        Subset of the columns you want to use.
    support: float, default 0.1
        Minimum support to use while item set mining. Too small values
        can break memory. Support should be greater than zero and less
        than 1.
    rules: bool, default False
        Whether association rules should be mined. If True, method
        returns two dataframes instead of one.
        NOTE(review): this flag is currently ignored — rules are always
        mined and both frames are always returned; confirm intended
        behavior before relying on it.
    confidence: float, default 0.8
        Minimum confidence for rules being mined. Should be between 0
        and 1.
    engine: {'pymining'}
        Only one engine is being supported right now.

    Returns:
    -------
    item_sets_df, rules_df : DataFrame, DataFrame
        Tabulated results for itemsets and association rules mined.
    """
    from pymining import itemmining, assocrules
    if support <= 0 or support > 1:
        #support should be between one and zero.
        print('Support has to be between 0 and 1')
        return
    if confidence < 0 or confidence > 1:
        #confidence can be zero.
        print('Confidence has to be between 0 and 1')
        return
    mf_ = self._masked_missframe(where=None, columns=columns, how='any')
    # Converting all missing values to 1, and non-missing to nan.
    bench = pd.DataFrame(np.where(mf_, 1, np.nan), columns=mf_.columns)
    # Replacing 1's with the index of the column they belong to.
    # Converting to numbers instead of column names for supposed performance boost.
    bench = bench * list(range(0, mf_.columns.shape[0]))
    rows = bench.values
    transactions = []
    for row in rows:
        # Removing the nans in each row and compressing the rows.
        # (nan, 1, nan, 3) --> (1, 3)
        transactions.append(tuple(row[~np.isnan(row)]))
    # Converting float threshold to represent number of rows.
    support = int(support * mf_.shape[0])
    relim_input = itemmining.get_relim_input(transactions)
    item_sets = itemmining.relim(relim_input, min_support=support)
    # Converting to DataFrames and getting columns names back.
    item_sets_df = pd.DataFrame({
        'Itemset': list(item_sets.keys()),
        'Support': list(item_sets.values())
    })
    item_sets_df.Itemset = item_sets_df.Itemset.apply(
        lambda x: mf_.columns[list(x)].tolist())
    # For now the same supports being used in FIM and Association Rules.
    rules = assocrules.mine_assoc_rules(item_sets,
                                        min_support=support,
                                        min_confidence=confidence)
    rules_df = pd.DataFrame(rules,
                            columns=['X =>', 'Y', 'Support', 'Confidence'])
    # Converting rules to DataFrame and getting columns names back.
    rules_df['X =>'] = rules_df['X =>'].apply(
        lambda x: mf_.columns[list(x)].tolist())
    rules_df['Y'] = rules_df['Y'].apply(
        lambda x: mf_.columns[list(x)].tolist())
    return item_sets_df, rules_df
# Mining parameters.
min_sup = 4
min_conf = 0.6

# Load transactions: one whitespace-separated line per transaction.
transactions = []
with open('./Frequent_Itemset.dat', 'r') as f:
    for line in f:
        transactions.append(list(line.split()))

relim_input = itemmining.get_relim_input(transactions)
report = itemmining.relim(relim_input, min_support=min_sup)

print('\n============== Frequent Itemsets ================\n')
for itemset, count in report.items():
    print(itemset, count)

print('\n\n\n============== confidence ================\n')
rules1 = assocrules.mine_assoc_rules(report, min_support=min_sup,
                                     min_confidence=min_conf)
for rule in rules1:
    print(rule)
def generate_association_rules(item_sets, support, confidence):
    """Thin wrapper around pymining's rule miner with explicit thresholds."""
    return assocrules.mine_assoc_rules(item_sets,
                                       min_support=support,
                                       min_confidence=confidence)
def Apriori_tow(data_tuple):
    """Mine and print association rules from ``data_tuple``
    (absolute support >= 100, confidence >= 0.5)."""
    prepared = itemmining.get_relim_input(data_tuple)
    frequent = itemmining.relim(prepared, min_support=100)
    mined = assocrules.mine_assoc_rules(frequent,
                                        min_support=100,
                                        min_confidence=0.5)
    print(mined)
def Apriori_three(data_tuple):
    """Mine and print association rules from pymining's default test
    transactions.

    NOTE(review): `data_tuple` is accepted but ignored — the default
    transactions are mined instead; confirm this is intentional.
    """
    transactions = perftesting.get_default_transactions()
    relim_input = itemmining.get_relim_input(transactions)
    # Frequent itemsets at an absolute minimum support of 50.
    item_sets = itemmining.relim(relim_input, min_support=50)
    # NOTE(review): rules are mined at min_support=2 although the itemsets
    # above required support >= 50 — the mismatch looks unintended; verify.
    rules = assocrules.mine_assoc_rules(item_sets, min_support=2, min_confidence=0.5)
    print(rules)
# groupby combine str #freq = df.groupby('零售小票编号')['类别描述'].apply(lambda x: "[%s]" % ','.join(x)) freq = df.groupby('零售小票编号')['商品类别'].apply(lambda x: ','.join(x)) freq = freq.map(lambda x: x.strip(',').split(',')) # In[56]: relim_input = itemmining.get_relim_input(freq) report = itemmining.relim(relim_input, min_support=30) report # In[57]: rules1 = assocrules.mine_assoc_rules(report, min_support=30, min_confidence=0.5) rules1 # In[58]: a = [] for line in rules1: ## (len(line[0])>1 or len(line[1])>1) could be added for filtering - k-itme set>2 ## if ('未知' not in line[0] and '未知' not in line[1]): a.append(line) # In[59]: result = pd.DataFrame( a, columns=['first_set', 'second_set', 'support', 'confidence'])
# NOTE(review): this chunk begins mid-function — the enclosing
# sv_format2(...) definition starts above this excerpt, so the leading
# indentation is a best-effort reconstruction.
    # Collect the item id from each (item, rating) pair; `rating` is
    # initialized but never used here.
    item = []
    rating = []
    for i in x:
        item.append(i[0])
    sorted_points = sorted(item)
    return sorted_points


# Build one sorted item-id transaction per record in the Spark RDD.
sparseVectorData2 = rdd2.map(lambda a :sv_format2(a[1]))
transactions = sparseVectorData2.collect()
#print sparseVectorData

#Frequent Item Set Mining
relim_input = itemmining.get_relim_input(transactions)
report = itemmining.relim(relim_input, min_support=10)
print report

#Association Rules
rules = assocrules.mine_assoc_rules(report, min_support=10, min_confidence=0.1)
print rules

#==============================================================================
# patterns = pyfpgrowth.find_frequent_patterns(transactions, 10)
# print patterns
#
# rules = pyfpgrowth.generate_association_rules(patterns, 0.5)
# print rules
#==============================================================================
def dataMining(user, password, database, output_file, support, confidence):
    """Mine frequent procedure-code itemsets and association rules from
    the hospital-discharge (SDO) tables and write a readable report.

    Parameters:
        user, password, database: MySQL connection credentials.
        output_file: path of the text report to write.
        support: minimum absolute support for itemsets and rules.
        confidence: minimum rule confidence.
    """
    from collections import defaultdict

    db = MySQLdb.connect("localhost", user, password, database)
    cursor = db.cursor()

    # All SDO record ids, in table order.
    sql1 = "SELECT `progressivoSDO` FROM `tracks`.`interventoPrincipale`;"
    cursor.execute(sql1)
    array1 = [row[0] for row in cursor.fetchall()]

    # Principal + secondary procedure codes joined on the SDO id.
    sql2 = """SELECT interventoPrincipale.interventoPrincipale,
interventiSecondari.interventiSecondari,
interventoPrincipale.progressivoSDO
FROM interventoPrincipale INNER JOIN interventiSecondari
ON interventoPrincipale.progressivoSDO=interventiSecondari.progressivoSDO"""
    cursor.execute(sql2)
    data2 = cursor.fetchall()

    # Group rows by SDO id in ONE pass. The previous version re-scanned
    # all joined rows for every SDO id (O(len(array1) * len(data2))).
    principal = {}
    secondary = defaultdict(list)
    for main_op, sec_op, sdo in data2:
        # Keep the first principal code seen per SDO (matches prior logic).
        principal.setdefault(sdo, main_op)
        secondary[sdo].append(sec_op)

    # One transaction per SDO id: [principal, secondary, secondary, ...].
    array2 = []
    for sdo in array1:
        temp = []
        if sdo in principal:
            temp.append(principal[sdo])
        temp.extend(secondary.get(sdo, []))
        array2.append(temp)

    # Renamed from `input`, which shadowed the builtin.
    relim_input = itemmining.get_relim_input(array2)
    reportFP = itemmining.relim(relim_input, min_support=support)
    with open(output_file, "w") as out:
        out.write(
            """Frequent ItemSets (procedure codes sets) Mining results: \n""")
        out.write("""\n""")
        out.write(
            """Note that due to library source code the results are displayed with the following schema: \n"""
        )
        out.write(
            """ frozenset(procedure codes sets) (support of this sets) \n """
        )
        out.write("""\n""")
        for rep1 in reportFP:
            print(rep1, reportFP[rep1])
            out.writelines(str(rep1) + str(reportFP[rep1]) + "\n")
        reportAR = assocrules.mine_assoc_rules(reportFP,
                                               min_support=support,
                                               min_confidence=confidence)
        out.write("\n")
        out.write("\n")
        out.write("\n")
        out.write(
            """Association Rules ItemSets (procedure codes sets) Mining results: \n"""
        )
        out.write("""\n""")
        out.write(
            """Note that due to library source code the results are displayed with the following schema: \n"""
        )
        out.write(
            """(frozenset(procedure codes sets 1), frozenset(procedure codes sets 2) where available, (support of this sets), (confidence of this sets)) \n """
        )
        out.write("""\n""")
        for rep2 in reportAR:
            print(rep2)
            out.write(str(rep2) + "\n")
# Final status message pointing the user at the mining report.
print("All done, check {} to see the results of the data mining".format(
    args.filename))
def association_rules(self):
    """Return association rules mined from this miner's frequent itemsets,
    using the configured min_sup and min_conf thresholds."""
    frequent = self.freq_items()
    return assocrules.mine_assoc_rules(frequent, self.min_sup, self.min_conf)
from pymining import itemmining, assocrules, perftesting
import csv


def get_my_transactions():
    '''Returns a small list of transactions. For testing purpose.'''
    return (('A0T2', 'A1T2S0'),
            ('A1T2', 'A1T2S1'),
            ('A1T3', 'A1T2S0'),
            ('A1T2S0', 'A1T2'),
            ('A1T3', 'A1T2S1'))
    # Removed: a second `return` with an alternative dataset was
    # unreachable dead code after the return above. Kept for reference:
    # return (('A1', 'T2'), ('A1', 'T2', 'S0'), ('A1', 'T2'),
    #         ('A1', 'T2', 'S0'), ('A1', 'T2'), ('A1', 'T2', 'S0'),
    #         ('A1', 'T2', 'S0'), ('A1', 'T2', 'S0'), ('A1', 'T2'),
    #         ('A1', 'T2', 'S0'))


transactions = get_my_transactions()
relim_input = itemmining.get_relim_input(transactions)
# NOTE(review): pymining treats min_support as an absolute transaction
# count; a float of 0.2 effectively disables the support filter (every
# itemset has support >= 1) — confirm a relative threshold was intended.
item_sets = itemmining.relim(relim_input, min_support=0.2)
rules = assocrules.mine_assoc_rules(item_sets, min_support=0.2,
                                    min_confidence=0.5)
print(rules)