Example #1
0
def getCooccur(ts,groups,reverse,min_s=2,min_c=0.5):
    """Map ordered pairs of ``reverse`` keys to an association-rule confidence.

    Frequent item sets are mined from ``ts`` with relim and association
    rules are derived from them.  Rules whose antecedent holds more than one
    item are ignored; for the remaining rules only the first one encountered
    per antecedent is indexed, as ``{consequent item: confidence}``.

    ``groups`` is accepted but not used here.

    Returns a dict ``{(x, y): confidence}`` for every pair of keys of
    ``reverse`` such that ``frozenset({x})`` is an indexed antecedent whose
    consequent contains ``y``.
    """
    frequent = itemmining.relim(itemmining.get_relim_input(ts),
                                min_support=min_s)
    mined = assocrules.mine_assoc_rules(frequent, min_support=min_s,
                                        min_confidence=min_c)

    # antecedent -> {consequent item: confidence}, first rule wins.
    by_antecedent = {}
    for mined_rule in mined:
        antecedent = mined_rule[0]
        if len(antecedent) > 1 or antecedent in by_antecedent:
            continue
        by_antecedent[antecedent] = dict(
            (item, mined_rule[3]) for item in mined_rule[1])

    pairs = {}
    for x in reverse.keys():
        targets = by_antecedent.get(frozenset({x}))
        if targets is None:
            continue
        for y in reverse.keys():
            if y in targets:
                pairs[(x, y)] = targets[y]
    return pairs
Example #2
0
def associationRules(transactions, userid, followed=(), c_userid=None):
    """Recommend user ids with association rules (basket analysis).

    Frequent item sets are mined from ``transactions`` (minimum support
    count 2) and turned into rules (minimum confidence 0.5).  A rule is kept
    when ``userid`` is in its antecedent, none of ``followed`` and not
    ``c_userid`` appear in its consequent, and its lift is greater than 1.

    Lift is ``confidence / support(consequent)``, where the consequent's
    support is its share of all transactions.  (Dividing by the support of
    the whole rule instead reduces to ``N / count(antecedent)``, which is
    never below 1 and makes the lift filter meaningless.)

    Args:
        transactions: sized collection of transactions (iterables of ids).
        userid: id that must appear in a rule's antecedent.
        followed: ids that must not appear in a rule's consequent.
        c_userid: one more id that must not appear in a rule's consequent.

    Returns:
        list of the distinct ids found in the consequents of the kept rules
        (unordered).
    """
    relim_input = itemmining.get_relim_input(transactions)
    item_sets = itemmining.relim(relim_input, min_support=2)
    rules = assocrules.mine_assoc_rules(item_sets,
                                        min_support=2,
                                        min_confidence=0.5)

    n_transactions = float(len(transactions))
    recom_user = {}
    for rule_user in rules:
        antecedent, consequent, confidence = (rule_user[0], rule_user[1],
                                              rule_user[3])
        if userid not in antecedent:
            continue
        if any(user in consequent for user in followed):
            continue
        if c_userid in consequent:
            continue
        # Support of the consequent alone.  Every subset of a frequent item
        # set is itself frequent, so the lookup in item_sets always succeeds.
        consequent_support = item_sets[consequent] / n_transactions
        # Lift > 1 means the antecedent makes the consequent more likely.
        lift = confidence / consequent_support
        if lift <= 1:
            continue
        recom_user[consequent] = lift

    recom_user_sorted = sorted(recom_user.items(),
                               key=lambda x: x[1],
                               reverse=True)
    print("*" * 100)
    print("ユーザーレコメンド(バスケット分析)")
    print(recom_user_sorted)
    print("*" * 100)

    rcom_userid_list = set()
    for consequent, _lift in recom_user_sorted:
        rcom_userid_list.update(consequent)
    return list(rcom_userid_list)
def printPyminingResult(transactions, support, confidence):
    """Mine ``transactions`` with pymining and print item sets and rules.

    ``support`` is multiplied by the hard-coded constant 196 to obtain the
    minimum support passed to pymining, and rule supports are divided by the
    same constant when printed.
    NOTE(review): 196 is presumably the number of transactions in the data
    set this was written for -- confirm before reusing with other data.

    ``confidence`` is passed through as the minimum rule confidence.
    Nothing is returned; all output goes to stdout (Python 2 print syntax).
    """
    print '\n\nPymining algorithm:'
    relim_input = itemmining.get_relim_input(transactions)
    item_sets = itemmining.relim(relim_input, min_support=support * 196)

    print 'Frequent item set:( size:', len(item_sets), ')'
    # Trailing commas keep every item set on the same output line.
    for key in item_sets:
        print '[',
        for keys in key:
            print keys, ',',
        print '], ',

    rules = assocrules.mine_assoc_rules(item_sets,
                                        min_support=support * 196,
                                        min_confidence=confidence)
    print '\n\nRules:'
    # Each rule is indexed as (antecedent, consequent, support, confidence).
    for rule in rules:
        print '[',
        for _ in rule[0]:
            print _,
            # A comma follows every item (the last one too) when the side
            # has more than one item.
            if (len(rule[0]) > 1):
                print ',',
        print '->',
        for _ in rule[1]:
            print _,
            if (len(rule[1]) > 1):
                print ',',
        print '], confidence:', rule[3], ', support:', rule[2] / float(196)
def main(transactions, min_sup, min_conf):
    """Mine frequent items with ``freq_mining`` and print them.

    Association rules are also derived from the frequent items through
    pymining, using the thresholds stored on the miner, but the result is
    not used.
    """
    miner = freq_mining(transactions, min_sup, min_conf)
    frequent = miner.freq_items()
    # Rules are mined for parity with miner.association_rules(); the value
    # is discarded.
    assocrules.mine_assoc_rules(frequent, miner.min_sup, miner.min_conf)

    print(frequent)
    def testConfidence075(self):
        """Default transactions, support 2, confidence 0.75: 8 rules."""
        transactions = perftesting.get_default_transactions()
        frequent = itemmining.relim(
            itemmining.get_relim_input(transactions), 2)
        mined = assocrules.mine_assoc_rules(
            frequent, min_support=2, min_confidence=0.75)
        self.assertEqual(8, len(mined))

        # b -> d with support 6 and confidence 0.75 must be among them.
        expected = (frozenset(["b"]), frozenset(["d"]), 6, 0.75)
        self.assertTrue(expected in mined)
    def testDefaultSupportConf(self):
        """Rule counts for both default data sets with default confidence."""
        cases = (
            (perftesting.get_default_transactions, 23,
             (frozenset(['b', 'e']), frozenset(['d']), 2, 1.0)),
            (perftesting.get_default_transactions_alt, 29,
             (frozenset(['e']), frozenset(['a', 'd']), 2, 2.0/3.0)),
        )
        for load, expected_count, expected_rule in cases:
            frequent = itemmining.relim(
                itemmining.get_relim_input(load()), 2)
            mined = assocrules.mine_assoc_rules(frequent, min_support=2)
            self.assertEqual(expected_count, len(mined))
            self.assertTrue(expected_rule in mined)
Example #7
0
    def testSupport5(self):
        """Default transactions with minimum support 5 yield two rules."""
        transactions = perftesting.get_default_transactions()
        frequent = itemmining.relim(
            itemmining.get_relim_input(transactions), 5)
        mined = assocrules.mine_assoc_rules(frequent, min_support=5)
        self.assertEqual(2, len(mined))

        # d -> b with support 6 and confidence 0.75 must be one of them.
        expected = (frozenset(['d']), frozenset(['b']), 6, 0.75)
        self.assertTrue(expected in mined)
Example #8
0
    def testDefaultSupportConf(self):
        """Rule counts for both default data sets with default confidence."""
        cases = (
            (perftesting.get_default_transactions, 20,
             (frozenset(['b', 'e']), frozenset(['d']), 2, 1.0)),
            (perftesting.get_default_transactions_alt, 20,
             (frozenset(['e']), frozenset(['a', 'd']), 2, 2.0/3.0)),
        )
        for load, expected_count, expected_rule in cases:
            frequent = itemmining.relim(
                itemmining.get_relim_input(load()), 2)
            mined = assocrules.mine_assoc_rules(frequent, min_support=2)
            self.assertEqual(expected_count, len(mined))
            self.assertTrue(expected_rule in mined)
    def testSupport5(self):
        """Default transactions with minimum support 5 yield two rules."""
        transactions = perftesting.get_default_transactions()
        frequent = itemmining.relim(
            itemmining.get_relim_input(transactions), 5)
        mined = assocrules.mine_assoc_rules(frequent, min_support=5)
        self.assertEqual(2, len(mined))

        # d -> b with support 6 and confidence 0.75 must be one of them.
        expected = (frozenset(['d']), frozenset(['b']), 6, 0.75)
        self.assertTrue(expected in mined)
def get_association_rules(seqs, min_support=2):
    """Mine association rules (minimum confidence 0.5) from ``seqs``.

    ``seqs`` is materialised into a list and used as the transactions.
    ``min_support`` is applied both to the frequent item sets and to the
    rules.  Returns whatever ``assocrules.mine_assoc_rules`` produces.
    """
    frequent = itemmining.relim(
        itemmining.get_relim_input(list(seqs)), min_support=min_support)
    return assocrules.mine_assoc_rules(
        frequent, min_support=min_support, min_confidence=0.5)
    def testConfidenceForComplexRules(self):
        """Two large pair groups plus five triples: three rules at 0.9."""
        trans = (
            (("a", "b"),) * 1000
            + (("a", "c"),) * 1000
            + (("a", "b", "c"),) * 5
        )
        frequent = itemmining.relim(itemmining.get_relim_input(trans), 5)
        mined = assocrules.mine_assoc_rules(
            frequent, min_support=5, min_confidence=0.9)
        self.assertEqual(3, len(mined))

        # {b, c} -> a holds in all five triples.
        expected = (frozenset(["b", "c"]), frozenset(["a"]), 5, 1.0)
        self.assertTrue(expected in mined)
    def testConfidenceForComplexRules(self):
        """Two large pair groups plus five triples: three rules at 0.9."""
        trans = (
            (('a', 'b'), ) * 1000
            + (('a', 'c'), ) * 1000
            + (('a', 'b', 'c'), ) * 5
        )
        frequent = itemmining.relim(itemmining.get_relim_input(trans), 5)
        mined = assocrules.mine_assoc_rules(
            frequent, min_support=5, min_confidence=0.9)
        self.assertEqual(3, len(mined))

        # {b, c} -> a holds in all five triples.
        expected = (frozenset(['b', 'c']), frozenset(['a']), 5, 1.0)
        self.assertTrue(expected in mined)
Example #13
0
 def mine_rules_relim(self, baskets):
   print "preparing itemset"
   relim_input = itemmining.get_relim_input(baskets)
   
   print "finding frequent itemsets"
   self.item_sets = itemmining.relim(relim_input, min_support = len(baskets) * self.min_support)
   
   print "finding association rules"
   self.rules = assocrules.mine_assoc_rules(self.item_sets, len(baskets), 
       min_support = self.min_support, min_confidence = self.min_confidence, 
       min_lift = self.min_lift)
   
   # sort by support
   self.nonmax_suppression()
   self.rules = sorted(self.rules, key = lambda x: -x[2])
Example #14
0
def getAssoc(transactions,min_s=2,min_c=0.5):
    '''
    Mine association rules separately for each entry of ``transactions``.

    ``transactions`` maps a key to a collection of transactions.  The
    result maps every key to the rules mined from its collection, using
    ``min_s`` as minimum support and ``min_c`` as minimum confidence.
    '''
    def _mine(group):
        frequent = itemmining.relim(itemmining.get_relim_input(group),
                                    min_support=min_s)
        return assocrules.mine_assoc_rules(frequent, min_support=min_s,
                                           min_confidence=min_c)

    return dict((key, _mine(transactions[key]))
                for key in transactions.keys())
Example #15
0
  def mine_rules_fp(self, baskets):
    print "preparing fptree"
    fptree = itemmining.get_fptree(baskets, min_support = len(baskets) * self.min_support)
    
    print "finding itemsets"
    self.item_sets = itemmining.fpgrowth(fptree, min_support = len(baskets) * self.min_support)

    print "found {} frequent sequences".format(len(self.item_sets))
    
    print "finding association rules"
    self.rules = assocrules.mine_assoc_rules(self.item_sets, len(baskets), 
        min_support = self.min_support, min_confidence = self.min_confidence, 
        min_lift = self.min_lift)
    
    # sort by support
    self.nonmax_suppression()
    self.max_rules = sorted(self.max_rules, key = lambda x: -x[2])

    print "found {} maximal rules with sufficient lift".format(len(self.max_rules))
Example #16
0
    def execute(self,data_source):
        """Mine rules from the CSV file ``data_source`` and format them.

        Every CSV row is one transaction.  Thresholds come from
        ``self.support.get()`` and ``self.confidence.get_float()``.  The
        antecedent of each rule is printed as a side effect.

        Returns one line per rule, ``"a, b => c\\n"``, concatenated into a
        single string.
        """
        import csv
        with open(data_source, 'r') as f:
            transactions = list(csv.reader(f))

        frequent = itemmining.relim(
            itemmining.get_relim_input(transactions),
            min_support = self.support.get())
        mined = assocrules.mine_assoc_rules(
            frequent,
            min_support=self.support.get(),
            min_confidence=self.confidence.get_float())

        lines = []
        for antecedent_rule in mined:
            print(antecedent_rule[0])
            lines.append(", ".join(antecedent_rule[0]) + " => "
                         + ", ".join(antecedent_rule[1]) + "\n")
        return "".join(lines)
Example #17
0
def association_rules(data, min_support, min_confidence):
    """
    Generates association rules from crawled data.

    ``data['transactions']`` holds transactions of integer badge ids that
    index into ``data['badges']``.  Ids are encoded as single characters
    for mining and decoded back to badge names afterwards.

    Returns a list of ``[antecedent, consequent, support, confidence]``
    lists whose first two entries are frozensets of badge names.
    """
    badges = data['badges']

    # pymining only works, if the identifiers are one character strings :(
    encoded = tuple(tuple(chr(badge_id) for badge_id in transaction)
                    for transaction in data['transactions'])

    frequent = itemmining.relim(itemmining.get_relim_input(encoded),
                                min_support=min_support)
    mined = assocrules.mine_assoc_rules(frequent, min_support=min_support,
                                        min_confidence=min_confidence)

    def _names(chars):
        # translate identifiers back to badge names
        return frozenset(badges[ord(ch)] for ch in chars)

    return [[_names(rule[0]), _names(rule[1]), rule[2], rule[3]]
            for rule in mined]
Example #18
0
def fun2():
    """Interactive demo: mine rules from a fixed data set and explain them.

    Prints the transactions, waits for the user, prints the mined rules
    (support count >= 2, confidence >= 0.5) together with a note on how to
    read a rule tuple, then waits for the user again.
    """
    # Single-item transactions are written as real 1-tuples; ('b') is just
    # the string 'b'.
    transactions = (('a', 'b', 'c'), ('b',), ('a',), ('a', 'c', 'd'),
                    ('b', 'c'), ('b', 'c'))
    relim_input = itemmining.get_relim_input(transactions)
    item_sets = itemmining.relim(relim_input, min_support=2)
    rules = assocrules.mine_assoc_rules(item_sets,
                                        min_support=2,
                                        min_confidence=0.5)
    print("The default transactions data is:")
    print(transactions)

    time.sleep(0.5)
    input("Press any button to continue...")
    print(
        "Here is the association rules we have mined. Frozenset means the pattern in the transactions"
    )
    time.sleep(1)
    print(rules)
    # The fourth element of a rule tuple is its confidence, so the prose
    # must quote the same value as the example tuple.
    print(
        "\nNote:(frozenset({'e'}), frozenset({'b', 'd'}), 2, 1.0) means:\n # e -> b, d with support 2 and confidence 1.0"
    )
    input("Press Any button to return to CONTENT")
Example #19
0
def getAssoc2(ts,groups,reverse,min_s=2,min_c=0.5):
    """Score every key of ``groups`` by its best single-antecedent rule.

    Frequent item sets are mined from ``ts`` with relim and turned into
    association rules.  Each rule whose antecedent is a single item is
    scored as ``(len(consequent) + 1) * confidence``, and the highest score
    per antecedent is kept.

    ``reverse`` is accepted but not used here.

    Returns a dict mapping each key ``cis`` of ``groups`` to the best score
    of the antecedent ``frozenset({groups[cis]})``; keys without a matching
    rule are omitted.
    """
    relim_input=itemmining.get_relim_input(ts)
    itemset=itemmining.relim(relim_input,min_support=min_s)
    rules=assocrules.mine_assoc_rules(itemset,min_support=min_s,
                                      min_confidence=min_c)

    # Best score per antecedent.  The score must be stored the first time
    # an antecedent is seen too; updating only already-present keys leaves
    # the dict empty forever.
    crules=dict()
    for rule in rules:
        if len(rule[0])>1:
            continue
        score=(len(rule[1])+1)*rule[3]
        if rule[0] not in crules or score>crules[rule[0]]:
            crules[rule[0]]=score

    result=dict()
    for cis in groups.keys():
        key=frozenset({groups[cis]})
        if key in crules:
            result[cis]=crules[key]

    return result
Example #20
0
 def _execute(self):
     """Load the transactions for ``self.name`` and mine item sets and rules.

     Stores the results on ``self.transactions``, ``self.item_sets`` and
     ``self.rules``.
     """
     self.transactions = mongoComputeHashTagItemSets(self.name)
     self.item_sets = itemmining.relim(
         itemmining.get_relim_input(self.transactions),
         min_support=self.min_support)
     self.rules = assocrules.mine_assoc_rules(
         self.item_sets,
         min_support=self.min_support,
         min_confidence=self.min_confidence)
Example #21
0
#    
#    # print transactions
#    
#    relim_input = itemmining.get_relim_input(transactions)
#
#    print 0.1*i
#    item_sets = itemmining.relim(relim_input, min_support=int(len(transactions)*0.1*3))
##    print len(item_sets)
#    rules = assocrules.mine_assoc_rules(item_sets, min_support=int(len(transactions)*0.1*3), min_confidence=0.1*i)
#    print len(rules)

# Mine frequent item sets and association rules from the generated data set.
transactions = genOriginalActDietTypeDataSetForMoreSleep()
relim_input = itemmining.get_relim_input(transactions)
# Item sets use a truncated integer threshold (30% of the transactions)...
item_sets = itemmining.relim(relim_input, min_support=int(len(transactions)*0.1*3))
#print item_sets
# ...whereas the rules use the untruncated 30% value, so the two thresholds
# can differ.  NOTE(review): confirm whether that is intended.
rules = assocrules.mine_assoc_rules(item_sets, min_support=len(transactions)*0.3, min_confidence=0.70)
print rules 


#df = newDataProcess.newFeatureFrame()
#df = df[df['label']==1]
#print df.shape[0]
#for factor in ['bike','leisure','starchyP','fruitP']:
#    df_temp1 = df[df[factor]>0]
#    #print df_temp1.shape[0]
#    df_temp2 = df[df['workStudy']>0]
#    #print df_temp2.shape[0]
#    df_temp3 = df_temp1[df_temp1['workStudy']>0]
#    #print df_temp3.shape[0]
#    print (float(df_temp3.shape[0])/(df_temp1.shape[0]*df_temp2.shape[0]))*df.shape[0]
Example #22
0
import pandas as pd
import numpy as np
from pymining import seqmining, itemmining, assocrules, perftesting
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

# Load the study data and keep the inspection date and violation code.
studydf = pd.read_csv("studydf.csv")
violationdf = studydf[['INSPECTION DATE','VIOLATION CODE']].reset_index()
violationdf['VIOLATION CODE'] = violationdf['VIOLATION CODE'].astype('str')
# Bar plot of the 20 most frequent violation codes.
plotseries = violationdf['VIOLATION CODE'].value_counts().iloc[0:20]
ax = sns.barplot(y=plotseries.index, x=plotseries.values, palette="Blues_d")
# One transaction per CAMIS value: the list of its violation codes.
# NOTE(review): 'CAMIS' is not among the columns selected into violationdf
# above, so this groupby looks like it would raise KeyError -- confirm.
testdf = violationdf.groupby(['CAMIS'])['VIOLATION CODE'].apply(list)
# Only the first 10 transactions are mined.
minelist = testdf.tolist()[0:10]
minelist = tuple(tuple(x) for x in minelist)
relim_input = itemmining.get_relim_input(minelist)
item_sets = itemmining.relim(relim_input, min_support=2)
rules = assocrules.mine_assoc_rules(item_sets, min_support=2, min_confidence=0.5)
print rules
# Frequent sequences over the same transactions, minimum support 2.
freq_seqs = seqmining.freq_seq_enum(minelist, 2)
print freq_seqs
# Same item sets again with a rule support threshold of 1; the item sets
# themselves were mined with min_support=2.
rules2 = assocrules.mine_assoc_rules(item_sets, min_support=1, min_confidence=0.5)
print rules2
Example #23
0
from pymining import itemmining,assocrules,perftesting

#support
#s(X→Y)=σ(X∪Y)/N
#confidence
#c(X→Y)=σ(X∪Y)/σ(X)

#transactions = perftesting.get_default_transactions()
# Five market-basket transactions.
transactions = [
    ['bread', 'milk'],
    ['bread', 'diaper', 'beer', 'eggs'],
    ['milk', 'diaper', 'beer', 'cola'],
    ['bread', 'milk', 'diaper', 'beer'],
    ['bread', 'milk', 'diaper', 'cola'],
]
print('**************** transactions ****************')
for basket in transactions:
    print(basket)

# Frequent item sets with a support count of at least 2.
relim_input = itemmining.get_relim_input(transactions)
item_sets = itemmining.relim(relim_input, min_support=2)
print('**************** item sets ****************')
for frequent_set in item_sets:
    print(frequent_set)

# Rules with a support count of at least 3 and confidence of at least 0.8.
rules = assocrules.mine_assoc_rules(
    item_sets, min_support=3, min_confidence=0.8)
print('**************** rules ****************')
for mined_rule in rules:
    print('{} --> {}'.format(mined_rule[0], mined_rule[1]))

Example #24
0
        index = data[data > 0]
        for element in index.index:
            list_internal.append(element)
        list_external.append(list_internal)
    return list_external


transactions = transaction_list(
    dd)  # build the list of transactions, one per client
print(transactions[0]
      )  # list of shops visited by the client at index 0
relim_input = itemmining.get_relim_input(
    transactions)  # prepare the input structure for relim
item_sets = itemmining.relim(relim_input, min_support=1)
rules = assocrules.mine_assoc_rules(
    item_sets, min_support=10, min_confidence=0.3
)  # set the support threshold and the confidence threshold for the rules


def write_rules2(rul):  # serialise association rules into rows of strings
    """Convert rules into ``[basis, conclusion, support, confidence]`` rows.

    For every rule, the items of element 0 and of element 1 are each
    concatenated with a ``'-'`` after every item (including the last), and
    elements 2 and 3 are converted with ``str``.
    """
    def _dashed(items):
        return ''.join(item + '-' for item in items)

    return [[_dashed(el[0]), _dashed(el[1]), str(el[2]), str(el[3])]
            for el in rul]
Example #25
0
#!/usr/bin/env python

from pymining import itemmining, assocrules

# Five sample transactions.
data = (
    ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'),
    ('a', 'f', 'g'),
    ('b', 'd', 'e', 'f', 'j'),
    ('a', 'b', 'd', 'i', 'k'),
    ('a', 'b', 'e', 'g'),
)

min_sup = 3
min_conf = 0.5

# Frequent item sets (pymining relim), then the rules derived from them.
relim_input = itemmining.get_relim_input(data)
frequent_itemsets = itemmining.relim(relim_input, min_sup)
results = assocrules.mine_assoc_rules(frequent_itemsets, min_sup, min_conf)

# One "itemset : support" line per frequent item set.
for itemset, count in frequent_itemsets.items():
    print("%s : %s" % (itemset, count))

# One line per rule tuple.
for mined_rule in results:
    print(str(mined_rule))
Example #26
0
    return tuple([df_fruits.loc[id, 'product_name'] for id in list_ids])

# For each rule, map its first two elements through get_product_names and
# keep the third element unchanged.
top_rules_names = [tuple(list(map(get_product_names, rule[:2]))+ [rule[2]]) for rule in top_rules]
#%% Fast implemented

#This takes long
from pymining import itemmining, assocrules
transactions=aisle_orders['products']
# NOTE: item_sets first holds the relim *input*; it is rebound to the mined
# item sets two statements below.
item_sets = itemmining.get_relim_input(transactions)
#%%

# Minimum support count passed to relim and, later, to the rule miner.
min_supp= SUPPORT_THRESHOLD * NUMBER_ORDERS_AISLE
item_sets = itemmining.relim(item_sets, min_support=min_supp)

#%%
# Time rule mining at several confidence thresholds and record, for each,
# the run time, the longest consequent and the number of rules.
thresholds = [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]
times = []
max_lengths = []
numbers = []
for t in thresholds:
    start = time.time()
    rules = assocrules.mine_assoc_rules(item_sets, min_support=min_supp, min_confidence=t)
    execution_time = time.time() - start
    times.append(execution_time)
    # A threshold may yield no rules at all; record 0 instead of letting
    # max() raise ValueError on an empty sequence.
    max_lengths.append(max((len(rule[1]) for rule in rules), default=0))
    numbers.append(len(rules))


      
    
Example #27
0
    def frequency_item_set(self, columns = None, support = 0.1, rules = False, confidence = 0.8, engine = 'pymining'):
        """
        Use frequency item set mining to find subgroups where data goes 
        missing together.
        
        Parameters:
        ----------
        columns: list, default None
            Subset of the columns you want to use.
        
        support: float, default 0.1
            Minimum support to use while item set mining, as a fraction of the
            number of rows. Too small values can break memory.
            Must satisfy 0 < support <= 1.

        rules: bool, default False
            Currently not consulted: association rules are always mined and
            both dataframes are always returned.

        confidence: float, default 0.8
            Minimum confidence for rules being mined. Must satisfy
            0 <= confidence <= 1.

        engine: {'pymining'}
            Currently not consulted; pymining is always used.
        
       
        Returns:
        -------
        item_sets_df, rules_df : DataFrame, DataFrame
            Tabulated results for itemsets and association rules mined.
            If ``support`` or ``confidence`` is out of range, a message is
            printed and None is returned instead.
            
        """ 

        # Validate the cheap arguments before importing the mining engine.
        if support<=0 or support>1: #support should be between one and zero.
            print('Support has to be between 0 and 1')
            return

        if confidence<0 or confidence>1: #confidence can be zero.
            print('Confidence has to be between 0 and 1')
            return

        from pymining import itemmining, assocrules

        mf_ = self._masked_missframe(where = None, columns = columns, how = 'any')
        
        # Converting all missing values to 1, and non-missing to nan.
        bench = pd.DataFrame(np.where(mf_, 1, np.nan), columns = mf_.columns)

        # Replacing 1's with the index of the column they belong to.
        # Converting to numbers instead of column names for supposed performance boost.
        bench = bench * list(range(0, mf_.columns.shape[0]))

        transactions = []
        for row in bench.values:
            # Removing the nans in each row and compressing the rows.
            # (nan, 1, nan, 3) --> (1, 3)
            # The values are floats at this point; they are cast back to int
            # because they are used below to index mf_.columns, and float
            # positions are not valid indices.
            transactions.append(tuple(int(idx) for idx in row[~np.isnan(row)]))

        # Converting float threshold to represent number of rows.
        support = int(support*mf_.shape[0])

        relim_input = itemmining.get_relim_input(transactions)
        item_sets = itemmining.relim(relim_input, min_support=support)
        
        # Converting to DataFrames and getting columns names back.
        item_sets_df = pd.DataFrame({'Itemset':list(item_sets.keys()), 'Support': list(item_sets.values())})
        item_sets_df.Itemset = item_sets_df.Itemset.apply(lambda x: mf_.columns[list(x)].tolist())

        
        # For now the same supports being used in FIM and Association Rules.
        mined_rules = assocrules.mine_assoc_rules(item_sets, min_support=support, min_confidence=confidence)

        rules_df = pd.DataFrame(mined_rules, columns = ['X =>', 'Y', 'Support', 'Confidence'])
        # Converting rules to DataFrame and getting columns names back.
        rules_df['X =>'] = rules_df['X =>'].apply(lambda x: mf_.columns[list(x)].tolist())
        rules_df['Y'] = rules_df['Y'].apply(lambda x: mf_.columns[list(x)].tolist())
        
        return item_sets_df, rules_df
Example #28
0
    def frequency_item_set(self,
                           columns=None,
                           support=0.1,
                           rules=False,
                           confidence=0.8,
                           engine='pymining'):
        """
        Use frequency item set mining to find subgroups where data goes 
        missing together.
        
        Parameters:
        ----------
        columns: list, default None
            Subset of the columns you want to use.
        
        support: float, default 0.1
            Minimum support to use while item set mining, as a fraction of the
            number of rows. Too small values can break memory.
            Must satisfy 0 < support <= 1.

        rules: bool, default False
            Currently not consulted: association rules are always mined and
            both dataframes are always returned.

        confidence: float, default 0.8
            Minimum confidence for rules being mined. Must satisfy
            0 <= confidence <= 1.

        engine: {'pymining'}
            Currently not consulted; pymining is always used.
        
       
        Returns:
        -------
        item_sets_df, rules_df : DataFrame, DataFrame
            Tabulated results for itemsets and association rules mined.
            If ``support`` or ``confidence`` is out of range, a message is
            printed and None is returned instead.
            
        """

        # Validate the cheap arguments before importing the mining engine.
        if support <= 0 or support > 1:  #support should be between one and zero.
            print('Support has to be between 0 and 1')
            return

        if confidence < 0 or confidence > 1:  #confidence can be zero.
            print('Confidence has to be between 0 and 1')
            return

        from pymining import itemmining, assocrules

        mf_ = self._masked_missframe(where=None, columns=columns, how='any')

        # Converting all missing values to 1, and non-missing to nan.
        bench = pd.DataFrame(np.where(mf_, 1, np.nan), columns=mf_.columns)

        # Replacing 1's with the index of the column they belong to.
        # Converting to numbers instead of column names for supposed performance boost.
        bench = bench * list(range(0, mf_.columns.shape[0]))

        transactions = []
        for row in bench.values:
            # Removing the nans in each row and compressing the rows.
            # (nan, 1, nan, 3) --> (1, 3)
            # The values are floats at this point; they are cast back to int
            # because they are used below to index mf_.columns, and float
            # positions are not valid indices.
            transactions.append(
                tuple(int(idx) for idx in row[~np.isnan(row)]))

        # Converting float threshold to represent number of rows.
        support = int(support * mf_.shape[0])

        relim_input = itemmining.get_relim_input(transactions)
        item_sets = itemmining.relim(relim_input, min_support=support)

        # Converting to DataFrames and getting columns names back.
        item_sets_df = pd.DataFrame({
            'Itemset': list(item_sets.keys()),
            'Support': list(item_sets.values())
        })
        item_sets_df.Itemset = item_sets_df.Itemset.apply(
            lambda x: mf_.columns[list(x)].tolist())

        # For now the same supports being used in FIM and Association Rules.
        mined_rules = assocrules.mine_assoc_rules(item_sets,
                                                  min_support=support,
                                                  min_confidence=confidence)

        rules_df = pd.DataFrame(mined_rules,
                                columns=['X =>', 'Y', 'Support', 'Confidence'])
        # Converting rules to DataFrame and getting columns names back.
        rules_df['X =>'] = rules_df['X =>'].apply(
            lambda x: mf_.columns[list(x)].tolist())
        rules_df['Y'] = rules_df['Y'].apply(
            lambda x: mf_.columns[list(x)].tolist())

        return item_sets_df, rules_df
Example #29
0
min_sup = 4
min_conf = 0.6

# input data: one whitespace-separated transaction per line
with open('./Frequent_Itemset.dat', 'r') as f:
    data = [line.split() for line in f]

transactions = data

relim_input = itemmining.get_relim_input(transactions)
report = itemmining.relim(relim_input, min_support=min_sup)

# Frequent item sets with their support counts.
print('\n============== Frequent Itemsets ================\n')
for itemset, count in report.items():
    print(itemset, count)

print('\n\n\n============== confidence ================\n')

# Rules derived from the frequent item sets, one per line.
rules1 = assocrules.mine_assoc_rules(
    report, min_support=min_sup, min_confidence=min_conf)
for mined_rule in rules1:
    print(mined_rule)
def generate_association_rules(item_sets, support, confidence):
    """Return the rules mined from ``item_sets`` with the given thresholds.

    ``support`` and ``confidence`` are forwarded to
    ``assocrules.mine_assoc_rules`` as ``min_support`` and
    ``min_confidence``.
    """
    return assocrules.mine_assoc_rules(
        item_sets, min_support=support, min_confidence=confidence)
Example #31
0
def Apriori_tow(data_tuple):
    """Mine and print association rules for ``data_tuple``.

    Uses relim with a minimum support count of 100 and a minimum rule
    confidence of 0.5.  Nothing is returned.
    """
    frequent = itemmining.relim(
        itemmining.get_relim_input(data_tuple), min_support=100)
    print(assocrules.mine_assoc_rules(
        frequent, min_support=100, min_confidence=0.5))
Example #32
0
def Apriori_three(data_tuple):
    """Mine and print association rules for pymining's default transactions.

    Item sets are mined with a minimum support count of 50; rules use a
    minimum support of 2 and a minimum confidence of 0.5.  Nothing is
    returned.

    NOTE(review): ``data_tuple`` is not used -- the built-in sample data is
    mined instead.  Confirm whether that is intended.
    """
    sample = perftesting.get_default_transactions()
    # 50 is the minimum support for the frequent item sets.
    frequent = itemmining.relim(
        itemmining.get_relim_input(sample), min_support=50)
    print(assocrules.mine_assoc_rules(
        frequent, min_support=2, min_confidence=0.5))
Example #33
0
# Group rows by the receipt-id column and join each group's category values
# into one comma-separated string.
# NOTE(review): column names presumably mean "retail receipt number" and
# "product category" -- confirm against the data dictionary.
#freq = df.groupby('零售小票编号')['类别描述'].apply(lambda x: "[%s]" % ','.join(x))
freq = df.groupby('零售小票编号')['商品类别'].apply(lambda x: ','.join(x))
# Split each joined string back into a list of items (one transaction each).
freq = freq.map(lambda x: x.strip(',').split(','))

# In[56]:

# Frequent item sets with a support count of at least 30.
relim_input = itemmining.get_relim_input(freq)
report = itemmining.relim(relim_input, min_support=30)
report

# In[57]:

# Rules with a support count of at least 30 and confidence of at least 0.5.
rules1 = assocrules.mine_assoc_rules(report,
                                     min_support=30,
                                     min_confidence=0.5)
rules1

# In[58]:

# Keep only rules where neither side contains the '未知' item
# (presumably "unknown" -- confirm).
a = []
for line in rules1:
    # (len(line[0])>1 or len(line[1])>1) could be added here to keep only
    # rules involving more than two items.
    if ('未知' not in line[0] and '未知' not in line[1]):
        a.append(line)

# In[59]:

# Tabulate the remaining rules.
result = pd.DataFrame(
    a, columns=['first_set', 'second_set', 'support', 'confidence'])
    item = []
    rating = []
    for i in x:
        item.append(i[0])
    sorted_points = sorted(item)
    return sorted_points

# Convert the second element of every rdd2 record with sv_format2.
sparseVectorData2 = rdd2.map(lambda a :sv_format2(a[1]))

# Bring the converted records to the driver as the transaction list.
transactions = sparseVectorData2.collect()

#print sparseVectorData

# Frequent item set mining (support count of at least 10)
relim_input = itemmining.get_relim_input(transactions)
report = itemmining.relim(relim_input, min_support=10)
print report
# Association rules (support count of at least 10, confidence of at least 0.1)
rules = assocrules.mine_assoc_rules(report, min_support=10, min_confidence=0.1)
print rules

#==============================================================================
# patterns = pyfpgrowth.find_frequent_patterns(transactions, 10)
# print patterns
# 
# rules = pyfpgrowth.generate_association_rules(patterns, 0.5)
# print rules
#==============================================================================


Example #35
0
def dataMining(user, password, database, output_file, support, confidence):
    """Mine procedure-code item sets and rules from MySQL into a report file.

    One transaction is built per ``progressivoSDO`` value read from
    ``tracks.interventoPrincipale``: the principal procedure of its first
    joined row followed by the secondary procedure of every joined row.  An
    SDO without joined rows yields an empty transaction.

    Args:
        user, password, database: credentials and schema for the MySQL
            server on localhost.
        output_file: path of the text report to (over)write.
        support: minimum support for item sets and rules.
        confidence: minimum confidence for rules.

    Every item set and rule is also printed to stdout.  Nothing is returned.
    """
    db = MySQLdb.connect("localhost", user, password, database)
    try:
        cursor = db.cursor()

        sql1 = "SELECT `progressivoSDO` FROM `tracks`.`interventoPrincipale`;"
        cursor.execute(sql1)
        sdo_ids = [row[0] for row in cursor.fetchall()]

        sql2 = """SELECT interventoPrincipale.interventoPrincipale, interventiSecondari.interventiSecondari,
    interventoPrincipale.progressivoSDO
    FROM interventoPrincipale
    INNER JOIN interventiSecondari
    ON interventoPrincipale.progressivoSDO=interventiSecondari.progressivoSDO"""
        cursor.execute(sql2)
        joined_rows = cursor.fetchall()
    finally:
        # Everything needed is in memory now; release the connection even if
        # a query failed.
        db.close()

    # Index the joined rows by SDO once instead of rescanning them for every
    # SDO id.
    rows_by_sdo = {}
    for op in joined_rows:
        rows_by_sdo.setdefault(op[2], []).append(op)

    transactions = []
    for sdo in sdo_ids:
        ops = rows_by_sdo.get(sdo, [])
        # Principal procedure once, then every secondary procedure in order.
        transaction = [ops[0][0]] if ops else []
        transaction.extend(op[1] for op in ops)
        transactions.append(transaction)

    relim_input = itemmining.get_relim_input(transactions)
    reportFP = itemmining.relim(relim_input, min_support=support)

    with open(output_file, "w") as report:

        report.write(
            """Frequent ItemSets (procedure codes sets) Mining results: \n""")
        report.write("""\n""")
        report.write(
            """Note that due to library source code the results are displayed with the following schema: \n"""
        )
        report.write(
            """       frozenset(procedure codes sets) (support of this sets) \n """
        )
        report.write("""\n""")

        for rep1 in reportFP:
            print(rep1, reportFP[rep1])
            report.write(str(rep1) + str(reportFP[rep1]) + "\n")

        reportAR = assocrules.mine_assoc_rules(reportFP,
                                               min_support=support,
                                               min_confidence=confidence)

        report.write("\n")
        report.write("\n")
        report.write("\n")

        report.write(
            """Association Rules ItemSets (procedure codes sets) Mining results: \n"""
        )
        report.write("""\n""")
        report.write(
            """Note that due to library source code the results are displayed with the following schema: \n"""
        )
        report.write(
            """(frozenset(procedure codes sets 1), frozenset(procedure codes sets 2) where available, (support of this sets), (confidence of this sets)) \n """
        )
        report.write("""\n""")

        for rep2 in reportAR:
            print(rep2)
            report.write(str(rep2) + "\n")

    # Report the path that was actually written; this function receives it
    # as output_file and has no `args` of its own.
    print("All done, check %s to see the results of the data mining" %
          output_file)
Example #36
0
 def association_rules(self):
     """Return the rules mined from this object's frequent items.

     Uses ``self.min_sup`` and ``self.min_conf`` as thresholds.
     """
     return assocrules.mine_assoc_rules(self.freq_items(), self.min_sup,
                                        self.min_conf)
Example #37
0
from pymining import itemmining, assocrules, perftesting
import csv


def get_my_transactions():
    '''Returns a small list of transactions. For testing purpose.'''
    # A second data set used to follow this return statement; it could never
    # be reached and has been removed.
    return (('A0T2', 'A1T2S0'), ('A1T2', 'A1T2S1'), ('A1T3', 'A1T2S0'),
            ('A1T2S0', 'A1T2'), ('A1T3', 'A1T2S1'))


# with open('../row_data', newline='', encoding='utf-8') as csvfile:
#     file_reader = csv.reader(csvfile, delimiter=',')
#     for row in file_reader:
#         data_list = (row[2], row[3], row[4])
#         print(row[2])

# transactions = perftesting.get_default_transactions()
# Mine and print association rules for the sample transactions.
transactions = get_my_transactions()

#print(transactions)
#transactions = transactions
relim_input = itemmining.get_relim_input(transactions)
# NOTE(review): 0.2 looks like a fraction of the transactions; every other
# pymining call here passes min_support as an absolute count -- confirm that
# 0.2 gives the intended threshold.
item_sets = itemmining.relim(relim_input, min_support=0.2)
rules = assocrules.mine_assoc_rules(item_sets,
                                    min_support=0.2,
                                    min_confidence=0.5)
print(rules)