for item in transaction:
            if not [item] in c1:
                c1.append([item])
                c1.sort()
                return map(frozenset, c1)
# Interactive IPython session log, not a standalone script: it relies on the
# `cd` and `%paste` magics and on bare expressions that echo their value.
cd /Users/KarenJung/Documents/loscanadienses
import Apriori.apriori as apriori
# %paste runs the clipboard contents; what was pasted is not recorded here.
%paste
# Inspect the names currently defined in the session.
globals()
dataSet
ds
suppData
# Re-import the module (presumably after editing its source).
reload(apriori)
dataSet = ds
dataSet
# Run apriori with minSupport=0.5; the result is unpacked into two values.
L, suppData = apriori.apriori(dataSet,minSupport=0.5)
L
suppData
# Generate rules with minConf=0.7, then again with the lower value 0.4.
rules=apriori.generateRules(L,suppData,minConf=0.7)
ds
rules=apriori.generateRules(L,suppData,minConf=0.4)
import pandas as pd
# Load the CSV into a DataFrame and look at its contents.
sant_df = pd.read_csv('Data/train_ver2.csv')
sant_df.head()
sant_df.describe()
len(sant_df)
sant_df.columns
# Pair each column name with its positional index.
zip(range(len(sant_df.columns)),sant_df.columns)
# Keep the first 10000 rows and the columns from position 24 onward.
baskets_df = sant_df.iloc[:10000,24:]
len(baskets_df)
baskets_df.head()
        for item in transaction:
            if not [item] in c1:
                c1.append([item])
                c1.sort()
                return map(frozenset, c1)
# Interactive IPython session log, not a standalone script: it relies on the
# `cd` and `%paste` magics and on bare expressions that echo their value.
cd /Users/KarenJung/Documents/loscanadienses
import Apriori.apriori as apriori
# %paste runs the clipboard contents; what was pasted is not recorded here.
%paste
# Inspect the names currently defined in the session.
globals()
dataSet
ds
suppData
# Re-import the module (presumably after editing its source).
reload(apriori)
dataSet = ds
dataSet
# Run apriori with minSupport=0.5; the result is unpacked into two values.
L, suppData = apriori.apriori(dataSet,minSupport=0.5)
L
suppData
# Generate rules with minConf=0.7, then again with the lower value 0.4.
rules=apriori.generateRules(L,suppData,minConf=0.7)
ds
rules=apriori.generateRules(L,suppData,minConf=0.4)
import pandas as pd
# Load the CSV into a DataFrame and look at its contents.
sant_df = pd.read_csv('Data/train_ver2.csv')
sant_df.head()
sant_df.describe()
len(sant_df)
sant_df.columns
# Pair each column name with its positional index.
zip(range(len(sant_df.columns)),sant_df.columns)
# Keep the first 10000 rows and the columns from position 24 onward.
baskets_df = sant_df.iloc[:10000,24:]
len(baskets_df)
baskets_df.head()
예제 #3
0
from Apriori import apriori, generateRule


def loadDataSet():
    """Return a small hard-coded list of four transactions.

    Each transaction is a list of integer item ids. A new list object is
    built on every call.
    """
    transactions = [
        [1, 3, 4],
        [3, 4, 5],
        [1, 3, 4, 5],
        [1, 4, 6],
    ]
    return transactions


if __name__ == "__main__":
    # Demo run: feed the sample transactions to apriori, then print every
    # rule produced by generateRules, one per line.
    # Both calls receive 0.5 as their last positional argument (presumably
    # the minimum support and minimum confidence -- confirm against the
    # Apriori package).
    transactions = loadDataSet()
    frequent_sets, support_data = apriori.apriori(transactions, 0.5)
    for rule in generateRule.generateRules(frequent_sets, support_data, 0.5):
        print(rule)
예제 #4
0
# Interactive IPython session log, not a standalone script: it relies on the
# `pwd`/`cd` magics and the `%%time` cell magic.
pwd
cd /Users/KarenJung/Documents/loscanadienses
import Apriori.apriori as apriori
import Santander.baskets as sbaskets
# NOTE(review): `pd` is never imported in this log -- presumably it was
# already present in the session namespace.
sant_df = pd.read_csv('Data/train_ver2.csv')
customers = sbaskets.customers(sant_df)
# onlyMonth is retried for the date '2016-04-28' with the full customer list,
# then with the first 10 and first 1000 customers, reloading the module
# between attempts.
april = sbaskets.onlyMonth(sant_df,'2016-04-28',customers)
april = sbaskets.onlyMonth(sant_df,'2016-04-28',customers[:10])
reload(sbaskets)
april = sbaskets.onlyMonth(sant_df,'2016-04-28',customers[:10])
april = sbaskets.onlyMonth(sant_df,'2016-04-28',customers[:1000])
reload(sbaskets)
april = sbaskets.onlyMonth(sant_df,'2016-04-28',customers[:1000])
%%time
# Timed runs follow. The upper bound of the second makeBaskets argument
# shrinks from 1000000 to 100000 to 10000 (presumably a row range, with
# [24,48] a column range -- TODO confirm against makeBaskets).
sb = sbaskets.makeBaskets(sant_df,[0,1000000],[24,48])
Ls, supp = apriori.apriori(sb,minSupport=0.01)
%%time
sb = sbaskets.makeBaskets(sant_df,[0,100000],[24,48])
%%time
Ls, supp = apriori.apriori(sb,minSupport=0.01)
%%time
sb = sbaskets.makeBaskets(sant_df,[0,10000],[24,48])
%%time
# minSupport is raised from 0.01 to 0.05 for this and the next run.
Ls, supp = apriori.apriori(sb,minSupport=0.05)
sb[:10]
%%time
# NOTE(review): withNcodpers is passed to apriori.apriori here but to
# sbaskets.makeBaskets a few lines below -- verify which function accepts it.
Ls, supp = apriori.apriori(sb,minSupport=0.05,withNcodpers=False)
help(sbaskets.makeBaskets)
%%time
sb = sbaskets.makeBaskets(sant_df,[0,10000],[24,48],withNcodpers=False)
%%time
예제 #5
0
# Mine association rules over several random batches of customer rows.
# Relies on names defined outside this snippet: `np`, `sant_df`, `sbaskets`
# and `apriori`.
min_supp = 0.02   # minimum support passed to apriori.apriori
min_conf = 0.25   # minimum confidence passed to apriori.generateRules
rule_batches = []
nbatches = 20
batchsize = 10000
seed = 666
# random state (to set seed for repeatable training); the same RandomState
# object is reused for every batch, so each batch draws a different sample
# while the overall sequence stays reproducible
ran_state = np.random.RandomState(seed)
# `range` instead of `xrange` so the loop runs on both Python 2 and Python 3
for i in range(nbatches):
    print('Training with batch number ... %d' % i)
    sample_df = sant_df.sample(n=batchsize, random_state=ran_state)
    # convert batch of customer data into "baskets" of financial products
    sb = sbaskets.makeBaskets(sample_df, (0, batchsize), (24, 48),
                              withNcodpers=False)
    # find all sets of products that occur in at least min_supp of the baskets
    Ls, supp = apriori.apriori(sb, minSupport=min_supp)
    # find all rules that have confidence at least min_conf
    # (i.e. a rule A=>B is a pair of subsets A and B such that B occurs in
    # at least a fraction min_conf of the baskets in which A occurs)
    rule_batches.append(apriori.generateRules(Ls, supp, minConf=min_conf))
# flatten list of rules into list of pairs of form ((ls, rs), conf),
# where ls and rs are *sorted* lists of left-hand-sides and right-hand-sides
# of the rules; the loop variable is named `batch` so it no longer shadows
# the `rules` result being built
rules = [((sorted(rule[0]), sorted(rule[1])), rule[2])
         for batch in rule_batches for rule in batch]
# loop over rules and average confidence of each rule
rules_unique = []
confs_average = []
for rule in rules:
    if rule[0] in rules_unique:
        confs_average[rules_unique.index(rule[0])] += rule[1]