# ===========================================================================
# Mine the frequent itemsets of sub_catlist with the FP-Growth algorithm.
# ===========================================================================
catfreq_list, support_data_cat = fp_growth.fptree(
    sub_catlist, int(min_support * len(sub_catlist)))
# =======================================================================
# print('catfreq_list is: ')
# for i in range(len(catfreq_list)):
#     print(catfreq_list[i])
#     print('-----------------------------------------')
# =======================================================================
# ===========================================================================
# for i in support_data.items():
#     print(i)
# ===========================================================================

# Derive association rules from the first two itemset levels only.
big_rule_list = apriori.generate_rules(catfreq_list[:2], support_data_cat,
                                       min_confidence)
# print('rule list follows: ')

# Keep only the rules whose antecedent is exactly the category under test.
cat_rule_list = [rule for rule in big_rule_list
                 if len(rule[0]) == 1 and test_one_category in rule[0]]
print('-----------------------------------------')

# Rank the surviving rules by confidence (rule[2]), best first.
cat_rule_list.sort(key=lambda rule: rule[2], reverse=True)
# ===================================================================
# print('cat_rule_list is :')
# for i in cat_rule_list:
#     print(i)
#     print('-----------------------------------------')
# ===================================================================
# Only the top two categories of the association set are used below.
import apriori


def _report_rules(rules):
    """Print one line per rule: the rule text plus its support,
    confidence and lift metrics, formatted to four decimals."""
    for found in rules:
        line = (f'{found.format_rule():20s}\t\t'
                f'(support={found.support:0.4f}, '
                f'confidence={found.confidence:0.4f}, '
                f'lift={found.lift:0.4f})')
        print(line)


# Small worked example: four shopping baskets, 50% minimum support.
sample_transactions = [
    ['fish', 'white wine', 'cheese', 'bread'],
    ['beer', 'nachos', 'cheese', 'peanuts'],
    ['white wine', 'cheese'],
    ['white wine', 'cheese', 'bread'],
]
_report_rules(apriori.generate_rules(sample_transactions, min_support=0.5))

print("Example with string info. Note that everyone has a cat (lots of support, but lift is 1 as cat has no info)")
pet_ownership = [
    ['cat', 'dog'],
    ['cat', 'fish'],
    ['cat', 'dog', 'fish'],
    ['cat', 'fish', 'horse'],
    ['cat', 'horse', 'dog'],
    ['cat', 'horse', 'dog'],
    ['cat', 'horse', 'dog'],
]
_report_rules(apriori.generate_rules(pet_ownership, min_support=0.2,
                                     min_lift=1.05))
def recommend(recommendationfunction='trail', inputverbs=None, **kwargs):
    '''Generate new recommendation rules for every assessment of some
    importance, based on completed and launched media, questions and
    assessments.

    Note that only questions and media may be recommended, as the filter
    should already make sure that the found questions/media are only
    relevant for the current assessment.

    Parameters:
        recommendationfunction -- 'apriori' or 'trail'; selects the rule
            miner (default 'trail').
        inputverbs -- optional list of TinCan verb names whose activity is
            mined; defaults to ['completed'].
        **kwargs -- miner settings: 'minsup', 'minconf' (both required by
            either miner), 'gamma' (trail only), 'max_consequent_size'
            (apriori only, default 1), 'verbose' (default False).

    Returns:
        (rulebase, name_description) where rulebase is a list of dicts with
        keys 'milestone', 'antecedent', 'consequent', 'confidence',
        'support', and name_description maps activity id to
        (name, description).

    TODO:
    - Check if milestone passed
    - Decay for past milestones + assignments
    - Alter filter for smaller query, perhaps multiple queries?
      Don't try to run if
      1. Top X will not be altered, based on people submitted/threshold
         conf/sup
    '''
    # By default, get all info from users who completed.
    if inputverbs:
        verbs = [TinCan.VERBS[verb]['id'] for verb in inputverbs]
    else:
        verbs = [TinCan.VERBS['completed']['id']]

    # dict.get replaces the verbose "k in kwargs" conditional pattern.
    max_consequent_size = kwargs.get('max_consequent_size', 1)
    verbose = kwargs.get('verbose', False)

    milestones = defaultdict(lambda: 'NO_ASSESSMENT')  # progress per actor
    assessment_ids = []        # assessments in reverse chronological order
    transactions = dict()      # assessment -> actor -> [(object, value)]
    freq = dict()              # assessment -> {0: 1-itemset counts}
    name_description = dict()  # activity id -> (name, description)

    activities = Activity.objects.order_by("-time")
    for activity in activities:
        if activity.verb not in verbs:
            continue
        actor = activity.user
        name_description[activity.activity] = \
            (activity.name, activity.description)
        # Use assessments to separate timeslices per actor.
        if activity.type == TinCan.ACTIVITY_TYPES['assessment']:
            assessment_id = activity.activity
            milestones[actor] = assessment_id
            # For every milestone: keep track of all assessments in
            # reverse chronological order as well.
            if assessment_id not in assessment_ids:
                transactions[assessment_id] = defaultdict(list)  # verbs+objects /actor
                freq[assessment_id] = {0: defaultdict(int)}  # frequency of 1-pairs
                assessment_ids.append(assessment_id)
        else:
            assessment_id = milestones[actor]
            if assessment_id == 'NO_ASSESSMENT':
                # Activity seen before any assessment for this actor.
                continue
            value = activity.value if \
                activity.verb == TinCan.VERBS['progressed']['id'] else 1.0
            statement_obj = activity.activity
            transactions[assessment_id][actor].append((statement_obj, value))
            freq[assessment_id][0][(statement_obj,)] += 1

    # Use baskets as transactions and recommend.
    rulebase = []
    for assessment_id in assessment_ids:
        D = transactions[assessment_id]
        L = freq[assessment_id]
        rules = []
        print('Generating rules for assessment', assessment_id)
        if recommendationfunction == 'apriori':
            minsup = kwargs['minsup']
            minconf = kwargs['minconf']
            rules = apriori.generate_rules(apriori.apriori, D, L, minsup,
                                           minconf, max_consequent_size,
                                           verbose=verbose, veryverbose=False)
        elif recommendationfunction == 'trail':
            gamma = kwargs['gamma']
            minsupp = kwargs['minsup']
            minconf = kwargs['minconf']
            rules = trail.generate_rules(D, gamma, minsupp, minconf,
                                         verbose=verbose)
        # Save found rules based on the relevant assessment.
        for ante, conse, confidence, support in rules:
            rule = {'milestone': assessment_id,   # id indicating assessment
                    'antecedent': ante,           # LHS
                    'consequent': conse,          # RHS
                    'confidence': confidence,     # Confidence for LHS->RHS
                    'support': support}           # Support for the rule
            rulebase.append(rule)
    return rulebase, name_description
# -*- coding: utf-8 -*-
"""Transcript-style driver for the apriori module: candidate scan,
frequent-itemset mining at several support thresholds, rule generation,
and a run over the mushroom dataset."""
import apriori

# --- Candidate scan at 50% support -----------------------------------------
dataset = apriori.load_data_set()
c1 = apriori.create_c1(dataset)
# list() is required: on Python 3 map() returns a one-shot iterator, and
# scan_d needs a reusable sequence of transaction sets.
d = list(map(set, dataset))
ret, support_data = apriori.scan_d(d, c1, 0.5)

# --- Full frequent-set mining at two thresholds -----------------------------
L, sd = apriori.find_freq_set(dataset, 0.5)
L, sd = apriori.find_freq_set(dataset, 0.7)

# --- Rule generation at 70% confidence --------------------------------------
L, sd = apriori.find_freq_set(dataset, 0.5)
rules = apriori.generate_rules(L, sd, 0.7)

# --- Mushroom dataset: print 2-itemsets containing item '2' -----------------
# Context manager guarantees the data file is closed after reading.
with open('chapter11/mushroom.dat') as fh:
    mushroom_dataset = [line.split() for line in fh]
L, sd = apriori.find_freq_set(mushroom_dataset, 0.3)
for itemset in L[1]:
    if itemset.intersection('2'):
        print(itemset)
if testcategory[0] in catlist[i]: sub_catlist.append(catlist[i]) # =========================================================================== # 输出sub_catlist 10行 # =========================================================================== # =========================================================================== # for i in range(10): # print(sub_catlist[i]) print("sub_catlist's length is : ", len(sub_catlist)) # =========================================================================== # =========================================================================== # 输出sub_catlist中的频繁项,运用fp_growth算法 # =========================================================================== catfreq_list, support_data = fp_growth.fptree(sub_catlist, 100000) print("catfreq_list is: ") for i in range(len(catfreq_list)): print(catfreq_list[i]) # =========================================================================== # for i in support_data.items(): # print(i) # =========================================================================== big_rule_list = apriori.generate_rules(catfreq_list, support_data, 0.5) print("rule list follows: ") for rule in big_rule_list: if test_one in rule[0] and len(rule[0]) == 1: print(rule)