def test_run(self):
    """Apriori.run should produce the expected itemsets, with and without a restriction."""
    key_fn = lambda fis: (fis.support, fis.item)
    miner = Apriori(self.tidList, 63)
    found = miner.run()
    found.sort(key=key_fn)
    self.targetSet.sort(key=key_fn)
    self.assertListEqual(found, self.targetSet)
    # Restricting to a subset of items should keep only itemsets containing them.
    wanted = ["Bagel", "Cheese", "Milk"]
    restricted = Apriori(self.tidList, 63, restrict_items=wanted).run()
    restricted.sort(key=key_fn)
    expected = [fis for fis in self.targetSet if fis.contains(wanted)]
    self.assertListEqual(restricted, expected)
def main():
    """Command-line entry point: read transactions, run Apriori, write results.

    Expects exactly three CLI arguments:
    <inputFile.ext> <min_support(int)> <outputFile.ext>.
    Exits with a usage message otherwise.
    """
    # Get the command line args
    args = sys.argv[1:]
    # Check to make sure the correct number of args were supplied.
    if len(args) != 3:
        # NOTE: converted from Python 2 `print` statements to print() calls,
        # matching the Python 3 style used elsewhere in the project.
        print("Usage: python main.py <inputFile.ext> <min_support(int)> <outputFile.ext>")
        sys.exit()
    # Initialize Data class with the input file.
    data_class = Data(args[0])
    print("Getting Data from file...")
    start = time.time()
    data_array = data_class.get_data()
    end = time.time()
    print("Done - Took %1.1f seconds" % (end - start))
    min_support = int(args[1])
    a = Apriori(min_support)
    print("Performing Apriori Algorithm...")
    start = time.time()
    results = a.apriori(data_array)
    end = time.time()
    print("Done - Took %1.1f seconds" % (end - start))
    print("Writing to " + args[2] + "...")
    start = time.time()
    Output(results, args[2])
    end = time.time()
    print("Done - Took %1.1f seconds" % (end - start))
def test_apriori(data_set, min_sup=0.05):
    """Time Apriori.generate_L on data_set.

    Returns the elapsed wall-clock time in seconds as a float.
    """
    start = datetime.now()
    apriori = Apriori(data_set)
    apriori.generate_L(min_sup=min_sup)
    deltatime = datetime.now() - start
    print("Apriori over")
    # total_seconds() also accounts for the .days component, which the old
    # `seconds + microseconds / 1000000` arithmetic silently dropped.
    return deltatime.total_seconds()
def setUp(self):
    """Build the Apriori instance under test from the sample transaction file."""
    preprocessor = PreProcessor()
    preprocessor.parse_file("../data/samples.txt")
    min_support = 2.0
    min_confidence = 0.374
    self.apriori = Apriori(
        preprocessor.get_transactions(),
        preprocessor.get_uniques(),
        min_support,
        min_confidence,
    )
def gen_rules(uii, support, confidence):
    """Run Apriori on the user-item data and return (frequent 1-itemsets, rules)."""
    started = time()
    print('generate associated rules....')
    miner = Apriori(uii, support, confidence)
    items_id, allRules = miner.genRules()
    print('Time took for generating rules:', time() - started, 's')
    # items_id holds the frequent 1-itemsets.
    return (items_id, allRules)
def main():
    """Mine association rules between co-changed functions in a git repository.

    Reads CLI options (support, confidence, max_length, language, repository/dir),
    optionally clones the repository, extracts per-commit function changes with a
    language-specific parser, and prints the resulting Apriori association rules.
    """
    args = p.parse_args()
    confidence = float(args.confidence)
    support = float(args.support)
    max_length = int(args.max_length)
    print("Apriori (support=%.3f, confidence=%.3f, max_length=%d)"
          % (support, confidence, max_length))
    gitpy = git.Git(CLONE_PATH)
    # Analyze an existing folder, or clone the given repository first.
    project_path = args.dir
    if args.repository:
        project_folder = "clone-%s" % generate_hash()
        project_path = "%s/%s" % (CLONE_PATH, project_folder)
        print("Cloning repository to %s..." % project_path)
        gitpy.clone(args.repository, project_folder)
    print("number of commits: %d" % (get_commit_count(project_path)))
    print("support absolute value: %d" % (support * get_commit_count(project_path)))
    # Pick the parser matching the project language; BaseParser is the fallback.
    parser = BaseParser
    if args.lang == "go":
        parser = GoParser
    elif args.lang == "java":
        parser = JavaParser
    elif args.lang == "python":
        parser = PythonParser
    print("parsing project...")
    # One "transaction" per commit: the set of functions changed together.
    function_changes = []
    for commit in RepositoryMining(project_path).traverse_commits():
        language_parser = parser(project_path, commit.hash)
        changes = language_parser.get_diff()
        if changes:
            function_changes.append(changes)
    print("Transactions:")
    for changes in function_changes:
        print(changes)
    print("analyzing transactions...")
    # support/confidence/max_length were already converted above; the original
    # redundantly re-converted them here.
    apriori = Apriori(function_changes, confidence=confidence,
                      support=support, max_length=max_length)
    rules = apriori.get_rules()
    print("Association rules:")
    for rule in rules:
        print(rule)
def generate_candidates(self):
    """Mine every data partition with Apriori and merge the candidate counts.

    Each partition is mined at a proportionally reduced support threshold;
    counts from all partitions are added into self.candidates.
    """
    # The per-partition support threshold is loop-invariant; compute it once.
    local_min_sup = math.floor(self.support_count // self.k)
    merged = Counter(self.candidates)
    for _, chunk in self.partition_data().items():
        frequent = Apriori(chunk, local_min_sup, self.confidence_pct).all_frequent
        # Counter addition keeps only positive totals, same as the original
        # per-iteration A + B merge.
        merged = merged + Counter(frequent)
    self.candidates = dict(merged)
def apriori():
    """Demo: run Apriori on a small hard-coded transaction set and print results."""
    transactions = np.array([[1, 2, 3, 4], [1, 2, 4], [1, 2], [2, 3, 4],
                             [2, 3], [3, 4], [2, 4]])
    print("- Apriori -")
    min_sup, min_conf = 0.25, 0.8
    print("Minimum Support: %.2f, Minimum Confidence: %s" % (min_sup, min_conf))
    print("Transactions:")
    for row in transactions:
        print("\t%s" % row)
    Apriori(min_support=min_sup, min_confidence=min_conf).evaluate(transactions)
def run(database, type, min_support=0.05, min_confidence=0.5):
    """Preprocess the chosen database and mine association rules.

    database: 'Acme-Supermarket', 'Movielens', or anything else for Retails.
    type: 'basic' selects Apriori; any other value selects AprioriEnhanced.

    Rules are persisted (save_rules) only for Acme-Supermarket, matching the
    original behavior; the other datasets just run the algorithm.
    """
    # Map the database name to its preprocessor class and data file.
    if database == 'Acme-Supermarket':
        preprocessor_cls, filepath = AcmeSupermarket, acme_filepath
    elif database == 'Movielens':
        preprocessor_cls, filepath = Movielens, movie_filepath
    else:
        preprocessor_cls, filepath = Retails, retails_filepath
    # 'basic' → plain Apriori, anything else → the enhanced variant.
    algorithm_cls = Apriori if type == 'basic' else AprioriEnhanced

    preprocessing = preprocessor_cls(filepath)
    preprocessing.load()
    algorithm = algorithm_cls(min_support, min_confidence, filepath)
    rules = algorithm.run()
    if database == 'Acme-Supermarket':
        preprocessing.save_rules(rules)
def main():
    """Drive the Apriori workflow: configure, read input, prune, mine, report."""
    miner = Apriori()
    # Flags and paths come from config.csv; transactions from the input file.
    readConfigFile(miner)
    readInputFile(miner)
    initPrune(miner)
    miner.mineFrequentItemSets(1)
    miner.printFreqItemsets()
    # Association rules are printed only when enabled in the configuration.
    if miner.asscnFlag == 1:
        miner.printAssociationRules()
    miner.outFile.close()
def run_algorithms(support, confidence):
    """Benchmark AprioriPartition, Apriori and FPGrowth on the shared transactions.

    For each algorithm: time its construction, dump its associations and frequent
    itemsets to files, print a short report, and finally check that Apriori and
    FPGrowth agree on the association rule set.

    Note: the original wrote the misspelled label 'AprioriParition' into the
    AprioriPartition freq-itemset file; this version uses the correct spelling.
    """
    def benchmark(label, suffix, factory):
        # Time construction, persist results, print the report; return the instance.
        start = time.time()
        algo = factory()
        elapsed = time.time() - start
        write_list_to_file('%s_associations_%s' % (support, suffix),
                           algo.associations, label, elapsed)
        write_dict_to_file('%s_freqItemsets_%s' % (support, suffix),
                           algo.all_frequent, label, elapsed)
        print(label + " at support: " + str(support) + ", confidence:" + str(confidence))
        print("Frequent Itemsets: " + str(len(algo.all_frequent)))
        print("Associations: " + str(len(algo.associations)))
        print("Time to run: " + str(elapsed) + "\n")
        return algo

    benchmark('AprioriPartition', 'ap',
              lambda: AprioriPartition(transactions, support, confidence, 4))
    a = benchmark('Apriori', 'a',
                  lambda: Apriori(transactions, support, confidence))
    fp = benchmark('FPGrowth', 'fp',
                   lambda: FPTree(transactions, support, confidence))
    print("Asserts both algorithms generate the same association rule set: ",
          test_algos(a.associations, fp.associations))
    print("==========================\n")
def main(args):
    """Mine large itemsets and rules from args.infile, then write a report."""
    stats = Stats()
    transactions = TransactionsList(args.infile)
    # Choose the mining algorithm from the CLI selection.
    if args.algorithm == 'apriori':
        miner = Apriori(transactions, args.minsup)
    else:
        miner = Dic(transactions, args.minsup, args.m)
    large_sets, counter = miner.get_large_sets_and_counter()
    stats.record_post_large_sets()
    rules = RulesGenerator.generate_rules(large_sets, args.minconf,
                                          counter, transactions)
    stats.record_post_rules()
    # Emit the arguments, timing stats and mined rules to the output file.
    report = Writer(args.outfile)
    report.add_args(args)
    report.add_stats(stats)
    report.add_rules(rules)
    report.write()
def main(filename='data/data.txt', min_support=4, min_confidence=1):
    """Run Apriori on transactions from `filename`; print itemsets and rules."""
    transactions = get_transactions_from_file(filename)
    elements, rules = Apriori(transactions, min_support, min_confidence).perform()
    print('Elements:')
    for level in sorted(elements):
        for itemset in sorted(sorted(member) for member in level):
            print(itemset)
    print()
    print('Rules:')
    # Order rules by antecedent contents, then (stable sort) by antecedent size.
    ordered = sorted(rules, key=lambda rule: list(rule[0]))
    ordered = sorted(ordered, key=lambda rule: len(rule[0]))
    for head, tail in ordered:
        print('{} -> {}'.format(sorted(head), sorted(tail)))
def buildRules():
    """For each grade level, mine course association rules and keep the top 3.

    Returns a list of [prerequisite numbers, prerequisite names,
    consequent numbers, consequent names, confidence] entries.
    """
    levels = ['优', '良', '中', '差']
    miner = Apriori()
    db = DatabaseOperations()
    ans = []

    def describe(itemset):
        # Turn 'courseNo~grade' items into comma-joined number and name strings.
        numbers = ''
        names = ''
        for entry in list(itemset):
            numbers += entry[:-2] + ','
            names += db.search_course(entry[:-2])[0][1] + ' ~ ' + entry[-1:] + ','
        return numbers, names

    for grade in levels:
        data = load_data(grade)
        L, supportData = miner.apriori(data, minSupport=0.5)
        rules = list(miner.generateRules(L, supportData, minConf=0.6))
        # Keep only the three rules with the highest confidence.
        for rule in sorted(rules, key=lambda r: float(r[2]), reverse=True)[:3]:
            pre_nums, pre_names = describe(list(rule)[0])
            last_nums, last_names = describe(list(rule)[1])
            ans.append([pre_nums, pre_names, last_nums, last_names,
                        str(list(rule)[2])])
    return ans
# -*- coding: utf-8 -*-
from apriori import Apriori, ImprovedApriori

# Load transactions: one comma-separated basket per line of after.csv.
with open('after.csv', 'r') as fp:
    dataSet3 = [line.rstrip().split(",") for line in fp.readlines()]

minsup = 0.005
minconf = 0.5

if __name__ == '__main__':
    # test1
    apriori = Apriori(dataSet3, minsup, minconf)
    apriori.run()
    apriori.print_frequent_itemset()
    apriori.print_rule()
# CLI options: minimum support and the output file for the mined patterns.
optParser.add_option('-s', '--minSup', dest='minSup',
                     help='Mininum support', type='float',
                     default=0.01)  # minimum support value
optParser.add_option('-n', '--fname', dest='fname',
                     help='file name', type='string',
                     default='patterns.txt')  # file to save all-term frequencies
(options, args) = optParser.parse_args()

# Get parameters
filePath = options.filePath
minSup = options.minSup
fname = options.fname
print("""Parameters: \n - filePath: {} \n - mininum support: {} \n - save file name: {} \n""".\
format(filePath,minSup,fname))

# Run Apriori and save each frequent itemset as "count:item;item;...".
objApriori = Apriori(minSup)
itemCountDict, freqSet = objApriori.fit(filePath)
with open(fname, "w+") as f:
    for key, values in freqSet.items():
        for var in values:
            # ';'.join(var) replaces the redundant [x for x in var] comprehension.
            line = '{}:'.format(itemCountDict[var]) + ';'.join(var) + '\n'
            f.write(line)
print("Save done!")
from apriori import Apriori, CollapsedCsvFileReader
from assoc import (calculate_confidence, generate_associations,
                   get_support_for_key, generate_combo_itemsets,
                   create_associations, assocation_record)

logging.basicConfig()
logger = logging.getLogger('apriori')
logger.setLevel(logging.INFO)

#TODO: verify
#fr = CollapsedCsvFileReader('./data/test4.csv') # OK
# fr = CollapsedCsvFileReader('./data/test-dm-bookch6.csv')
reader = CollapsedCsvFileReader('./data/test3.csv')
transactions = reader.read()

miner = Apriori(transactions)
support = 0.22
confidence = 0.0

# Build support-filtered levels, derive candidate associations,
# then filter them by confidence.
levels = miner.generate_levels(support_level=support, drop_below_support=True)
candidates = create_associations(levels)
filtered = calculate_confidence(candidates, confidence_level=confidence)
print(filtered)
print(len(filtered))
def test_algos(rules_a, rules_fp):
    """Return True iff rules_a and rules_fp contain the same rules (as multisets).

    Unlike the original, this no longer mutates the caller's rules_fp list;
    matching is performed against a working copy.
    """
    remaining = list(rules_fp)
    for rule in rules_a:
        if rule in remaining:
            remaining.remove(rule)
        else:
            return False
    # Idiomatic boolean return replaces the if/else True/False ladder.
    return not remaining


# Five sample transactions used to cross-check Apriori against FPGrowth.
test = {
    'T100': ['Mood', 'Only', 'N', 'Kale', 'Ear', 'Yes'],
    'T200': ['D', 'Only', 'N', 'Kale', 'Ear', 'Yes'],
    'T300': ['Mood', 'A', 'Kale', 'Ear'],
    'T400': ['Mood', 'U', 'C', 'Kale', 'Yes'],
    'T500': ['C', 'Only', 'Only', 'Kale', 'I', 'Ear']
}
a = Apriori(test, 2, .8)
fp = FPTree(test, 2, .8)
all_a = a.generate_all_rules()
all_fp = fp.generate_all_rules()
print("Asserts both algorithms generate the same rule set: ",
      test_algos(all_a, all_fp))
global score_is_calculated guessingGame = True if left_circle or right_circle : client.leave_store() if (client.x, client.y) == (280, 600): left_circle = False right_circle = False guessingGame = False client = None # to generate a new client and set a new path for him num_transaction += 1 is_drawn = False score_is_calculated = False client = Client() client = Client() apriori = Apriori() antecedents, consequents = apriori.get_rules() known_transactions = apriori.get_known_transactions() unknown_transactions = [ ('milk','chocolate','croissant'), ('burger','pizza','icecream'), ('soda','icecream','pistachio'), ('chocolate','pistachio','honey'), ('shrimp','fish','sweets'), ('mushroom','medicine'), ('cheese','chicken','fruits','meat'), ('icecream','burger','pizza'), ('milk','biscuit','egg'), ('shrimp','sweets','fish'), ('honey','chocolate','pistachio'), ('medicine','mushroom'),
help='Right destination', type='string', default=None) # (options, args) = optParser.parse_args() # Get two important parameters filePath = options.filePath minSupp = options.minSupp minConf = options.minConf rhs = frozenset([options.rhs]) print("""Parameters: \n - filePath: {} \n - mininum support: {} \n - mininum confidence: {} \n - rhs: {}\n""".\ format(filePath,minSupp,minConf, rhs)) # Run and print objApriori = Apriori(minSupp, minConf) itemCountDict, freqSet = objApriori.fit(filePath) for key, value in freqSet.items(): print('frequent {}-term set:'.format(key)) print('-' * 20) for itemset in value: print(list(itemset)) print() # Return rules with regard of `rhs` rules, conf_rules = objApriori.getSpecRules(filePath, rhs) print('-' * 20) print('rules refer to {}'.format(list(rhs))) print('제품 연관도 기댓값')
#T = pd.read_csv("list6/task2Data/retail.dat", header=None)
#print(T)

# Build the item universe X and the transaction list T from the kosarak data.
# `with` closes the data file (the original leaked the handle from open()).
X = set()
T = []
with open("list6/task3Data/kosarak.dat") as data_file:
    for line in data_file:
        t = frozenset([int(x) for x in line.split()])
        X |= t
        T.append(t)

assoc = Apriori(X, T, alpha=0.01, debug=True)

# Collect (lift, confidence, rule size, joint support, lhs, rhs) for ranking.
best_rules = []
for a, b in assoc.rules:
    conf, lift, suppA, suppB, suppAB = assoc._get_stats(a, b)
    best_rules.append((lift, conf, len(a) + len(b), suppAB, a, b))

i = 0
for lift, conf, _, suppAB, a, b in sorted(best_rules, reverse=True):
    print("{} => {} \t lift: {}, conf: {}, supp: {}".format(
        tuple(a), tuple(b), lift, conf, suppAB))
def apriori():
    """Recommend recipes for the current user via Apriori over all users' favorites.

    Builds one transaction per user (that user's favorite recipe ids), mines
    association rules, and returns (as JSON, status 201) the recipes implied by
    rules whose antecedent is NOT already among the current user's favorites.
    """
    session.rollback()
    # One transaction per user: the list of that user's favorite recipe ids.
    favs = list()
    for user in session.query(User).all():
        favorites = session.query(Favorite).filter_by(
            user_mail=user.mail).all()
        favs.append([str(favorite.recipe_id) for favorite in favorites])
    minsup = 0.4
    minconf = 0.4
    apriori = Apriori(favs, minsup, minconf)
    apriori.run()
    apriori.print_frequent_itemset()
    result = apriori.print_rule()
    # Collect antecedent/consequent pairs from "X ==> Y" rules with short antecedents.
    apriori_advices = list()
    for res in result:
        x = res.split(' ==> ')
        if len(x[0]) <= 2:
            apriori_advices.append(x[0])
            apriori_advices.append(x[1])
    key = list()
    value = list()
    i = 0
    while i < len(apriori_advices):
        key.append(int(apriori_advices[i]))
        value.append(int(apriori_advices[i + 1]))
        i += 4
    favorites = session.query(Favorite).filter_by(user_mail=g.user.mail).all()
    print(favorites)
    favoriteIds = [favorite.recipe_id for favorite in favorites]
    print(favoriteIds)
    print(key)
    adviceIds = list()
    i = 0
    if (len(favoriteIds) > 0):
        while i < len(key):
            # BUG FIX: the original tested `contains(fs, key[i]) != True` where
            # `fs` was undefined (NameError at runtime); the current user's
            # favorite-id list was clearly intended.
            if key[i] not in favoriteIds:
                adviceIds.append(value[i])
            i += 1
    print(adviceIds)
    adviceIds = list(set(adviceIds))
    print(adviceIds)
    advices = session.query(Recipe).filter(Recipe.id.in_(adviceIds)).all()
    return jsonify([advice.serialize for advice in advices]), 201
from apriori import Apriori

# Toy transaction data.
dataset = [
    ['Elma', 'Yag'],
    ['Elma', 'Yag', 'Cikolata', 'Ekmek'],
    ['Elma', 'Cikolata'],
    ['Elma', 'Cikolata'],
]
minsup = 0.3
minconf = 0.6

apriori = Apriori(dataset, minsup, minconf)
#apriori.set_selected_items(['1'])
apriori.run()
apriori.print_frequent_itemset()

result = apriori.print_rule()
print(result)
# Print the antecedent and consequent of each "X ==> Y" rule separately.
for rule in result:
    parts = rule.split(' ==> ')
    print(parts[0])
    print(parts[1])
from apriori import Apriori
import argparse

if __name__ == "__main__":
    # CLI: support/confidence thresholds and the groceries data file.
    arg_parser = argparse.ArgumentParser(
        description="Apriori algorithm for groceries dataset.")
    arg_parser.add_argument('-s', '--support', default=0.02, type=float)
    arg_parser.add_argument('-c', '--confidence', default=0.3, type=float)
    arg_parser.add_argument('-f', '--file', default="groceries.csv")
    cli = arg_parser.parse_args()

    # Expand frequent itemsets, build the result set, then print it.
    miner = Apriori(cli.file, _support=cli.support, _confidence=cli.confidence)
    miner.expand()
    miner.make_res()
    miner.print_res()
def apriori_test():
    """Mine frequent itemsets from the module-level dataset; return the rules."""
    miner = Apriori(min_support=0.45, min_conf=0.918)
    freqsets, counts = miner.find_freqsets(dataset)
    return miner.generate_rules(freqsets, counts)
"D ": "35-39", "E ": "40-44", "F ": "45-49", "G ": "50-54", "H ": "55-59", "I ": "60-64", "J ": ">65" }) print(T.head(), file=sys.stderr) X = set() Ts = [] for i in range(len(T)): t = frozenset(T.loc[i + 1]) X |= t Ts.append(t) assoc = Apriori(X, Ts, alpha=0.003, debug=True) best_rules = [] for a, b in assoc.rules: conf, lift, suppA, suppB, suppAB = assoc._get_stats(a, b) best_rules.append((lift, conf, len(a) + len(b), suppAB, a, b)) i = 0 for lift, conf, _, suppAB, a, b in sorted(best_rules, reverse=True): print("{} => {} \t lift: {}, conf: {}, supp: {}".format( tuple(a), tuple(b), lift, conf, suppAB))
from apriori import Apriori

# Nine sample transactions over items 1-5.
transactions = [
    [1, 2, 5],
    [2, 4],
    [2, 3],
    [1, 2, 4],
    [1, 3],
    [2, 3],
    [1, 3],
    [1, 2, 3, 5],
    [1, 2, 3]
]
support = 0.2
confidence = 0.4

a = Apriori(transactions, support, confidence)
fs = a.getFrequentSubset()
print('frequent sets......')
for itemset in fs:
    print(itemset)
print('\n')
print('associate rules......')
for rule in a.genRules(fs):
    print(rule)
from apriori import Apriori

# Item universe for the toy market-basket example.
items = [
    'bread', 'milk', 'cheese', 'beer', 'umbrella', 'diaper', 'water', 'detergent'
]

# Five transactions, each written as a comma-separated basket.
baskets = [
    'milk,beer,umbrella,diaper',
    'umbrella,diaper,beer,detergent,bread',
    'beer,water,diaper,detergent',
    'beer,cheese,diaper,detergent,bread',
    'beer,umbrella,diaper,water',
]
trans = [basket.split(',') for basket in baskets]

ap = Apriori()
ap.calculate(items, trans, 0.4, 0.7)  # min support 0.4, min confidence 0.7
ap.info()
# NOTE: loaders for Papers.txt, PaperKeywords.txt and Authors.txt were present
# here but commented out; only PaperAuthorAffiliations is actually used below.

# Load paperAuthorAffiliations; `with` ensures the file handle is closed
# (the original left it open).
PAA = []
with open("./microsoft/PaperAuthorAffiliations.txt", "r") as fPAA:
    for line in fPAA:
        words = line.split("\t")
        PAA.append(
            paperAuthorAffiliation(words[0], words[1], words[2],
                                   words[4], words[5]))

print('finished loading files')

# Call Apriori with minsup = 30 (the original comment said 2; the code uses 30).
Apriori(PAA, 30)
from apriori import Apriori if __name__ == '__main__': while 1: filePath = input("Please input the file name:") minSupp = float(input("Please input minimum support:")) minConf = float(input("Please input minimum confidence:")) print("filePath:", filePath, "\n" "mininum support:", minSupp, "\n" "mininum confidence:", minConf, "\n") # Run and print Apriori_gen = Apriori(minSupp, minConf) freqSet = Apriori_gen.frequentCount(filePath) for key, value in freqSet.items(): print('frequent {}-term set:'.format(key)) print('-' * 20) for itemset in value: print(list(itemset)) print() # Return rules with regard of `rhs` for key, value in freqSet.items(): for item in value: rhs = item association_rules = Apriori_gen.find_Association_Rules(rhs) print('-' * 20) print('association_rules refer to {}'.format(list(rhs))) for key, value in association_rules.items():