def test_apriori(data_set, min_sup=0.05):
    """Run Apriori over *data_set* and return the elapsed wall-clock time.

    Args:
        data_set: transaction data accepted by the Apriori constructor.
        min_sup: minimum support threshold forwarded to generate_L.

    Returns:
        float: elapsed time in (fractional) seconds.
    """
    start = datetime.now()
    apriori = Apriori(data_set)
    apriori.generate_L(min_sup=min_sup)
    deltatime = datetime.now() - start
    print("Apriori over")
    # Fix: total_seconds() includes the `days` component; the old
    # `seconds + microseconds / 1000000` silently dropped whole days
    # for runs longer than 24 hours.
    return deltatime.total_seconds()
def main():
    """CLI entry point: read transactions, run Apriori, write the results.

    Usage: python main.py <inputFile.ext> <min_support(int)> <outputFile.ext>

    Fix: the Python-2-only `print` statements were converted to print()
    calls so this parses under Python 3 (which the rest of the file uses).
    """
    # Get the command line args
    args = sys.argv[1:]
    # Check to make sure correct number of args were added
    if len(args) != 3:
        print("Usage: python main.py <inputFile.ext> <min_support(int)> <outputFile.ext>")
        sys.exit()
    # Initialize Data class with the input file
    data_class = Data(args[0])
    # Get the data as 2D array
    print("Getting Data from file...")
    start = time.time()
    data_array = data_class.get_data()
    end = time.time()
    print("Done - Took %1.1f seconds" % (end - start))
    min_support = int(args[1])
    a = Apriori(min_support)
    print("Performing Apriori Algorithm...")
    start = time.time()
    results = a.apriori(data_array)
    end = time.time()
    print("Done - Took %1.1f seconds" % (end - start))
    print("Writing to " + args[2] + "...")
    start = time.time()
    Output(results, args[2])
    end = time.time()
    print("Done - Took %1.1f seconds" % (end - start))
def process(infile, algorithm_name, support, confidence, m, random, partial):
    """Run one mining experiment and print a tab-separated statistics row.

    Columns: infile, algorithm, support, confidence, m, #rules, #large sets,
    memory, total time, user time, large-sets time, partial, random.

    Fix: the Python-2 `print` statement was converted to a print() call
    (SyntaxError under Python 3 otherwise).
    """
    stats = Stats()
    transactions = TransactionsList(infile)
    # Baseline recording: captures the time spent loading transactions so it
    # can be subtracted from the measurements of the fresh Stats below.
    stats.record_post_large_sets()
    stats.record_post_rules()
    last_total_time = stats.real_time
    last_user_time = stats.user_time
    stats = Stats()
    if algorithm_name == 'apriori':
        algorithm = Apriori(transactions, support)
    else:
        algorithm = Dic(transactions, support, m, random, partial)
    large_sets, counter = algorithm.get_large_sets_and_counter()
    stats.record_post_large_sets()
    rules = RulesGenerator.generate_rules(large_sets, confidence, counter, transactions)
    stats.record_post_rules()
    large_len = len(large_sets)
    total_time = stats.real_time - last_total_time
    user_time = stats.user_time - last_user_time
    large_sets_time = stats.set_gen_time - last_total_time
    last_total_time = stats.real_time
    last_user_time = stats.user_time
    memory = stats.memory_use
    rules_no = len(rules)
    # format(**locals()) fills every column from the local variables above.
    print("{infile}\t{algorithm_name}\t{support}\t{confidence}\t{m}\t{rules_no}\t{large_len}\t{memory}\t{total_time}\t{user_time}\t{large_sets_time}\t{partial}\t{random}".format(**locals()))
def gen_rules(uii, support, confidence):
    """Mine association rules from *uii* at the given thresholds.

    Returns:
        tuple: (items_id, allRules) — items_id holds the frequent
        1-itemsets (per the original note), allRules the generated rules.
    """
    started = time()
    # generate associated rules
    print('generate associated rules....')
    miner = Apriori(uii, support, confidence)
    items_id, allRules = miner.genRules()
    print('Time took for generating rules:', time() - started, 's')
    return (items_id, allRules)
def main():
    """CLI driver: mine co-changed-function association rules from a repo.

    Fixes: get_commit_count() was invoked twice (it walks the whole repo);
    confidence/support/max_length were re-cast even though they were already
    converted a few lines above.
    """
    args = p.parse_args()
    confidence = float(args.confidence)
    support = float(args.support)
    max_length = int(args.max_length)
    print("Apriori (support=%.3f, confidence=%.3f, max_length=%d)" % (support, confidence, max_length))
    gitpy = git.Git(CLONE_PATH)
    # fetching repository / folder
    project_path = args.dir
    if args.repository:
        project_folder = "clone-%s" % generate_hash()
        project_path = "%s/%s" % (CLONE_PATH, project_folder)
        print("Cloning repository to %s..." % project_path)
        gitpy.clone(args.repository, project_folder)
    # Hoisted: count commits once instead of twice.
    commit_count = get_commit_count(project_path)
    print("number of commits: %d" % commit_count)
    print("support absolute value: %d" % (support * commit_count))
    # defining language parser
    parser = BaseParser
    if args.lang == "go":
        parser = GoParser
    elif args.lang == "java":
        parser = JavaParser
    elif args.lang == "python":
        parser = PythonParser
    print("parsing project...")
    # One "transaction" per commit: the set of functions it changed.
    function_changes = []
    for commit in RepositoryMining(project_path).traverse_commits():
        language_parser = parser(project_path, commit.hash)
        changes = language_parser.get_diff()
        if changes:
            function_changes.append(changes)
    print("Transactions:")
    for changes in function_changes:
        print(changes)
    print("analyzing transactions...")
    apriori = Apriori(function_changes, confidence=confidence,
                      support=support, max_length=max_length)
    rules = apriori.get_rules()
    print("Association rules:")
    for rule in rules:
        print(rule)
def apriori():
    """Demo: run Apriori on a small hard-coded transaction set and print it."""
    # Fix: the rows have different lengths, so this is a ragged array.
    # NumPy >= 1.24 raises an error when building one implicitly; the
    # object dtype must be requested explicitly.
    transactions = np.array([[1, 2, 3, 4], [1, 2, 4], [1, 2], [2, 3, 4],
                             [2, 3], [3, 4], [2, 4]], dtype=object)
    print("- Apriori -")
    min_sup = 0.25
    min_conf = 0.8
    print("Minimum Support: %.2f, Minimum Confidence: %s" % (min_sup, min_conf))
    print("Transactions:")
    for transaction in transactions:
        print("\t%s" % transaction)
    apriori = Apriori(min_support=min_sup, min_confidence=min_conf)
    apriori.evaluate(transactions)
def main(args):
    """Parse transactions, mine large itemsets and rules, and write a report.

    Args:
        args: parsed CLI namespace with infile, algorithm, minsup, m,
            minconf and outfile attributes.
    """
    stats = Stats()
    transactions = TransactionsList(args.infile)
    # Choose the mining algorithm requested on the command line.
    if args.algorithm == 'apriori':
        miner = Apriori(transactions, args.minsup)
    else:
        miner = Dic(transactions, args.minsup, args.m)
    large_sets, counter = miner.get_large_sets_and_counter()
    stats.record_post_large_sets()
    rules = RulesGenerator.generate_rules(large_sets, args.minconf, counter, transactions)
    stats.record_post_rules()
    # Emit args, timing stats and the mined rules to the output file.
    report = Writer(args.outfile)
    report.add_args(args)
    report.add_stats(stats)
    report.add_rules(rules)
    report.write()
def setUp(self):
    """Build the Apriori instance under test from the bundled sample file."""
    preprocessor = PreProcessor()
    preprocessor.parse_file("../data/samples.txt")
    self.apriori = Apriori(
        preprocessor.get_transactions(),
        preprocessor.get_uniques(),
        2.0,    # support threshold
        0.374,  # confidence threshold
    )
class AprioriTestCase(unittest.TestCase):
    """Exercises Apriori's init pass, candidate generation and full run
    on a small toy dataset.

    Fix: the Python-2 `print` statements were converted to print() calls so
    the module parses under Python 3 (the rest of the file targets py3).
    """

    def setUp(self):
        _list = [
            [1, 2, 3],
            [1, 4],
            [4, 5],
            [1, 2, 4],
            [1, 2, 6, 4, 3],
            [2, 6, 3],
            [2, 3, 6],
        ]
        # trans list to Collection
        tem_list = Collection()
        for l in _list:
            tem = Collection()
            tem_list.append(tem)
            for m in l:
                tem.append(m)
        self.apri = Apriori(tem_list, 0.3, 0.8)

    def testInit_pass(self):
        test_console('init_pass')
        self.apri.init_pass()
        print('.. self.apri.C : ')
        print(self.apri.C)

    def testCadidate_gen(self):
        temF = [
            [1, 2, 3],
            [2, 3, 4],
            [4, 5, 6],
            [1, 2, 3, 4],
            [6, 7, 8],
        ]
        collections = self.apri.Cadidate_gen(temF)
        print('.. temF')
        print(temF)
        print('.. Cadidate_gen')
        print(collections)

    def testRun(self):
        test_console('run')
        self.apri.run()
def main(filename='data/data.txt', min_support=4, min_confidence=1):
    """Mine frequent elements and rules from a transaction file and print
    both, each in a deterministic sorted order."""
    transactions = get_transactions_from_file(filename)
    miner = Apriori(transactions, min_support, min_confidence)
    elements, rules = miner.perform()
    print('Elements:')
    for level in sorted(elements):
        for itemset in sorted(sorted(member) for member in level):
            print(itemset)
    print()
    print('Rules:')
    # Sort by head contents first, then (stable sort) by head length, so
    # shorter heads come first and ties stay content-ordered.
    ordered = sorted(rules, key=lambda rule: list(rule[0]))
    ordered = sorted(ordered, key=lambda rule: len(rule[0]))
    for head, tail in ordered:
        print('{} -> {}'.format(sorted(head), sorted(tail)))
def generate_candidates(self):
    """Mine frequent itemsets from every data partition and merge their
    counts into self.candidates."""
    # Per-partition support threshold; scaled down by the partition count.
    partition_min_sup = math.floor(self.support_count // self.k)
    for subset in self.partition_data().values():
        frequent = Apriori(subset, partition_min_sup,
                           self.confidence_pct).all_frequent
        # Counter addition merges counts for itemsets seen in both.
        combined = Counter(self.candidates) + Counter(frequent)
        self.candidates = dict(combined)
def main():
    """Drive one Apriori run: configure, load input, prune, mine, report."""
    miner = Apriori()
    # read config.csv and set flags and paths
    readConfigFile(miner)
    readInputFile(miner)
    initPrune(miner)
    miner.mineFrequentItemSets(1)
    miner.printFreqItemsets()
    # Association rules are only printed when enabled in the config.
    if miner.asscnFlag == 1:
        miner.printAssociationRules()
    miner.outFile.close()
def buildRules():
    """Mine course-association rules for each grade level and return, per
    level, up to the 3 highest-confidence rules.

    Each result row is: [antecedent course numbers, antecedent course names,
    consequent course numbers, consequent course names, confidence-as-str].
    """
    # The four grade levels ("excellent", "good", "fair", "poor") — runtime
    # data, deliberately left untranslated.
    level = ['优', '良', '中', '差']
    apr = Apriori()
    db = DatabaseOperations()
    ans = []
    for item in level:
        data = load_data(item)
        # L: frequent itemsets per size; supportData: their support values.
        L, supportData = apr.apriori(data, minSupport=0.5)
        rules = apr.generateRules(L, supportData, minConf=0.6)
        rules = list(rules)
        i = 0
        # sort the confidence of the rules and keep the top 3
        sorted_rules = sorted(rules, key=lambda x: float(x[2]), reverse=True)
        for rule in sorted_rules:
            pre_course_num = ''
            pre_course_name = ''
            # NOTE(review): items appear to be '<course_num><sep><grade>'
            # strings — [:-2] is treated as the course number and [-1:] as
            # the grade suffix; confirm against load_data's format.
            for item in list(list(rule)[0]):
                # ans = db.search_course(item[:-2])
                pre_course_num += item[:-2] + ','
                pre_course_name += db.search_course(
                    item[:-2])[0][1] + ' ~ ' + item[-1:] + ','
            # pre_name = db.search_course(pre_course_num[:-2])
            last_course_num = ''
            last_course_name = ''
            # Same formatting for the rule's consequent side.
            for item in list(list(rule)[1]):
                last_course_num += item[:-2] + ','
                last_course_name += db.search_course(
                    item[:-2])[0][1] + ' ~ ' + item[-1:] + ','
            # last_name = db.search_course(last_course_num[:-2])
            # print(pre_course_num+"-->"+last_course_num+" "+str(list(rule)[2]))
            ans.append([
                pre_course_num, pre_course_name, last_course_num,
                last_course_name,
                str(list(rule)[2])
            ])
            i += 1
            # Keep only the three most confident rules per level.
            if i >= 3:
                break
    return ans
def run_algorithms(support, confidence):
    """Run AprioriPartition, Apriori and FPGrowth over the module-level
    transaction list, persist each algorithm's output, and print a summary.

    Fixes: the frequent-itemset file for AprioriPartition was written with
    the misspelled label 'AprioriParition' — corrected; the three near-
    identical run/report sections were collapsed into one helper.
    """
    def _run_and_report(factory, label, suffix):
        # Time the construction (which performs the mining), persist the
        # associations and frequent itemsets, then print the stats block.
        start = time.time()
        algo = factory()
        elapsed = time.time() - start
        assoc_filename = str(support) + '_associations_' + suffix
        freq_filename = str(support) + '_freqItemsets_' + suffix
        write_list_to_file(assoc_filename, algo.associations, label, elapsed)
        write_dict_to_file(freq_filename, algo.all_frequent, label, elapsed)
        print(label + " at support: " + str(support) + ", confidence:" + str(confidence))
        print("Frequent Itemsets: " + str(len(algo.all_frequent)))
        print("Associations: " + str(len(algo.associations)))
        print("Time to run: " + str(elapsed) + "\n")
        return algo

    _run_and_report(lambda: AprioriPartition(transactions, support, confidence, 4),
                    'AprioriPartition', 'ap')
    a = _run_and_report(lambda: Apriori(transactions, support, confidence),
                        'Apriori', 'a')
    fp = _run_and_report(lambda: FPTree(transactions, support, confidence),
                         'FPGrowth', 'fp')
    # Cross-check that plain Apriori and FPGrowth agree on the rule set.
    print("Asserts both algorithms generate the same association rule set: ",
          test_algos(a.associations, fp.associations))
    print("==========================\n")
def setUp(self):
    """Convert the raw nested transaction list into nested Collections and
    build the Apriori under test (minsup=0.3, minconf=0.8)."""
    raw_transactions = [
        [1, 2, 3],
        [1, 4],
        [4, 5],
        [1, 2, 4],
        [1, 2, 6, 4, 3],
        [2, 6, 3],
        [2, 3, 6],
    ]
    # Mirror the nested-list structure as nested Collection objects.
    transactions = Collection()
    for row in raw_transactions:
        bucket = Collection()
        transactions.append(bucket)
        for element in row:
            bucket.append(element)
    self.apri = Apriori(transactions, 0.3, 0.8)
def test_run(self):
    """run() must return the expected itemsets, both unrestricted and when
    restricted to a subset of items."""
    def by_support_then_item(entry):
        return (entry.support, entry.item)

    miner = Apriori(self.tidList, 63)
    actual = miner.run()
    actual.sort(key=by_support_then_item)
    self.targetSet.sort(key=by_support_then_item)
    self.assertListEqual(actual, self.targetSet)

    restrict_items = ["Bagel", "Cheese", "Milk"]
    restricted_miner = Apriori(self.tidList, 63, restrict_items=restrict_items)
    restricted_actual = restricted_miner.run()
    restricted_actual.sort(key=by_support_then_item)
    # The restricted run must equal the plain run filtered to those items.
    expected_restricted = [x for x in self.targetSet
                           if x.contains(restrict_items)]
    self.assertListEqual(restricted_actual, expected_restricted)
def apriori_test():
    """Mine frequent itemsets from the module-level dataset and return the
    association rules derived from them."""
    miner = Apriori(min_support=0.45, min_conf=0.918)
    freqsets, counts = miner.find_freqsets(dataset)
    return miner.generate_rules(freqsets, counts)
# NOTE(review): fragment of a game loop; indentation was lost in extraction
# and the `unknown_transactions` list literal continues past this excerpt,
# so the grouping below is a best-effort reconstruction — confirm against
# the original layout.
global score_is_calculated
guessingGame = True
if left_circle or right_circle:
    client.leave_store()
if (client.x, client.y) == (280, 600):
    left_circle = False
    right_circle = False
    guessingGame = False
    client = None  # to generate a new client and set a new path for him
    num_transaction += 1
    is_drawn = False
    score_is_calculated = False
    client = Client()
# NOTE(review): Client() appears to be constructed twice in a row in the
# source (the first instance would be discarded) — confirm this is intended.
client = Client()
apriori = Apriori()
antecedents, consequents = apriori.get_rules()
known_transactions = apriori.get_known_transactions()
# Hand-written baskets used to test the mined rules against unseen data.
unknown_transactions = [
    ('milk', 'chocolate', 'croissant'),
    ('burger', 'pizza', 'icecream'),
    ('soda', 'icecream', 'pistachio'),
    ('chocolate', 'pistachio', 'honey'),
    ('shrimp', 'fish', 'sweets'),
    ('mushroom', 'medicine'),
    ('cheese', 'chicken', 'fruits', 'meat'),
    ('icecream', 'burger', 'pizza'),
    ('milk', 'biscuit', 'egg'),
    ('shrimp', 'sweets', 'fish'),
    ('honey', 'chocolate', 'pistachio'),
    ('medicine', 'mushroom'),
#T = pd.read_csv("list6/task2Data/retail.dat", header=None)
#print(T)

# Load the kosarak transaction data: X is the item universe, T the list of
# transactions (each a frozenset of int item ids).
# Fixes: the input file is now closed via a context manager (it leaked
# before) and the never-used counter `i = 0` was removed.
X = set()
T = []
with open("list6/task3Data/kosarak.dat") as datafile:
    for line in datafile:
        t = frozenset(int(x) for x in line.split())
        X |= t
        T.append(t)

assoc = Apriori(X, T, alpha=0.01, debug=True)

# Rank every mined rule by (lift, confidence, size, support), best first.
best_rules = []
for a, b in assoc.rules:
    conf, lift, suppA, suppB, suppAB = assoc._get_stats(a, b)
    best_rules.append((lift, conf, len(a) + len(b), suppAB, a, b))

for lift, conf, _, suppAB, a, b in sorted(best_rules, reverse=True):
    print("{} => {} \t lift: {}, conf: {}, supp: {}".format(tuple(a), tuple(b), lift, conf, suppAB))
# -*- coding: utf-8 -*-
from apriori import Apriori, ImprovedApriori

# Mining thresholds.
minsup = 0.005
minconf = 0.5


def _load_transactions(path):
    """Read one comma-separated transaction per line from *path*."""
    with open(path, 'r') as fp:
        return [line.rstrip().split(",") for line in fp]


if __name__ == '__main__':
    # Fix: the dataset used to be read at import time; loading now happens
    # only when the module is run as a script.
    dataSet3 = _load_transactions('after.csv')
    # test1
    apriori = Apriori(dataSet3, minsup, minconf)
    apriori.run()
    apriori.print_frequent_itemset()
    apriori.print_rule()
# NOTE(review): this excerpt opens inside an optParser.add_option(...) call
# whose first line is outside this view.
                     help='Right destination', type='string', default=None)
#
(options, args) = optParser.parse_args()

# Get two important parameters
filePath = options.filePath
minSupp = options.minSupp
minConf = options.minConf
# Wrap the single right-hand-side item in a frozenset for rule filtering.
rhs = frozenset([options.rhs])
print("""Parameters: \n - filePath: {} \n - mininum support: {} \n - mininum confidence: {} \n - rhs: {}\n""".format(filePath,minSupp,minConf, rhs))

# Run and print
objApriori = Apriori(minSupp, minConf)
itemCountDict, freqSet = objApriori.fit(filePath)
# freqSet maps itemset size -> collection of frequent itemsets of that size.
for key, value in freqSet.items():
    print('frequent {}-term set:'.format(key))
    print('-' * 20)
    for itemset in value:
        print(list(itemset))
    print()

# Return rules with regard of `rhs`
rules, conf_rules = objApriori.getSpecRules(filePath, rhs)
print('-' * 20)
print('rules refer to {}'.format(list(rhs)))
# Korean header ("expected product association") — runtime string, untouched.
print('제품 연관도 기댓값')
optParser.add_option('-s', '--minSup', dest='minSup',
                     help='Mininum support',
                     type='float', default=0.01)  # mininum support value
optParser.add_option('-n', '--fname', dest='fname',
                     help='file name',
                     type='string', default='patterns.txt')  # name file to save all-term freq
(options, args) = optParser.parse_args()

# Get parameters
filePath = options.filePath
minSup = options.minSup
fname = options.fname
print("""Parameters: \n - filePath: {} \n - mininum support: {} \n - save file name: {} \n""".format(filePath,minSup,fname))

# Mine the frequent itemsets, then persist each as a "<count>:a;b;c" line.
objApriori = Apriori(minSup)
itemCountDict, freqSet = objApriori.fit(filePath)
with open(fname, "w+") as f:
    for key, values in freqSet.items():
        for var in values:
            f.write('{}:'.format(itemCountDict[var]) + ';'.join(var) + '\n')
print("Save done!")
def go_apriori(self):
    """Run the Apriori analysis over the loaded values and transactions,
    storing the resulting support list on the instance."""
    analyzer = Apriori(self.value_list, self.tran_list)
    self.support_list = analyzer.go_analyze()
def testAprioriGenK3(self):
    """Candidate generation from the k=3 large sets must yield exactly one
    4-itemset: (3, 4, 5, 7)."""
    candidates = Apriori._Apriori__apriori_gen(self.large_set_k3)
    self.assertEqual(candidates, [(3, 4, 5, 7)])
def testAprioriGetL1(self):
    """__getL1 must return exactly the singletons (1,), (2,), (3,), (7,)."""
    level_one = Apriori._Apriori__getL1(self.counter, self.minsup_count)
    self.assertEqual(level_one, [(1,), (2,), (3,), (7,)])
def run(database, type, min_support=0.05, min_confidence=0.5):
    """Load the requested dataset and mine association rules with either the
    basic or the enhanced Apriori implementation.

    Args:
        database: 'Acme-Supermarket', 'Movielens', or anything else (Retails).
        type: 'basic' selects Apriori; any other value selects AprioriEnhanced.
        min_support: minimum support threshold.
        min_confidence: minimum confidence threshold.

    The six copy-pasted branches were collapsed into a dataset lookup plus an
    algorithm-class choice; behavior is unchanged — rules are still persisted
    only for Acme-Supermarket.
    """
    if database == 'Acme-Supermarket':
        preprocessing_cls, filepath = AcmeSupermarket, acme_filepath
    elif database == 'Movielens':
        preprocessing_cls, filepath = Movielens, movie_filepath
    else:
        preprocessing_cls, filepath = Retails, retails_filepath

    algorithm_cls = Apriori if type == 'basic' else AprioriEnhanced

    preprocessing = preprocessing_cls(filepath)
    preprocessing.load()
    algorithm = algorithm_cls(min_support, min_confidence, filepath)
    rules = algorithm.run()
    # Only the Acme-Supermarket pipeline persists its rules (as before);
    # the other datasets' results were never printed or saved.
    if database == 'Acme-Supermarket':
        preprocessing.save_rules(rules)
def testGenerateSubsets4(self):
    """Subset generation must keep only the size-4 candidates that are
    contained in the transaction."""
    subsets = Apriori._Apriori__generate_subsets(self.candidate_set_4,
                                                 self.transaction)
    self.assertEqual(subsets, [(1, 2, 3, 7), (2, 3, 8, 9)])
from apriori import Apriori

# Toy transaction database for the demo run.
transactions = [
    [1, 2, 5],
    [2, 4],
    [2, 3],
    [1, 2, 4],
    [1, 3],
    [2, 3],
    [1, 3],
    [1, 2, 3, 5],
    [1, 2, 3],
]
support = 0.2
confidence = 0.4

a = Apriori(transactions, support, confidence)
fs = a.getFrequentSubset()
print('frequent sets......')
for frequent_set in fs:
    print(frequent_set)
print('\n')
print('associate rules......')
for rule in a.genRules(fs):
    print(rule)
from apriori import Apriori

# Item catalogue for the toy market-basket demo.
items = [
    'bread', 'milk', 'cheese', 'beer', 'umbrella', 'diaper', 'water',
    'detergent'
]

# Five transactions, each written as a comma-separated basket.
trans = [
    basket.split(',')
    for basket in (
        'milk,beer,umbrella,diaper',
        'umbrella,diaper,beer,detergent,bread',
        'beer,water,diaper,detergent',
        'beer,cheese,diaper,detergent,bread',
        'beer,umbrella,diaper,water',
    )
]

ap = Apriori()
ap.calculate(items, trans, 0.4, 0.7)
ap.info()
#Load in papers#
#fPaper = open("./microsoft/Papers.txt", "r")
#Papers = []
#for line in fPaper.readlines():
#    words = line.split("\t")
#    Papers.append(paper(words[0], words[1], words[2], words[3], words[7], words[9]))

#Load in keywords
#fKeywords = open("./microsoft/PaperKeywords.txt", "r")
#paperKeywords = []
#for line in fKeywords.readlines():
#    words = line.split("\t")
#    paperKeywords.append(paperKeyword(words[0], words[1]))

# Load in paperAuthorAffiliations. Fixes: the file handle is now closed via
# a context manager (it leaked before), and iterating the file directly
# avoids materialising every line with readlines().
PAA = []
with open("./microsoft/PaperAuthorAffiliations.txt", "r") as fPAA:
    for line in fPAA:
        words = line.split("\t")
        PAA.append(
            paperAuthorAffiliation(words[0], words[1], words[2], words[4],
                                   words[5]))

#Load in Authors
#fAuthor = open("./microsoft/Authors.txt", "r")
#Authors = []
#for line in fAuthor.readlines():
#    words = line.split("\t")
#    Authors.append(author(words[0], words[1]))

print('finished loading files')

# Call Apriori with minsup = 30 (the old comment said 2; the code uses 30).
Apriori(PAA, 30)
def __init__(self, min_support, min_conf, max_rule_length=100):
    """Initialise the parent Apriori miner and cap the rule length.

    Args:
        min_support: minimum support threshold (forwarded to Apriori).
        min_conf: minimum confidence threshold (forwarded to Apriori).
        max_rule_length: upper bound on generated rule length (default 100).
    """
    Apriori.__init__(self, min_support, min_conf)
    self.max_rule_length = max_rule_length
from apriori import Apriori
import argparse

if __name__ == "__main__":
    # Parse the mining thresholds and input file from the command line.
    cli = argparse.ArgumentParser(
        description="Apriori algorithm for groceries dataset.")
    cli.add_argument('-s', '--support', default=0.02, type=float)
    cli.add_argument('-c', '--confidence', default=0.3, type=float)
    cli.add_argument('-f', '--file', default="groceries.csv")
    args = cli.parse_args()

    apriori = Apriori(args.file,
                      _support=args.support,
                      _confidence=args.confidence)
    apriori.expand()
    apriori.make_res()
    apriori.print_res()
from apriori import Apriori

if __name__ == '__main__':
    # Interactive loop: keeps prompting for a dataset path and thresholds
    # until the process is interrupted.
    while 1:
        filePath = input("Please input the file name:")
        minSupp = float(input("Please input minimum support:"))
        minConf = float(input("Please input minimum confidence:"))
        print("filePath:", filePath, "\n"
              "mininum support:", minSupp, "\n"
              "mininum confidence:", minConf, "\n")
        # Run and print
        Apriori_gen = Apriori(minSupp, minConf)
        # freqSet maps itemset size -> frequent itemsets of that size.
        freqSet = Apriori_gen.frequentCount(filePath)
        for key, value in freqSet.items():
            print('frequent {}-term set:'.format(key))
            print('-' * 20)
            for itemset in value:
                print(list(itemset))
            print()
        # Return rules with regard of `rhs`: every frequent itemset is tried
        # as a rule right-hand side in turn.
        for key, value in freqSet.items():
            for item in value:
                rhs = item
                association_rules = Apriori_gen.find_Association_Rules(rhs)
                print('-' * 20)
                print('association_rules refer to {}'.format(list(rhs)))
                # NOTE(review): the body of the following loop continues
                # beyond this excerpt.
                for key, value in association_rules.items():
def testAprioriGenK1(self):
    """Joining the k=1 large sets must produce every 2-item combination."""
    candidates = Apriori._Apriori__apriori_gen(self.large_set_k1)
    self.assertEqual(candidates, [(1, 2), (1, 3), (2, 3)])
def testAprioriGetL1(self):
    """__getL1 must return exactly the singleton itemsets that meet the
    minimum support count."""
    frequent_singletons = Apriori._Apriori__getL1(self.counter,
                                                  self.minsup_count)
    expected_singletons = [(1, ), (2, ), (3, ), (7, )]
    self.assertEqual(frequent_singletons, expected_singletons)