def runApriori(file):
    """Interactively mine *file*: prompt for the support/confidence
    thresholds, run the apriori algorithm, and print the results.

    Thin console wrapper around the `apriori` module; returns nothing.
    """
    min_support = float(input("Enter the minimum support value :: "))
    min_confidence = float(input("Enter the minimum confidence value :: "))
    transactions = apriori.dataFromFile(file)
    found_items, found_rules = apriori.runApriori(
        transactions, min_support, min_confidence
    )
    apriori.printResults(found_items, found_rules)
def main():
    """CLI entry point: parse options, run apriori on the input file, print results.

    Options:
        -f/--inputFile  path to the CSV dataset (required in practice)
        -s/--minSupport minimum support threshold (default 0.4)
    """
    import sys
    from optparse import OptionParser
    from apriori import runApriori, dataFromFile, printResults

    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile', dest='input',
                         help='filename containing csv', default=None)
    optparser.add_option('-s', '--minSupport', dest='minS',
                         help='minimum support value', default=0.4, type='float')
    (options, args) = optparser.parse_args()

    # BUG FIX: the original `if is None / elif is not None / else` chain made
    # the "no dataset" branch unreachable (the first two conditions are
    # exhaustive), fed raw sys.stdin into runApriori, and then crashed with an
    # AttributeError on `None.split(...)` below. Exit cleanly instead when no
    # input file was given, as the dead branch clearly intended.
    if options.input is not None:
        inFile = dataFromFile(options.input)
        # Third path component used as the dataset label — assumes inputs look
        # like "a/b/<name>"; TODO confirm against how the script is invoked.
        dataset_name = options.input.split("/")[2]
    else:
        print('No dataset filename specified, system will exit\n')
        sys.exit('System will exit')

    minSupport = options.minS
    items, PBoarder, NBoarder = runApriori(inFile, minSupport)
    printResults(items, minSupport, dataset_name, PBoarder, NBoarder)
def test_print_results_should_have_results_in_defined_format(self):
    """printResults must print the items block, then the rules block,
    each entry formatted to three decimal places."""
    items = [
        (('milk',), 0.5),
        (('apple',), 0.5),
        (('beer',), 0.75),
        (('rice',), 0.5),
        (('beer', 'rice'), 0.5),
    ]
    rules = [
        ((('beer',), ('rice',)), 0.6666666666666666),
        ((('rice',), ('beer',)), 1.0),
    ]
    expected = (
        "item: ('milk',) , 0.500\n"
        "item: ('apple',) , 0.500\n"
        "item: ('rice',) , 0.500\n"
        "item: ('beer', 'rice') , 0.500\n"
        "item: ('beer',) , 0.750\n"
        "\n"
        "------------------------ RULES:\n"
        "Rule: ('beer',) ==> ('rice',) , 0.667\n"
        "Rule: ('rice',) ==> ('beer',) , 1.000\n"
    )
    # Capture everything printResults writes to stdout.
    with patch('sys.stdout', new=StringIO()) as fake_output:
        printResults(items, rules)
    self.assertEqual(fake_output.getvalue(), expected)
def sd_apri_main(inFile, buckets_cls, minSupport, minConfidence, result_name):
    """Bucket the indicators in *inFile*, mine association rules with the
    given support/confidence thresholds, and write them under *result_name*.

    Returns the result dict produced by apriori.printResults.
    """
    apri_logger.info("start sd_apri")
    # Return value is discarded — presumably called for its side effects
    # (see the commented-out assignment in the original); TODO confirm.
    #cfg_file_name = get_cfg_filename(BASE_DIR)
    get_cfg_filename(BASE_DIR)

    bucketed_indicators = indicator_classify(inFile, buckets_cls)
    transactions = apriori.dataFromList(bucketed_indicators)
    frequent_items, assoc_rules = apriori.runApriori(
        transactions, minSupport, minConfidence
    )
    return apriori.printResults(frequent_items, assoc_rules, result_name)
def test_print_results_should_have_results_in_defined_format(self):
    """Verify printResults emits items and rules in the documented layout."""
    frequent_items = [
        (('milk',), 0.5), (('apple',), 0.5), (('beer',), 0.75),
        (('rice',), 0.5), (('beer', 'rice'), 0.5),
    ]
    assoc_rules = [
        ((('beer',), ('rice',)), 0.6666666666666666),
        ((('rice',), ('beer',)), 1.0),
    ]
    expected_lines = [
        "item: ('milk',) , 0.500",
        "item: ('apple',) , 0.500",
        "item: ('rice',) , 0.500",
        "item: ('beer', 'rice') , 0.500",
        "item: ('beer',) , 0.750",
        "",
        "------------------------ RULES:",
        "Rule: ('beer',) ==> ('rice',) , 0.667",
        "Rule: ('rice',) ==> ('beer',) , 1.000",
        "",
    ]
    with patch('sys.stdout', new=StringIO()) as fake_output:
        printResults(frequent_items, assoc_rules)
        self.assertEqual(fake_output.getvalue(), "\n".join(expected_lines))
# NOTE(review): this chunk is the tail of a try/except whose `try:` opener is
# above the visible region — the first statement below is the try body.
# Load the cached additive stats; on any failure, rebuild the dataset from
# the map-reduce job and re-cache it.
    additives_stats = json.load(open(DATASET_FILENAME, "r"))
except:  # NOTE(review): bare except hides all errors — ideally narrow to (IOError, ValueError)
    print("Building dataset")
    additives_stats = openfood.inline_map_reduce(mapper, reducer)
    json.dump(additives_stats, open(DATASET_FILENAME, "w"))

# Left-over debug dump of the cleaned additive lists.
#add_clean = [(x['_id'], x['value'].split("_")) for x in additives_stats]
#add_clean.sort()
#for add in add_clean:
#    print("{}: {}".format(add[0], add[1]))

# Each record presumably has an '_id' key and a 'value' string of
# underscore-joined additive tokens — TODO confirm against the mapper output.
ordered_ids = [x['_id'] for x in additives_stats]
ordered_additives = [x['value'] for x in additives_stats]

# Binary bag-of-additives matrix: one row per sample, one column per additive.
vectorizer = CountVectorizer(tokenizer=lambda x: x.split("_"), binary=True)
X = vectorizer.fit_transform(ordered_additives)
Xarray = X.toarray()
print("Feature names: ", vectorizer.get_feature_names())
#print(dir(Xarray))
print("Feature vector size: ", Xarray.size)
print("Feature vector shape: ", Xarray.shape)
print("Number of samples: ", len(ordered_additives))
#print(type(Xarray))

# Apriori works on token lists, not the vectorized matrix, so split again.
ordered_additives_split = [x['value'].split("_") for x in additives_stats]
minSupport = 0.1
minConfidence = 0.5
items, rules = runApriori(ordered_additives_split, minSupport, minConfidence)
printResults(items, rules)
# items, rules = apriori.runApriori(inFile, minSupport, minConfidence) # apriori.printResults(items, rules) # except Exception,e: # logging.error("apriori api error",e) # else: # logging.info("apriori api has execute successfully ") full_name = os.path.realpath(inFile) cfg_file_name = get_cfg_filename(full_name) pos = full_name.find(".txt") result_name = full_name[:pos] + "_result.txt" logging.info("start apriori!") try: #logging.info("in try!") #logging.info("inFile",str(inFile)) apri_indi_set = indicator_classify(inFile,buckets_cls) print "excute apriori algorithm" logging.info("excuting apriori!") rows_file = apriori.dataFromList(apri_indi_set) items, rules = apriori.runApriori(rows_file, minSupport, minConfidence) apriori.printResults(items, rules,result_name) except Exception,e: logging.error("apriori api error",str(e)) else: logging.info("apriori api has execute successfully ") print "End!!"
# ''' # try: # apri_indi_set = indicator_classify(inFile,buckets_cls) # inFile = apriori.dataFromList(apri_indi_set) # items, rules = apriori.runApriori(inFile, minSupport, minConfidence) # apriori.printResults(items, rules) # except Exception,e: # logging.error("apriori api error",e) # else: # logging.info("apriori api has execute successfully ") full_name = os.path.realpath(inFile) cfg_file_name = get_cfg_filename(full_name) pos = full_name.find(".txt") result_name = full_name[:pos] + "_result.txt" logging.info("start apriori!") try: #logging.info("in try!") #logging.info("inFile",str(inFile)) apri_indi_set = indicator_classify(inFile, buckets_cls) print "excute apriori algorithm" logging.info("excuting apriori!") rows_file = apriori.dataFromList(apri_indi_set) items, rules = apriori.runApriori(rows_file, minSupport, minConfidence) apriori.printResults(items, rules, result_name) except Exception, e: logging.error("apriori api error", str(e)) else: logging.info("apriori api has execute successfully ") print "End!!"
import json
from apriori import runApriori, printResults


def generateItemsets(items):
    """Yield the 'ingredient_ids' list of every item that carries one.

    Items lacking the key are skipped, so the output length may be
    shorter than the input.
    """
    for item in items:
        if 'ingredient_ids' in item:
            yield item['ingredient_ids']


# FIX: open the crawl file via a context manager so the handle is closed
# (the original `json.load(open(...))` leaked it); also drop the stray
# trailing semicolon.
with open('../bbc_ingredients/bbc_crawl.json', 'r') as crawl_file:
    data = json.load(crawl_file)

ingredients = generateItemsets(data)
# FIX: print as a function call — the original Python 2 `print` statement is
# a SyntaxError on Python 3, while the single-argument call form behaves
# identically on both.
print('Computing apriori')
items, rules = runApriori(ingredients, 0.00, 0.80)
printResults(items, rules)