Example #1
0
def runApriori(file):
    """Interactively mine `file` with apriori and print the outcome.

    Prompts on stdin for the minimum support and then the minimum
    confidence (both parsed as floats), loads the dataset through
    ``apriori.dataFromFile``, runs ``apriori.runApriori`` with those
    thresholds and hands the resulting items and rules to
    ``apriori.printResults``. Returns nothing.
    """
    prompts = (
        "Enter the minimum support value :: ",
        "Enter the minimum confidence value :: ",
    )
    # Prompts are issued in order; a bad first answer raises before the
    # second prompt is shown.
    support_threshold, confidence_threshold = [float(input(p)) for p in prompts]

    transactions = apriori.dataFromFile(file)
    itemsets, assoc_rules = apriori.runApriori(
        transactions, support_threshold, confidence_threshold
    )
    apriori.printResults(itemsets, assoc_rules)
Example #2
0
def main():
    """Command-line entry point: run apriori on a dataset and print results.

    Options:
        -f / --inputFile   path of the CSV dataset. When omitted,
                           ``sys.stdin`` is passed to ``runApriori`` as-is.
        -s / --minSupport  minimum support threshold (float, default 0.4).

    The items and the two border collections returned by ``runApriori`` are
    forwarded to ``printResults`` together with a short dataset label.
    """
    import sys
    from optparse import OptionParser
    from apriori import runApriori, dataFromFile, printResults

    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile',
                         dest='input',
                         help='filename containing csv',
                         default=None)
    optparser.add_option('-s', '--minSupport',
                         dest='minS',
                         help='minimum support value',
                         default=0.4,
                         type='float')

    options, _args = optparser.parse_args()

    # The former trailing `else` (print + sys.exit) could never run because
    # the two branches below already cover every case, so it was dropped.
    if options.input is None:
        inFile = sys.stdin
        # No path to derive a label from; previously this case crashed with
        # AttributeError on `None.split`.
        # NOTE(review): 'stdin' is a placeholder label - confirm printResults
        # accepts an arbitrary string here.
        dataset_name = 'stdin'
    else:
        inFile = dataFromFile(options.input)
        path_parts = options.input.split("/")
        # Historical behaviour: the third path component names the dataset.
        # Fall back to the last component for shorter paths instead of
        # raising IndexError.
        dataset_name = path_parts[2] if len(path_parts) > 2 else path_parts[-1]

    minSupport = options.minS

    items, PBoarder, NBoarder = runApriori(inFile, minSupport)

    printResults(items, minSupport, dataset_name, PBoarder, NBoarder)
Example #3
0
    def test_print_results_should_have_results_in_defined_format(self):
        # Fixtures: (itemset, support) pairs and ((lhs, rhs), confidence) pairs.
        items = [
            (('milk', ), 0.5),
            (('apple', ), 0.5),
            (('beer', ), 0.75),
            (('rice', ), 0.5),
            (('beer', 'rice'), 0.5),
        ]
        rules = [
            ((('beer', ), ('rice', )), 0.6666666666666666),
            ((('rice', ), ('beer', )), 1.0),
        ]

        # Exact text expected on stdout: the item lines first, then the
        # rules section.
        expected = (
            "item: ('milk',) , 0.500\nitem: ('apple',) , "
            "0.500\nitem: ('rice',) , 0.500\nitem: ('beer', "
            "'rice') , 0.500\nitem: ('beer',) , 0.750\n\n"
            "------------------------ RULES:\nRule: ('beer',) "
            "==> ('rice',) , 0.667\nRule: ('rice',) ==> "
            "('beer',) , 1.000\n"
        )

        # Capture everything printResults writes to stdout.
        with patch('sys.stdout', new=StringIO()) as captured:
            printResults(items, rules)
            self.assertEqual(captured.getvalue(), expected)
Example #4
0
def sd_apri_main(inFile,buckets_cls,minSupport, minConfidence,result_name):
    '''
    Run the classify -> apriori -> report pipeline and return its result.

    ``inFile`` and ``buckets_cls`` are passed to ``indicator_classify``; its
    output is converted with ``apriori.dataFromList`` and mined by
    ``apriori.runApriori`` using ``minSupport`` and ``minConfidence``.
    The items and rules are then given to ``apriori.printResults`` along
    with ``result_name``, and whatever that call returns is returned here.
    '''
    apri_logger.info("start sd_apri")
    # The return value is deliberately not kept.
    # NOTE(review): presumably called for a side effect - confirm it is
    # still needed.
    get_cfg_filename(BASE_DIR)

    classified = indicator_classify(inFile, buckets_cls)
    transactions = apriori.dataFromList(classified)
    found_items, found_rules = apriori.runApriori(
        transactions, minSupport, minConfidence
    )
    return apriori.printResults(found_items, found_rules, result_name)
Example #5
0
    def test_print_results_should_have_results_in_defined_format(self):
        # Input fixtures, built up front so the patched-stdout section only
        # contains the call under test and its assertion.
        single_items = [(('milk',), 0.5), (('apple',), 0.5),
                        (('beer',), 0.75), (('rice',), 0.5)]
        items = single_items + [(('beer', 'rice'), 0.5)]
        rules = [((('beer',), ('rice',)), 0.6666666666666666),
                 ((('rice',), ('beer',)), 1.0)]

        # Byte-exact report text that printResults must emit.
        expected = "".join([
            "item: ('milk',) , 0.500\nitem: ('apple',) , ",
            "0.500\nitem: ('rice',) , 0.500\nitem: ('beer', ",
            "'rice') , 0.500\nitem: ('beer',) , 0.750\n\n",
            "------------------------ RULES:\nRule: ('beer',) ",
            "==> ('rice',) , 0.667\nRule: ('rice',) ==> ",
            "('beer',) , 1.000\n",
        ])

        with patch('sys.stdout', new=StringIO()) as stdout_buffer:
            printResults(items, rules)
            printed = stdout_buffer.getvalue()
            self.assertEqual(printed, expected)
Example #6
0
    additives_stats = json.load(open(DATASET_FILENAME, "r"))
except:
    print("Building dataset")
    additives_stats = openfood.inline_map_reduce(mapper, reducer)
    json.dump(additives_stats, open(DATASET_FILENAME, "w"))

#add_clean = [(x['_id'], x['value'].split("_")) for x in additives_stats]
#add_clean.sort()
#for add in add_clean:
#    print("{}: {}".format(add[0], add[1]))

# Parallel lists holding the `_id` and `value` field of every record in
# additives_stats, in the same order.
ordered_ids = [x['_id'] for x in additives_stats]
ordered_additives = [x['value'] for x in additives_stats]

# Vectorize the `value` strings: each one is tokenized by splitting on "_".
# NOTE(review): presumably scikit-learn's CountVectorizer, where binary=True
# records presence/absence rather than counts - confirm against the import.
vectorizer = CountVectorizer(tokenizer=lambda x: x.split("_"), binary=True)
X = vectorizer.fit_transform(ordered_additives)
Xarray = X.toarray()
# NOTE(review): if this is scikit-learn, get_feature_names() was removed in
# 1.2 in favour of get_feature_names_out() - verify the pinned version.
print("Feature names: ", vectorizer.get_feature_names())
#print(dir(Xarray))
print("Feature vector size: ", Xarray.size)
print("Feature vector shape: ", Xarray.shape)
print("Number of samples: ", len(ordered_additives))
#print(type(Xarray))

# Same "_" split as the tokenizer above, kept as plain lists: one list of
# tokens per record, used as the input to runApriori below.
ordered_additives_split = [x['value'].split("_") for x in additives_stats]

# Thresholds passed to runApriori as (data, minSupport, minConfidence).
minSupport = 0.1
minConfidence = 0.5
items, rules = runApriori(ordered_additives_split, minSupport, minConfidence)
printResults(items, rules)
#             items, rules = apriori.runApriori(inFile, minSupport, minConfidence)
#             apriori.printResults(items, rules)
#         except Exception,e:
#             logging.error("apriori api error",e)
#         else:
#             logging.info("apriori api has execute successfully  ")
        
    full_name = os.path.realpath(inFile)
    cfg_file_name = get_cfg_filename(full_name)
    pos = full_name.find(".txt")
    result_name = full_name[:pos] + "_result.txt"
    logging.info("start apriori!")
    try:
        #logging.info("in try!")
        #logging.info("inFile",str(inFile))
        apri_indi_set = indicator_classify(inFile,buckets_cls)
        print "excute apriori algorithm"
        logging.info("excuting apriori!")
        rows_file = apriori.dataFromList(apri_indi_set)
        items, rules = apriori.runApriori(rows_file, minSupport, minConfidence)
        apriori.printResults(items, rules,result_name)
    except Exception,e:
        logging.error("apriori api error",str(e))
    else:
        logging.info("apriori api has execute successfully  ")
    print "End!!"
    
    


Example #8
0
    #         '''
    #         try:
    #             apri_indi_set = indicator_classify(inFile,buckets_cls)
    #             inFile = apriori.dataFromList(apri_indi_set)
    #             items, rules = apriori.runApriori(inFile, minSupport, minConfidence)
    #             apriori.printResults(items, rules)
    #         except Exception,e:
    #             logging.error("apriori api error",e)
    #         else:
    #             logging.info("apriori api has execute successfully  ")

    full_name = os.path.realpath(inFile)
    cfg_file_name = get_cfg_filename(full_name)
    pos = full_name.find(".txt")
    result_name = full_name[:pos] + "_result.txt"
    logging.info("start apriori!")
    try:
        #logging.info("in try!")
        #logging.info("inFile",str(inFile))
        apri_indi_set = indicator_classify(inFile, buckets_cls)
        print "excute apriori algorithm"
        logging.info("excuting apriori!")
        rows_file = apriori.dataFromList(apri_indi_set)
        items, rules = apriori.runApriori(rows_file, minSupport, minConfidence)
        apriori.printResults(items, rules, result_name)
    except Exception, e:
        logging.error("apriori api error", str(e))
    else:
        logging.info("apriori api has execute successfully  ")
    print "End!!"
import json
from apriori import runApriori, printResults

def generateItemsets(items):
    """Lazily yield the ``'ingredient_ids'`` value of each entry that has one.

    Entries of ``items`` that do not contain the key ``'ingredient_ids'``
    are skipped; the remaining values are yielded unchanged, in input order.
    """
    for entry in items:
        if 'ingredient_ids' not in entry:
            continue
        yield entry['ingredient_ids']



# Load the crawl data. The context manager closes the file as soon as it has
# been parsed instead of leaving the handle open.
with open('../bbc_ingredients/bbc_crawl.json', 'r') as crawl_file:
    data = json.load(crawl_file)

# Lazy stream of the 'ingredient_ids' value of every entry that has one.
ingredients = generateItemsets(data)

# Parenthesized so the line is valid on both Python 2 and Python 3.
print('Computing apriori')

# Arguments after the data are (minSupport, minConfidence).
items, rules = runApriori(ingredients, 0.00, 0.80)

printResults(items, rules)