예제 #1
0
def main():
    """Parse the command line, run Apriori and print the results.

    Options:
        -f / --inputFile   name of the CSV file to mine. When omitted,
                           ``sys.stdin`` is handed to ``runApriori`` instead.
        -s / --minSupport  minimum support value (float, default 0.4).
    """
    import sys
    from optparse import OptionParser
    from apriori import runApriori, dataFromFile, printResults

    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile',
                         dest='input',
                         help='filename containing csv',
                         default=None)
    optparser.add_option('-s', '--minSupport',
                         dest='minS',
                         help='minimum support value',
                         default=0.4,
                         type='float')

    options, _args = optparser.parse_args()

    if options.input is None:
        inFile = sys.stdin
        # There is no path to derive a name from; the previous code crashed
        # here with AttributeError on ``None.split``.
        # NOTE(review): placeholder label -- confirm what printResults does
        # with its third argument.
        datasetName = 'stdin'
    else:
        inFile = dataFromFile(options.input)
        pathParts = options.input.split("/")
        # Keep the historical choice (third path component) when it exists;
        # otherwise fall back to the last component instead of raising
        # IndexError on shorter paths.
        datasetName = pathParts[2] if len(pathParts) > 2 else pathParts[-1]

    minSupport = options.minS

    items, PBoarder, NBoarder = runApriori(inFile, minSupport)

    printResults(items, minSupport, datasetName, PBoarder, NBoarder)
예제 #2
0
def runApriori(file):
    """Ask for the two thresholds, then mine ``file`` and print the outcome.

    The minimum support is prompted for first and the minimum confidence
    second; both answers are converted with ``float``. ``file`` is passed to
    ``apriori.dataFromFile`` and the mined items and rules are handed to
    ``apriori.printResults``. Nothing is returned.
    """
    prompts = (
        "Enter the minimum support value :: ",
        "Enter the minimum confidence value :: ",
    )
    # The list is built left to right, so the prompts keep their order.
    support_threshold, confidence_threshold = [
        float(input(prompt)) for prompt in prompts
    ]

    transactions = apriori.dataFromFile(file)
    frequent_items, association_rules = apriori.runApriori(
        transactions, support_threshold, confidence_threshold)
    apriori.printResults(frequent_items, association_rules)
예제 #3
0
    def test_run_apriori_should_get_items_and_rules(self):
        """runApriori returns the expected frequent itemsets and rules.

        An eight-transaction CSV fixture is written to ``test_apriori.csv``,
        mined with minSupport=0.5 and minConfidence=0.05, and both results
        are compared with hard-coded expectations. The fixture is removed
        afterwards even when an assertion fails.
        """
        data = 'apple,beer,rice,chicken\n'
        data += 'apple,beer,rice\n'
        data += 'apple,beer\n'
        data += 'apple,mango\n'
        data += 'milk,beer,rice,chicken\n'
        data += 'milk,beer,rice\n'
        data += 'milk,beer\n'
        data += 'milk,mango'

        # Write the fixture directly instead of through a shell `echo`, so
        # the test does not depend on a POSIX shell or its quoting rules.
        # The extra newline reproduces the one `echo` used to append.
        with open('test_apriori.csv', 'w') as fixture:
            fixture.write(data + '\n')

        try:
            inFile = dataFromFile('test_apriori.csv')
            minSupport = 0.5
            minConfidence = 0.05

            items, rules = runApriori(inFile, minSupport, minConfidence)

            expected = [
                (('milk',), 0.5),
                (('apple',), 0.5),
                (('beer',), 0.75),
                (('rice',), 0.5),
                (('beer', 'rice'), 0.5)
            ]
            self.assertEqual(items, expected)

            expected = [
                ((('beer',), ('rice',)), 0.6666666666666666),
                ((('rice',), ('beer',)), 1.0)
            ]
            self.assertEqual(rules, expected)
        finally:
            # Runs on failure too, so no stale fixture is left behind.
            os.remove('test_apriori.csv')
예제 #4
0
    def test_run_apriori_should_get_items_and_rules(self):
        """Check the itemsets and rules mined from a small CSV fixture.

        The fixture (eight comma-separated transactions) is written to
        ``test_apriori.csv`` and mined with minSupport=0.5 and
        minConfidence=0.05. The file is deleted in a ``finally`` clause so a
        failing assertion cannot leave it behind.
        """
        transactions = [
            'apple,beer,rice,chicken',
            'apple,beer,rice',
            'apple,beer',
            'apple,mango',
            'milk,beer,rice,chicken',
            'milk,beer,rice',
            'milk,beer',
            'milk,mango',
        ]
        fixture_path = 'test_apriori.csv'

        # Plain file I/O replaces the former shell `echo ... > file`; every
        # line, including the last, ends with a newline exactly as before.
        with open(fixture_path, 'w') as fixture:
            fixture.write('\n'.join(transactions) + '\n')

        try:
            inFile = dataFromFile(fixture_path)
            minSupport = 0.5
            minConfidence = 0.05

            items, rules = runApriori(inFile, minSupport, minConfidence)

            expected = [(('milk', ), 0.5), (('apple', ), 0.5), (('beer', ), 0.75),
                        (('rice', ), 0.5), (('beer', 'rice'), 0.5)]
            self.assertEqual(items, expected)

            expected = [((('beer', ), ('rice', )), 0.6666666666666666),
                        ((('rice', ), ('beer', )), 1.0)]
            self.assertEqual(rules, expected)
        finally:
            os.remove(fixture_path)
예제 #5
0
def sd_apri_main(inFile, buckets_cls, minSupport, minConfidence, result_name):
    """Classify the indicators of ``inFile`` and mine them with Apriori.

    Steps, in order: log the start, call ``get_cfg_filename(BASE_DIR)``,
    classify with ``indicator_classify(inFile, buckets_cls)``, wrap the
    result with ``apriori.dataFromList``, mine it with the given thresholds
    and pass items, rules and ``result_name`` to ``apriori.printResults``.

    Returns:
        Whatever ``apriori.printResults`` returns.
    """
    apri_logger.info("start sd_apri")

    # The return value is deliberately unused; the call itself is kept.
    # NOTE(review): confirm the call is still needed for its side effects.
    get_cfg_filename(BASE_DIR)

    classified_indicators = indicator_classify(inFile, buckets_cls)
    transactions = apriori.dataFromList(classified_indicators)
    frequent_items, association_rules = apriori.runApriori(
        transactions, minSupport, minConfidence)
    return apriori.printResults(frequent_items, association_rules, result_name)
예제 #6
0
def get_all_recommendation(sup, con, subreddit):
	"""Collect the antecedent items of every mined rule as recommendations.

	The transactions stored at ``tempFile_path`` are mined with minimum
	support ``sup`` and minimum confidence ``con``. Every item on the
	left-hand side of a rule is kept once, in first-seen order, unless it
	equals ``subreddit`` ignoring case.

	The temporary file is removed afterwards, also when mining fails.

	Returns:
		list: the recommended items, without duplicates.
	"""
	recommendation_set = []
	# Mirror of recommendation_set for O(1) duplicate checks.
	already_added = set()
	try:
		temp_file = apriori.dataFromFile(tempFile_path)
		_items, rules = apriori.runApriori(temp_file, sup, con)
		for (pre, _post), _confidence in rules:
			for item in pre:
				if item in already_added:
					continue
				if item.lower() == subreddit.lower():
					continue
				already_added.add(item)
				recommendation_set.append(item)
	finally:
		# Clean up even on failure; the existence check keeps a missing file
		# from masking the original exception.
		if os.path.exists(tempFile_path):
			os.remove(tempFile_path)
	return recommendation_set
예제 #7
0
    def test_run_apriori_should_get_items_and_rules(self):
        """Mine a small CSV fixture and compare items and rules order-free.

        Itemsets are sorted by (size, support, itemset) and turned into sets
        before the comparison; rules are compared as sets of tuples.
        """
        baskets = (
            'apple,beer,rice,chicken',
            'apple,beer,rice',
            'apple,beer',
            'apple,mango',
            'milk,beer,rice,chicken',
            'milk,beer,rice',
            'milk,beer',
            'milk,mango',
        )
        with open('test_apriori.csv', 'w') as csv_file:
            csv_file.write(''.join(basket + '\n' for basket in baskets))

        source = dataFromFile('test_apriori.csv')
        found_items, found_rules = runApriori(source, 0.5, 0.05)

        def ordering(entry):
            # Smaller itemsets first, then by support, then by the itemset.
            return (len(entry[0]), entry[1], entry[0])

        normalised_items = [
            (set(itemset), support)
            for itemset, support in sorted(found_items, key=ordering)
        ]
        wanted_items = [
            ({"apple"}, 0.5),
            ({"milk"}, 0.5),
            ({"rice"}, 0.5),
            ({"beer"}, 0.75),
            ({"beer", "rice"}, 0.5),
        ]
        self.assertEqual(normalised_items, wanted_items)

        wanted_rules = {
            (('beer',), ('rice',)): 0.6666666666666666,
            (('rice',), ('beer',)): 1.0,
        }
        self.assertEqual(set(found_rules), set(wanted_rules.items()))
)

# Show the definitions of the two metrics the sliders below control.
# The Support formula previously lacked its closing parenthesis.
st.markdown(
    ' > Support(A) = (Number of transactions in which A appears)/(Total Number of Transactions)'
)
st.markdown(' > Confidence(A->B) = Support(AUB)/Support(A)')
st.markdown('---')

# Thresholds chosen by the user, each limited to the range 0.1 - 0.9.
support = st.slider("Enter the Minimum Support Value",
                    min_value=0.1,
                    max_value=0.9,
                    value=0.15)
confidence = st.slider("Enter the Minimum Confidence Value",
                       min_value=0.1,
                       max_value=0.9,
                       value=0.6)

# Mine the default data set with the selected thresholds.
inFile = dataFromFile(default_csv)

items, rules = runApriori(inFile, support, confidence)

# NOTE(review): to_str_results presumably converts the results into
# displayable form -- confirm against its definition.
i, r = to_str_results(items, rules)

st.markdown("## Results")

st.markdown("### Frequent Itemsets")
st.write(i)

st.markdown("### Frequent Rules")
st.write(r)
예제 #9
0
    additives_stats = json.load(open(DATASET_FILENAME, "r"))
except:
    print("Building dataset")
    additives_stats = openfood.inline_map_reduce(mapper, reducer)
    json.dump(additives_stats, open(DATASET_FILENAME, "w"))

# Disabled debugging aid: lists each id with its "_"-separated additives.
#add_clean = [(x['_id'], x['value'].split("_")) for x in additives_stats]
#add_clean.sort()
#for add in add_clean:
#    print("{}: {}".format(add[0], add[1]))

# Two parallel lists built from the records: ids and raw additive strings.
# NOTE(review): ordered_ids is not used in the visible part of this script.
ordered_ids = [x['_id'] for x in additives_stats]
ordered_additives = [x['value'] for x in additives_stats]

# Vectorize the additive strings, tokenizing each one on "_".
# NOTE(review): assuming this is scikit-learn's CountVectorizer, binary=True
# stores presence (0/1) instead of counts -- confirm the import.
vectorizer = CountVectorizer(tokenizer=lambda x: x.split("_"), binary=True)
X = vectorizer.fit_transform(ordered_additives)
Xarray = X.toarray()
# NOTE(review): scikit-learn 1.2 removed get_feature_names(); newer versions
# need get_feature_names_out() -- check the pinned version.
print("Feature names: ", vectorizer.get_feature_names())
#print(dir(Xarray))
print("Feature vector size: ", Xarray.size)
print("Feature vector shape: ", Xarray.shape)
print("Number of samples: ", len(ordered_additives))
#print(type(Xarray))

# Apriori is fed the token lists directly, not the vectorized matrix above.
ordered_additives_split = [x['value'].split("_") for x in additives_stats]

# Mining thresholds passed to runApriori below.
minSupport = 0.1
minConfidence = 0.5
items, rules = runApriori(ordered_additives_split, minSupport, minConfidence)
printResults(items, rules)
예제 #10
0
# Script entry point (Python 2 syntax): mines 'adult.txt' with Apriori, then
# counts the rules returned by get_MRs and get_PRs for one itemset.
if __name__ == "__main__":

    # alpha is passed to get_MRs and get_PRs below.
    # NOTE(review): p is not used in the visible part of this script.
    alpha = 1.2
    p = 0.8

    # Data set variant; exactly one of the three assignments is active.
    mode = 'original' # original dataset
    #mode = 'bin' # binarized dataset
    #mode = 's-bin' # semi-binarized dataset
    inFile = dataFromFile('adult.txt', mode=mode)

    itemSet, transactionList = getItemSetTransactionList(inFile)

    minSupport, minConfidence = 0.09, 0.6

    print 'Apriori is running'
    # This runApriori variant takes the item set and the transaction list
    # separately and returns a third value, freqSet, which get_PRs needs.
    items, rules, freqSet = runApriori(itemSet, transactionList, minSupport, minConfidence)
    printResults(items, rules)
    print 'number of frequent itemsets:', len(items)
    print 'number of frequent association rules:', len(rules)

    # Itemset handed to get_MRs / get_PRs; the commented lines are
    # alternative choices.
    DI_s = frozenset(["sex: Female", "marital-status: Never-married"])
    #DI_s = frozenset(["sex: Female", "age: <=30"])
    #DI_s = frozenset(["marital-status: Not-Married", "education: No-Degree"])

    print 'discriminatory itemset:',DI_s

    # Per the messages below, MRs are the alpha-discriminatory rules and PRs
    # the alpha-protective ones.
    MRs, PRs = get_MRs(rules, alpha, DI_s)
    print 'num of alpha-discriminatory rules', len(MRs)
    print 'num of alpha-protective rules', len(PRs)
    # NOTE(review): only len(Rs) is printed although the message also
    # mentions non-redlining rules; NRs is unused in the visible code.
    Rs, NRs = get_PRs(rules, freqSet, alpha, DI_s)
    print 'num of redlining rules and non-redlining', len(Rs)
# Overwrite every cell whose exact type is the built-in float with NaN, one
# cell at a time.
# NOTE(review): presumably the only float cells left at this point are
# missing values -- confirm against the code that builds csv_data.
for row_ind in range(len(csv_data)):
    for col_ind in range(len(csv_data.iloc[0, :])):
        if type(csv_data.iloc[row_ind, col_ind]) == float:
            csv_data.iloc[row_ind, col_ind] = float('NaN')

# Export without header or index column; the file is read back below.
csv_data.to_csv('statuscodes_toleranceRange=' + str(toleranceRange) + '.csv',
                header=False,
                index=False)
#%% end of the clean-up / export step

#%% start of the apriori algorithm
inFile = dataFromFile('statuscodes_toleranceRange=' + str(toleranceRange) +
                      '.csv')
#inFile = dataFromFile('statuscodes.csv')

items, rules = runApriori(inFile, minSupport, minConfidence)
#%% end of the apriori algorithm

#items=tempitems[:64]
#rules=temprules[:70]

#%% saving all the rules to a .txt file
list_of_tuples = rules
# The context manager closes the file even if a write raises; the previous
# open()/close() pair leaked the handle in that case.
with open(
        'rules_toleranceRange=' + str(toleranceRange) + '_MinSupport=' +
        str(minSupport) + '_MinConfidence=' + str(minConfidence) +
        '_maxFailure=' + str(maxFailure) + '_minFailure=' + str(minFailure) +
        '.txt', 'w') as f:
    # One rule per line, its elements separated by single spaces.
    for t in list_of_tuples:
        line = ' '.join(str(x) for x in t)
        f.write(line + '\n')
예제 #12
0
    #         '''
    #         try:
    #             apri_indi_set = indicator_classify(inFile,buckets_cls)
    #             inFile = apriori.dataFromList(apri_indi_set)
    #             items, rules = apriori.runApriori(inFile, minSupport, minConfidence)
    #             apriori.printResults(items, rules)
    #         except Exception,e:
    #             logging.error("apriori api error",e)
    #         else:
    #             logging.info("apriori api has execute successfully  ")

    full_name = os.path.realpath(inFile)
    cfg_file_name = get_cfg_filename(full_name)
    pos = full_name.find(".txt")
    result_name = full_name[:pos] + "_result.txt"
    logging.info("start apriori!")
    try:
        #logging.info("in try!")
        #logging.info("inFile",str(inFile))
        apri_indi_set = indicator_classify(inFile, buckets_cls)
        print "excute apriori algorithm"
        logging.info("excuting apriori!")
        rows_file = apriori.dataFromList(apri_indi_set)
        items, rules = apriori.runApriori(rows_file, minSupport, minConfidence)
        apriori.printResults(items, rules, result_name)
    except Exception, e:
        logging.error("apriori api error", str(e))
    else:
        logging.info("apriori api has execute successfully  ")
    print "End!!"
예제 #13
0
import json
from apriori import runApriori, printResults

def generateItemsets(items):
	"""Lazily yield the 'ingredient_ids' value of every item that has one.

	Items without an 'ingredient_ids' key are skipped; the remaining values
	are yielded unchanged and in input order.
	"""
	wanted_key = 'ingredient_ids'
	for record in items:
		if wanted_key not in record:
			continue
		yield record[wanted_key]



# Load the crawl inside a context manager so the file handle is closed as
# soon as parsing finishes (it used to stay open until garbage collection).
with open('../bbc_ingredients/bbc_crawl.json', 'r') as crawl_file:
	data = json.load(crawl_file)

# Lazy stream of the ingredient-id collections of all items that have one.
ingredients = generateItemsets(data)

# Single-argument call form prints the same text on Python 2 and Python 3.
print('Computing apriori')

# Thresholds: minimum support 0.00, minimum confidence 0.80.
items, rules = runApriori(ingredients, 0.00, 0.80)

printResults(items, rules)
예제 #14
0
# Source collection holding the carts to mine.
db = client1.cmpe281
coll = db.recommend
cursor = db.recommend.find()

# Second connection.
# NOTE(review): db2 is not used in the visible part of this script.
client2 = MongoClient("192.168.99.100:27017")
db2 = client2.cmpe281

# NOTE(review): minSupport is not passed to runApriori below, which uses the
# literal 0.10 instead -- confirm which value is intended.
minSupport = 3
transactions = []

# Each document stores its cart as one comma-separated string.
for document in cursor:
    transactions.append(document['cart'].split(","))

# print(transactions)

items, rules = runApriori(transactions, 0.10, 0.68)

# Debug output of the first rule; guarded because indexing an empty result
# raised IndexError and aborted the script.
if rules:
    print(rules[0])

rule = {}
for rule in rules:
    for item in rule:
        if type(item) is tuple:
            cart = ",".join(item[0])
            recommend = ",".join(item[1])
            rule = {"cart": cart, "recommend": recommend}

            # print(rule)
            if db.rules.find(rule).count() > 0:
                print("document already exists")
            else:
예제 #15
0
import json
from apriori import runApriori, printResults


def generateItemsets(items):
    """Yield ``item['ingredient_ids']`` for each item that defines that key.

    The function is a generator: nothing is read from ``items`` until the
    result is iterated, and items lacking the key are silently skipped.
    """
    field = 'ingredient_ids'
    for entry in items:
        if field not in entry:
            continue
        yield entry[field]


# Read the crawl through a context manager; the bare open() call used before
# never closed the file explicitly.
with open('../bbc_ingredients/bbc_crawl.json', 'r') as crawl_file:
    data = json.load(crawl_file)

# Generator over the ingredient-id collections of the crawled items.
ingredients = generateItemsets(data)

# Parenthesised single-argument form works on both Python 2 and Python 3.
print('Computing apriori')

# Thresholds: minimum support 0.00, minimum confidence 0.80.
items, rules = runApriori(ingredients, 0.00, 0.80)

printResults(items, rules)
#             items, rules = apriori.runApriori(inFile, minSupport, minConfidence)
#             apriori.printResults(items, rules)
#         except Exception,e:
#             logging.error("apriori api error",e)
#         else:
#             logging.info("apriori api has execute successfully  ")
        
    full_name = os.path.realpath(inFile)
    cfg_file_name = get_cfg_filename(full_name)
    pos = full_name.find(".txt")
    result_name = full_name[:pos] + "_result.txt"
    logging.info("start apriori!")
    try:
        #logging.info("in try!")
        #logging.info("inFile",str(inFile))
        apri_indi_set = indicator_classify(inFile,buckets_cls)
        print "excute apriori algorithm"
        logging.info("excuting apriori!")
        rows_file = apriori.dataFromList(apri_indi_set)
        items, rules = apriori.runApriori(rows_file, minSupport, minConfidence)
        apriori.printResults(items, rules,result_name)
    except Exception,e:
        logging.error("apriori api error",str(e))
    else:
        logging.info("apriori api has execute successfully  ")
    print "End!!"
    
    


from __future__ import division
from pymongo import MongoClient
from bson.code import Code
from apriori import runApriori, printResults

# Products collection of the "off" database on the default MongoDB host.
client = MongoClient()
db = client["off"]
products = db["products"]

product_ingredients = []
total_ingredients = 0

# Field holding a product's ingredient tags (hoisted out of the loop).
key = "ingredients_tags"
for p in products.find():
    # `in` replaces dict.has_key(), which no longer exists on Python 3.
    if key in p and len(p[key]) > 0:
        product_ingredients.append(p[key])
        total_ingredients += len(p[key])

# Ingredients per product = total ingredients / number of products. The
# previous code divided the other way round and raised ZeroDivisionError
# when no product had ingredients.
if product_ingredients:
    average_ingredients = total_ingredients / len(product_ingredients)
else:
    average_ingredients = 0

print("Total products with ingredients: {}".format(len(product_ingredients)))
print("Total number of recorded ingredients: {}".format(total_ingredients))
print("Average number of ingredients per product: {}".format(
    average_ingredients))

# Mine the per-product tag lists for frequent itemsets and rules.
minSupport = 0.2
minConfidence = 0.7
items, rules = runApriori(product_ingredients, minSupport, minConfidence)
printResults(items, rules)