Example #1
0
def runApriori(file):
    """Interactively mine ``file`` with Apriori and print the outcome.

    The minimum support and minimum confidence are read from standard input
    (each parsed with ``float``) before the dataset is loaded. The data is
    then loaded with ``apriori.dataFromFile``, mined with
    ``apriori.runApriori`` and reported through ``apriori.printResults``.

    Args:
        file: dataset location forwarded unchanged to ``apriori.dataFromFile``.
    """
    # Both thresholds are requested up front, support first.
    support_threshold = float(input("Enter the minimum support value :: "))
    confidence_threshold = float(
        input("Enter the minimum confidence value :: "))

    transactions = apriori.dataFromFile(file)
    mined = apriori.runApriori(transactions, support_threshold,
                               confidence_threshold)
    frequent_items, association_rules = mined
    apriori.printResults(frequent_items, association_rules)
Example #2
0
def main():
    """Command-line entry point: parse options, run Apriori, print results.

    Options:
        -f / --inputFile   path of the CSV dataset. When omitted,
                           ``sys.stdin`` is handed to ``runApriori`` as-is.
        -s / --minSupport  minimum support threshold (float, default 0.4).
    """
    import sys
    from optparse import OptionParser
    from apriori import runApriori, dataFromFile, printResults

    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile',
                         dest='input',
                         help='filename containing csv',
                         default=None)
    optparser.add_option('-s', '--minSupport',
                         dest='minS',
                         help='minimum support value',
                         default=0.4,
                         type='float')

    options, _ = optparser.parse_args()

    # ``options.input`` is either None or a string, so two branches are
    # exhaustive; the former third "no dataset" branch could never run.
    if options.input is None:
        inFile = sys.stdin
        # There is no path to derive a name from. Previously this case
        # crashed on ``None.split`` after the mining had already finished.
        # NOTE(review): 'stdin' is a placeholder label; confirm it suits
        # whatever printResults does with its third argument.
        datasetName = 'stdin'
    else:
        inFile = dataFromFile(options.input)
        pathParts = options.input.split("/")
        # Keep the historical choice (third path component) when it exists,
        # otherwise fall back to the last component instead of IndexError.
        datasetName = pathParts[2] if len(pathParts) > 2 else pathParts[-1]

    minSupport = options.minS

    items, PBoarder, NBoarder = runApriori(inFile, minSupport)

    printResults(items, minSupport, datasetName, PBoarder, NBoarder)
Example #3
0
    def test_run_apriori_should_get_items_and_rules(self):
        """runApriori yields the expected itemsets and rules for the fixture.

        Eight transactions are written to ``test_apriori.csv`` and mined with
        minimum support 0.5 and minimum confidence 0.05. The returned items
        and rules are compared against the hard-coded expectations.
        """
        data = 'apple,beer,rice,chicken\n'
        data += 'apple,beer,rice\n'
        data += 'apple,beer\n'
        data += 'apple,mango\n'
        data += 'milk,beer,rice,chicken\n'
        data += 'milk,beer,rice\n'
        data += 'milk,beer\n'
        data += 'milk,mango'

        # Create the fixture from Python instead of shelling out to `echo`,
        # whose quoting is shell-specific. The extra '\n' reproduces the
        # trailing newline that `echo` appended.
        with open('test_apriori.csv', 'w') as fh:
            fh.write(data + '\n')

        try:
            inFile = dataFromFile('test_apriori.csv')
            minSupport = 0.5
            minConfidence = 0.05

            items, rules = runApriori(inFile, minSupport, minConfidence)

            expected = [
                (('milk',), 0.5),
                (('apple',), 0.5),
                (('beer',), 0.75),
                (('rice',), 0.5),
                (('beer', 'rice'), 0.5)
            ]
            self.assertEqual(items, expected)

            expected = [
                ((('beer',), ('rice',)), 0.6666666666666666),
                ((('rice',), ('beer',)), 1.0)
            ]
            self.assertEqual(rules, expected)
        finally:
            # Remove the fixture even when an assertion above fails.
            os.remove('test_apriori.csv')
Example #4
0
    def test_run_apriori_should_get_items_and_rules(self):
        """Check runApriori's items and rules against known values.

        The transactions below are stored in ``test_apriori.csv`` and mined
        with minimum support 0.5 and minimum confidence 0.05.
        """
        data = 'apple,beer,rice,chicken\n'
        data += 'apple,beer,rice\n'
        data += 'apple,beer\n'
        data += 'apple,mango\n'
        data += 'milk,beer,rice,chicken\n'
        data += 'milk,beer,rice\n'
        data += 'milk,beer\n'
        data += 'milk,mango'

        # Write the file directly rather than via `echo ... > file`, which
        # depends on the shell's quoting rules. `echo` terminated its output
        # with a newline, so one is added here to keep the content the same.
        with open('test_apriori.csv', 'w') as fh:
            fh.write(data + '\n')

        try:
            inFile = dataFromFile('test_apriori.csv')
            minSupport = 0.5
            minConfidence = 0.05

            items, rules = runApriori(inFile, minSupport, minConfidence)

            expected = [(('milk', ), 0.5), (('apple', ), 0.5),
                        (('beer', ), 0.75), (('rice', ), 0.5),
                        (('beer', 'rice'), 0.5)]
            self.assertEqual(items, expected)

            expected = [((('beer', ), ('rice', )), 0.6666666666666666),
                        ((('rice', ), ('beer', )), 1.0)]
            self.assertEqual(rules, expected)
        finally:
            # Cleanup must not be skipped by a failing assertion.
            os.remove('test_apriori.csv')
Example #5
0
    def test_read_data_from_file(self):
        """The first record read by dataFromFile is a frozenset of the row.

        A one-line CSV is written to ``test_apriori.csv``, read back through
        ``dataFromFile`` and its first record compared with the expected
        frozenset.
        """
        # Plain file I/O replaces `echo '...' > file`, which relies on
        # POSIX-shell quoting. The '\n' is the newline `echo` emitted.
        with open('test_apriori.csv', 'w') as fh:
            fh.write('apple,beer,rice\n')

        try:
            result = dataFromFile('test_apriori.csv')
            data = list(result)

            expected = frozenset(['beer', 'rice', 'apple'])
            self.assertEqual(data[0], expected)
        finally:
            # Delete the fixture whether or not the assertion passed.
            os.remove('test_apriori.csv')
Example #6
0
    def test_read_data_from_file(self):
        """dataFromFile's first record equals the frozenset of the CSV row."""
        # Build the fixture without a shell: `echo`/`rm` via os.system are
        # not portable and `rm` was skipped whenever the assertion failed.
        # The written text ends in '\n' exactly as `echo` output did.
        with open('test_apriori.csv', 'w') as fh:
            fh.write('apple,beer,rice\n')

        try:
            records = list(dataFromFile('test_apriori.csv'))

            expected = frozenset(['beer', 'rice', 'apple'])
            self.assertEqual(records[0], expected)
        finally:
            os.remove('test_apriori.csv')
Example #7
0
def get_all_recommendation(sup, con, subreddit):
	"""Collect the left-hand-side items of every mined rule.

	The transactions are read from the module-level ``tempFile_path`` and
	mined with ``apriori.runApriori``. The temp file is deleted afterwards,
	including when mining raises.

	Args:
		sup: minimum support forwarded to ``apriori.runApriori``.
		con: minimum confidence forwarded to ``apriori.runApriori``.
		subreddit: name left out of the result (compared case-insensitively).

	Returns:
		A list of the distinct items found in rule antecedents, in the
		order they were first encountered.
	"""
	recommendation_set = list()
	# Mirror of the result list for O(1) duplicate checks; the list itself
	# is kept because callers receive the first-seen ordering.
	seen = set()
	try:
		temp_file = apriori.dataFromFile(tempFile_path)
		_, rules = apriori.runApriori(temp_file, sup, con)
		for rule, _ in rules:
			pre, _ = rule
			for item in pre:
				if item in seen:
					continue
				if item.lower() == subreddit.lower():
					continue
				seen.add(item)
				recommendation_set.append(item)
	finally:
		# Always drop the temp file so a failed run does not leave it
		# behind. The existence check keeps a missing-file error from
		# the read above from being masked by a second one here.
		if os.path.exists(tempFile_path):
			os.remove(tempFile_path)
	return recommendation_set
Example #8
0
    def test_run_apriori_should_get_items_and_rules(self):
        """Mine a small CSV fixture and compare items and rules order-free.

        Items are sorted by (itemset size, support, itemset) and each itemset
        is turned into a set before comparison. Rules are compared as sets.
        """
        transactions = (
            'apple,beer,rice,chicken',
            'apple,beer,rice',
            'apple,beer',
            'apple,mango',
            'milk,beer,rice,chicken',
            'milk,beer,rice',
            'milk,beer',
            'milk,mango',
        )
        with open('test_apriori.csv', 'w') as fh:
            fh.write(''.join(row + '\n' for row in transactions))

        items, rules = runApriori(dataFromFile('test_apriori.csv'), 0.5, 0.05)

        def arrangement(entry):
            # Sort key that makes the item ordering deterministic.
            return (len(entry[0]), entry[1], entry[0])

        actual_items = [(set(itemset), support)
                        for itemset, support in sorted(items, key=arrangement)]
        expected_items = [
            ({"apple"}, 0.5),
            ({"milk"}, 0.5),
            ({"rice"}, 0.5),
            ({"beer"}, 0.75),
            ({"beer", "rice"}, 0.5),
        ]
        self.assertEqual(actual_items, expected_items)

        expected_rules = {
            ((('beer',), ('rice',)), 0.6666666666666666),
            ((('rice',), ('beer',)), 1.0),
        }
        self.assertEqual(set(rules), expected_rules)
)

# Show the formulas behind the two thresholds chosen below.
# The Support formula previously lacked its closing parenthesis.
st.markdown(
    ' > Support(A) = (Number of transactions in which A appears)/(Total Number of Transactions)'
)
st.markdown(' > Confidence(A->B) = Support(AUB)/Support(A)')
st.markdown('---')

# Threshold sliders; both are limited to the range 0.1 to 0.9.
support = st.slider("Enter the Minimum Support Value",
                    min_value=0.1,
                    max_value=0.9,
                    value=0.15)
confidence = st.slider("Enter the Minimum Confidence Value",
                       min_value=0.1,
                       max_value=0.9,
                       value=0.6)

# Load the default dataset and mine it with the selected thresholds.
inFile = dataFromFile(default_csv)

items, rules = runApriori(inFile, support, confidence)

# Convert the raw results before handing them to st.write.
i, r = to_str_results(items, rules)

st.markdown("## Results")

st.markdown("### Frequent Itemsets")
st.write(i)

st.markdown("### Frequent Rules")
st.write(r)
# Warn (without stopping) when the cut value equals the maximum column index
# or is exactly one above it.
if max_col_ind == cut_df or max_col_ind == cut_df - 1:
    print('Value for cutting DataFrame should be higher')

# Walk every cell and overwrite those whose exact type is the built-in float
# with NaN. The check is `type(...) == float`, not isinstance, so float
# subclasses are left untouched.
# NOTE(review): presumably this targets float placeholders in otherwise
# non-float columns -- confirm against how csv_data is built.
for row_ind in range(len(csv_data)):
    for col_ind in range(len(csv_data.iloc[0, :])):
        if type(csv_data.iloc[row_ind, col_ind]) == float:
            csv_data.iloc[row_ind, col_ind] = float('NaN')

# Write the table without header or index; the file name embeds
# toleranceRange.
csv_data.to_csv('statuscodes_toleranceRange=' + str(toleranceRange) + '.csv',
                header=False,
                index=False)
#%% end of making it look nice

#%% start of the apriori algorithm
# Read back the file written above (same name expression) as Apriori input.
inFile = dataFromFile('statuscodes_toleranceRange=' + str(toleranceRange) +
                      '.csv')
#inFile = dataFromFile('statuscodes.csv')

items, rules = runApriori(inFile, minSupport, minConfidence)
#%% end of the apriori algorithm

#items=tempitems[:64]
#rules=temprules[:70]

#%% saving all the rules to a .txt file
list_of_tuples = rules
f = open(
    'rules_toleranceRange=' + str(toleranceRange) + '_MinSupport=' +
    str(minSupport) + '_MinConfidence=' + str(minConfidence) + '_maxFailure=' +
    str(maxFailure) + '_minFailure=' + str(minFailure) + '.txt', 'w')
for t in list_of_tuples: