def runApriori(file):
    """Interactively run Apriori on ``file`` and print the outcome.

    The minimum support and minimum confidence are read from the console
    (in that order) and converted with ``float``; a non-numeric answer
    therefore raises ``ValueError``.  The data set is then loaded with
    ``apriori.dataFromFile``, mined with ``apriori.runApriori`` and the
    resulting items and rules are handed to ``apriori.printResults``.

    Args:
        file: value forwarded unchanged to ``apriori.dataFromFile``.
    """
    # Both thresholds are requested before the data set is touched.
    support_threshold = float(input("Enter the minimum support value :: "))
    confidence_threshold = float(input("Enter the minimum confidence value :: "))

    transactions = apriori.dataFromFile(file)
    frequent_items, association_rules = apriori.runApriori(
        transactions,
        support_threshold,
        confidence_threshold,
    )
    apriori.printResults(frequent_items, association_rules)
def main():
    """Command-line entry point: parse options, run Apriori, print results.

    Options:
        -f / --inputFile   CSV file to load with ``dataFromFile``.  When the
                           option is omitted, ``sys.stdin`` is passed to
                           ``runApriori`` instead.
        -s / --minSupport  minimum support threshold (float, default 0.4).

    The third argument given to ``printResults`` is a label derived from the
    input path: the third "/"-separated component when the path has one.
    """
    import sys
    from optparse import OptionParser
    from apriori import runApriori, dataFromFile, printResults

    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile', dest='input',
                         help='filename containing csv', default=None)
    optparser.add_option('-s', '--minSupport', dest='minS',
                         help='minimum support value', default=0.4,
                         type='float')
    (options, args) = optparser.parse_args()

    # The original had a third "else" branch (print a message and exit) that
    # could never run: "is None" / "is not None" already cover every case.
    if options.input is None:
        inFile = sys.stdin
        # No path to derive a label from; previously this path crashed with
        # AttributeError on ``None.split``.
        # NOTE(review): 'stdin' is a placeholder label - confirm it suits
        # whatever printResults does with this argument.
        dataset_label = 'stdin'
    else:
        inFile = dataFromFile(options.input)
        path_parts = options.input.split("/")
        # Keep the historical choice (third component) whenever it exists;
        # fall back to the last component for shorter paths instead of
        # raising IndexError.
        dataset_label = path_parts[2] if len(path_parts) > 2 else path_parts[-1]

    minSupport = options.minS
    items, PBoarder, NBoarder = runApriori(inFile, minSupport)
    printResults(items, minSupport, dataset_label, PBoarder, NBoarder)
def test_run_apriori_should_get_items_and_rules(self):
    """runApriori returns the expected frequent item sets and rules.

    Eight transactions are written to ``test_apriori.csv`` and mined with
    minSupport=0.5 and minConfidence=0.05.  Results are compared without
    relying on the order in which item sets, rules, or the members of an
    item set happen to be produced.
    """
    data = 'apple,beer,rice,chicken\n'
    data += 'apple,beer,rice\n'
    data += 'apple,beer\n'
    data += 'apple,mango\n'
    data += 'milk,beer,rice,chicken\n'
    data += 'milk,beer,rice\n'
    data += 'milk,beer\n'
    data += 'milk,mango'

    csv_path = 'test_apriori.csv'
    # Write the fixture directly instead of shelling out to ``echo``; the
    # trailing newline reproduces what ``echo`` appended.
    with open(csv_path, 'w') as fh:
        fh.write(data + '\n')

    try:
        inFile = dataFromFile(csv_path)
        minSupport = 0.5
        minConfidence = 0.05
        items, rules = runApriori(inFile, minSupport, minConfidence)

        # Normalise the arrangement: order by (size, support, members) and
        # compare each item set as a set so member order is irrelevant.
        items = sorted(items, key=lambda x: (len(x[0]), x[1], x[0]))
        items = [(set(a), b) for a, b in items]
        expected = [
            (('apple',), 0.5),
            (('milk',), 0.5),
            (('rice',), 0.5),
            (('beer',), 0.75),
            (('beer', 'rice'), 0.5)
        ]
        expected = [(set(a), b) for a, b in expected]
        self.assertEqual(items, expected)

        expected = [
            ((('beer',), ('rice',)), 0.6666666666666666),
            ((('rice',), ('beer',)), 1.0)
        ]
        self.assertEqual(set(rules), set(expected))
    finally:
        # Remove the fixture even when an assertion above fails.
        os.remove(csv_path)
def test_run_apriori_should_get_items_and_rules(self):
    """Check the frequent item sets and rules mined from a small data set.

    The fixture file ``test_apriori.csv`` is created with plain file I/O
    and always removed afterwards.  Comparisons ignore the order of the
    returned item sets and rules, and the order of members inside an
    item set.
    """
    data = 'apple,beer,rice,chicken\n'
    data += 'apple,beer,rice\n'
    data += 'apple,beer\n'
    data += 'apple,mango\n'
    data += 'milk,beer,rice,chicken\n'
    data += 'milk,beer,rice\n'
    data += 'milk,beer\n'
    data += 'milk,mango'

    fixture = 'test_apriori.csv'
    # ``echo`` used to add a final newline; keep the file content the same.
    with open(fixture, 'w') as handle:
        handle.write(data + '\n')

    try:
        inFile = dataFromFile(fixture)
        minSupport = 0.5
        minConfidence = 0.05
        items, rules = runApriori(inFile, minSupport, minConfidence)

        # Sort by (item-set size, support, members) and turn every item set
        # into a ``set`` so the assertion does not depend on hash ordering.
        items = sorted(items, key=lambda x: (len(x[0]), x[1], x[0]))
        items = [(set(members), support) for members, support in items]
        expected = [(('apple', ), 0.5), (('milk', ), 0.5),
                    (('rice', ), 0.5), (('beer', ), 0.75),
                    (('beer', 'rice'), 0.5)]
        expected = [(set(members), support) for members, support in expected]
        self.assertEqual(items, expected)

        expected = [((('beer', ), ('rice', )), 0.6666666666666666),
                    ((('rice', ), ('beer', )), 1.0)]
        self.assertEqual(set(rules), set(expected))
    finally:
        # Runs on success and on assertion failure alike.
        os.remove(fixture)
def test_read_data_from_file(self):
    """dataFromFile yields the first CSV row as a frozenset of its fields."""
    csv_path = 'test_apriori.csv'
    # Create the one-line fixture without going through the shell; the
    # newline matches what ``echo`` wrote before.
    with open(csv_path, 'w') as fh:
        fh.write('apple,beer,rice\n')

    try:
        # Materialise the result so the first record can be indexed.
        data = list(dataFromFile(csv_path))
        expected = frozenset(['beer', 'rice', 'apple'])
        self.assertEqual(data[0], expected)
    finally:
        # Clean up even if the assertion fails.
        os.remove(csv_path)
def get_all_recommendation(sup, con, subreddit):
    """Collect recommended items from the Apriori rules of the temp file.

    The data at the module-level ``tempFile_path`` is loaded with
    ``apriori.dataFromFile`` and mined with ``apriori.runApriori``.  Every
    item found on the left-hand side of a rule is collected once, in
    first-seen order, unless it equals ``subreddit`` ignoring case.  The
    temp file is deleted before returning, including when mining fails.

    Args:
        sup: minimum support passed to ``apriori.runApriori``.
        con: minimum confidence passed to ``apriori.runApriori``.
        subreddit: name to exclude from the result (case-insensitive).

    Returns:
        list of the collected items.
    """
    recommendation_set = list()
    already_added = set()  # O(1) duplicate check; the list keeps the order
    try:
        temp_file = apriori.dataFromFile(tempFile_path)
        _items, rules = apriori.runApriori(temp_file, sup, con)
        for rule, _confidence in rules:
            pre, _post = rule
            for item in pre:
                if item not in already_added and item.lower() != subreddit.lower():
                    already_added.add(item)
                    recommendation_set.append(item)
    finally:
        # Always discard the temp file; the existence check keeps a missing
        # file from hiding the error that is already propagating.
        if os.path.exists(tempFile_path):
            os.remove(tempFile_path)
    return recommendation_set
def test_run_apriori_should_get_items_and_rules(self):
    """Mine a small fixture and verify the item sets and rules found.

    Eight transactions are written to ``test_apriori.csv`` and mined with
    minSupport=0.5 and minConfidence=0.05.  Item sets are put into a
    consistent arrangement and compared as sets; rules are compared as a
    set, so their order does not matter.
    """
    transactions = (
        'apple,beer,rice,chicken\n',
        'apple,beer,rice\n',
        'apple,beer\n',
        'apple,mango\n',
        'milk,beer,rice,chicken\n',
        'milk,beer,rice\n',
        'milk,beer\n',
        'milk,mango\n',
    )
    data = ''.join(transactions)
    with open('test_apriori.csv', 'w') as fh:
        fh.write(data)

    inFile = dataFromFile('test_apriori.csv')
    minSupport = 0.5
    minConfidence = 0.05
    items, rules = runApriori(inFile, minSupport, minConfidence)

    def arrangement(entry):
        # Order by item-set size, then support, then the members themselves.
        return (len(entry[0]), entry[1], entry[0])

    arranged = sorted(items, key=arrangement)
    actual_items = [(set(members), support) for members, support in arranged]

    expected_pairs = [
        (("apple",), 0.5),
        (("milk",), 0.5),
        (("rice",), 0.5),
        (("beer",), 0.75),
        (("beer", "rice"), 0.5),
    ]
    expected_items = [(set(members), support) for members, support in expected_pairs]
    self.assertEqual(actual_items, expected_items)

    expected_rules = [
        ((('beer',), ('rice',)), 0.6666666666666666),
        ((('rice',), ('beer',)), 1.0),
    ]
    self.assertEqual(set(rules), set(expected_rules))
) st.markdown( ' > Support(A) = (Number of transactions in which A appears)/(Total Number of Transactions' ) st.markdown(' > Confidence(A->B) = Support(AUB)/Support(A)') st.markdown('---') support = st.slider("Enter the Minimum Support Value", min_value=0.1, max_value=0.9, value=0.15) confidence = st.slider("Enter the Minimum Confidence Value", min_value=0.1, max_value=0.9, value=0.6) inFile = dataFromFile(default_csv) items, rules = runApriori(inFile, support, confidence) i, r = to_str_results(items, rules) st.markdown("## Results") st.markdown("### Frequent Itemsets") st.write(i) st.markdown("### Frequent Rules") st.write(r)
if max_col_ind == cut_df or max_col_ind == cut_df - 1: print('Value for cutting DataFrame should be higher') for row_ind in range(len(csv_data)): for col_ind in range(len(csv_data.iloc[0, :])): if type(csv_data.iloc[row_ind, col_ind]) == float: csv_data.iloc[row_ind, col_ind] = float('NaN') csv_data.to_csv('statuscodes_toleranceRange=' + str(toleranceRange) + '.csv', header=False, index=False) #%% _end of making it look nicely #%% start o the apriori algorythm inFile = dataFromFile('statuscodes_toleranceRange=' + str(toleranceRange) + '.csv') #inFile = dataFromFile('statuscodes.csv') items, rules = runApriori(inFile, minSupport, minConfidence) #%% end o the apriori algorythm #items=tempitems[:64] #rules=temprules[:70] #%% saving all the rules to a .txt file list_of_tuples = rules f = open( 'rules_toleranceRange=' + str(toleranceRange) + '_MinSupport=' + str(minSupport) + '_MinConfidence=' + str(minConfidence) + '_maxFailure=' + str(maxFailure) + '_minFailure=' + str(minFailure) + '.txt', 'w') for t in list_of_tuples: