def test_threshold(self):
    """Check how ``apriori.Apriori`` handles valid and invalid thresholds.

    For every value in ``self.threshs_pos`` the constructor must succeed and
    the instance must expose that value as ``threshold``.  For every value in
    ``self.threshs_neg`` the constructor must raise ``ValueError``.
    """
    for thresh in self.threshs_pos:
        apr = apriori.Apriori(self.baskets, threshold=thresh)
        self.assertEqual(apr.threshold, thresh)
        # NOTE(review): the attribute name ``bbaskets`` is kept exactly as
        # written; confirm against the Apriori class that it is not a typo
        # for ``baskets``.
        self.assertEqual(apr.bbaskets, self.baskets)

    for thresh in self.threshs_neg:
        # Was ``self.self.assertRaises``: a TestCase has no ``self``
        # attribute, so the negative cases died with AttributeError instead
        # of checking for ValueError.
        with self.assertRaises(ValueError):
            apriori.Apriori(self.baskets, thresh)
def test_calc_association_rules(self):
    """Association rules computed from the frequent sets equal ``2->1``.

    Frequent sets are computed with 0.75 and the rules with 0.8 as the last
    argument; the returned rules are sorted in place before comparison.
    """
    transactions = self.create_trans("1 2 3;1 2 4;1 2 5;1 4 5")
    miner = apriori.Apriori()

    frequent = miner.calc_frequent_sets(transactions, 0.75)
    found = miner.calc_association_rules(frequent, transactions, 0.8)
    found.sort()

    expected = self.create_rules("2->1")
    self.assertEqual(found, expected)
def _test(minimum_support):
    """Run Apriori on ``input.txt`` and compare the result with a reference.

    ``minimum_support`` selects the reference file
    ``tests/outputRsupport<minimum_support>.txt`` and is passed to
    ``apriori.Apriori`` multiplied by 0.01.  Both the produced output and
    the reference are read as tab-separated ``p, q, sup, conf`` lines.

    Raises:
        AssertionError: a reference rule has a different ``(sup, conf)``
            pair in the produced output.
        KeyError: a reference rule is absent from the produced output.
    """
    output = StringIO()
    reference_path = 'tests/outputRsupport%d.txt' % minimum_support

    # Context managers close both files even when an assertion fails;
    # previously the input file was never closed and the reference file
    # only on success.
    with open('input.txt', 'r') as input_file, \
            open(reference_path, 'r') as compare:
        apriori.Apriori(0.01 * minimum_support, input_file, output)

        # Index the produced rules by "p->q" for direct lookup.
        ret_matrix = {}
        for line in output.getvalue().split("\n"):
            if not line:
                continue
            p, q, sup, conf = line.split("\t")
            ret_matrix['%s->%s' % (p, q)] = (sup, conf)

        for line in compare:
            line = line.rstrip('\n')
            if not line:
                # Tolerate blank lines in the reference file, the same way
                # blank lines in the produced output are ignored above.
                continue
            p, q, sup, conf = line.split("\t")
            key = '%s->%s' % (p, q)
            assert ret_matrix[key] == (sup, conf), (
                "Mismatched: %s expected %r, got %r"
                % (key, (sup, conf), ret_matrix[key])
            )
def test_generate_L1(self):
    """Print ``data`` and ``support`` of every item returned by ``find_L1``.

    The Apriori object is built from ``../movies.txt``.  The test makes no
    assertions; it only exercises the calls and prints the results.
    """
    table = a.Apriori('../movies.txt')

    # Initial pass: compute L1.
    level_one = table.find_L1()

    for entry in level_one:
        print(entry.data)
        print(entry.support)
def test_support_calc(self):
    """``_calc_itemset_support`` returns the expected value per item set."""
    transactions = self.create_trans("1 2 3;1 2 4;1 2 5;3 4 5")
    miner = apriori.Apriori()

    # (item set, expected support), checked in this order.
    cases = (
        (frozenset([5]), 2 / 4),
        (frozenset([1, 2]), 3 / 4),
        (frozenset([1, 3, 4]), 0),
    )
    for itemset, expected in cases:
        actual = miner._calc_itemset_support(transactions, itemset)
        self.assertEqual(actual, expected)
def test_regular_case(self):
    """Standard positive case: both constructor arguments are stored."""
    size_limit = 2
    min_threshold = 0.5

    miner = apriori.Apriori(max_set_size=size_limit, threshold=min_threshold)

    self.assertEqual(size_limit, miner.max_set_size)
    self.assertEqual(min_threshold, miner.threshold)
def setUp(self):
    """Create the shared baskets and four ``Apriori`` fixtures.

    ``apr_mss2``/``apr_mss3`` are built with only a threshold (0.5 and 0);
    ``apr_mss4``/``apr_mss5`` additionally pass ``max_set_size=1``
    (thresholds 0 and 0.5).  Each threshold is also kept as
    ``thresh_mss<N>``.
    """
    self.baskets = [{'a', 'b'}, {'a'}, {'a', 'b', 'c'}]

    # Thresholds first, then the instances in their original creation order.
    self.thresh_mss2, self.thresh_mss3 = 0.5, 0
    self.thresh_mss4, self.thresh_mss5 = 0, 0.5

    self.apr_mss2 = apriori.Apriori(threshold=self.thresh_mss2)
    self.apr_mss3 = apriori.Apriori(threshold=self.thresh_mss3)
    self.apr_mss4 = apriori.Apriori(max_set_size=1,
                                    threshold=self.thresh_mss4)
    self.apr_mss5 = apriori.Apriori(max_set_size=1,
                                    threshold=self.thresh_mss5)

    # NOTE(review): this sets maxDiff on the TestCase base class, i.e. for
    # every test case in the process, not just this one.
    unittest.TestCase.maxDiff = None
def ExtractFP(path="C:\\Users\\lyc\\Desktop\\final\\20170403.csv",
              outputPath="C:\\Users\\lyc\\Desktop\\final\\0403fp_0.0005.csv",
              startStopIndex=2,
              endStopIndex=6):
    """Run Apriori over the raw OD trips of a trip file and export to CSV.

    The defaults reproduce the previously hard-coded values, so calling
    ``ExtractFP()`` with no arguments behaves as before.

    Args:
        path: trip file handed to ``CreateStopsDictFromFile`` and
            ``CreateRawODTrips``.
        outputPath: destination passed to ``Apriori.FPToCSV``.
        startStopIndex: forwarded unchanged to both ``trip`` helpers
            (presumably the first stop column; confirm in ``trip``).
        endStopIndex: forwarded unchanged to both ``trip`` helpers
            (presumably the last stop column; confirm in ``trip``).
    """
    # Create dict and data
    stopsDict = trip.StopsDict()
    stopsDict.CreateStopsDictFromFile(path, startStopIndex, endStopIndex)
    tripsData = trip.TripsData()
    tripsData.CreateRawODTrips(path, stopsDict.stopsDict,
                               startStopIndex, endStopIndex)

    # Apriori
    apr = apriori.Apriori(tripsData.rawODTrips)
    apr.Process()
    apr.FPToCSV(outputPath)
def test_generate_candidates(self):
    """Run two rounds of candidate generation and print the survivors.

    Starting from the result of ``find_L1``, ``generate_candidates`` is
    called with 2 and then 3 as its last argument (presumably the target
    item-set size; confirm in the Apriori class).  After each round only
    candidates whose ``support`` is strictly greater than ``min_support``
    are kept.  The test makes no assertions; it prints the final count and
    each remaining candidate's ``data`` and ``support``.
    """
    transactions_table = a.Apriori('../movies.txt')

    # Initialize finding L1
    L = transactions_table.find_L1()
    min_support = 500

    Ck = transactions_table.generate_candidates(L, min_support, 2)
    Ck = [item for item in Ck if item.support > min_support]

    Ck = transactions_table.generate_candidates(Ck, min_support, 3)
    Ck = [item for item in Ck if item.support > min_support]

    print(len(Ck))
    for item in Ck:
        print(item.data)
        print(item.support)
def test_fit(self):
    """``fit`` returns a pair whose second element equals rule ``2->1``."""
    transactions = self.create_trans("1 2 3;1 2 4;1 2 5;1 4 5")
    miner = apriori.Apriori()

    # Only the second element of the returned pair is checked.
    _unused, found_rules = miner.fit(transactions, 0.75, 0.8)

    self.assertEqual(found_rules, self.create_rules("2->1"))
def test_calc_frequent_sets(self):
    """Flattened output of ``calc_frequent_sets`` equals ``1;2;1 2``."""
    transactions = self.create_trans("1 2 3;1 2 4;1 2 5;3 4 5")
    miner = apriori.Apriori()

    # calc_frequent_sets yields groups of item sets; merge them into one
    # flat list before comparing as sets.
    flattened = []
    for group in miner.calc_frequent_sets(transactions, 0.75):
        flattened.extend(group)

    expected = set(self.create_freq_sets("1;2;1 2"))
    self.assertEqual(set(flattened), expected)
def test__get_next_level_freqset3(self):
    """``[1, 2, 3]`` and ``[1, 3, 4]`` produce no next-level item set."""
    miner = apriori.Apriori()
    current_level = [[1, 2, 3], [1, 3, 4]]

    produced = set(miner._get_next_level_freqset(current_level))

    self.assertEqual(produced, set())
def test__get_next_level_freqset2(self):
    """Three overlapping pairs produce the single item set ``{1, 2, 3}``."""
    miner = apriori.Apriori()
    current_level = [[1, 2], [2, 3], [1, 3]]

    produced = set(miner._get_next_level_freqset(current_level))

    expected = {frozenset([1, 2, 3])}
    self.assertEqual(produced, expected)
def test_create_all_1_item_sets(self):
    """Each of the items 1..5 is returned as its own one-element set."""
    transactions = self.create_trans("1 2 3;1 2 4;1 2 5;3 4 5")
    miner = apriori.Apriori()

    produced = set(miner._create_all_1_item_sets(transactions))

    expected = {frozenset([item]) for item in range(1, 6)}
    self.assertEqual(produced, expected)
def setUp(self):
    """Store a default-constructed ``apriori.Apriori`` as ``self.apr``."""
    fresh_instance = apriori.Apriori()
    self.apr = fresh_instance
for kind in d: for ele in d[kind]: x.append([word.lower() for word in re.split(r'\W*', ele[1]) if len(word)>1 and not word.lower() in stoplist]) # y.append(my_list[ori_list.index(kind)]) y.append(kind) ''' 'Bizchina News', 'Photo News', 'Sports News', 'Life News', 'China Daily News', 'World News', 'Entertainment News', 'Opinion News', 'China Daily USA News', 'HK Edition News', 'China News', 'China Daily European Weekly News' ''' x = array(x) y = array(y) m = x.shape[0] randind = random.permutation(m) x = x[randind] y = y[randind] print 'apriori is finding connection...' # ap = apriori.Apriori(map(set, x[y=='World News']), 0.04, 0.6) ap = apriori.Apriori(map(set, x), 0.04, 0.5) pprint(ap.freq_set()[0]) ap.pprint_rules() training_rate = 0.01 split_ind = m * training_rate train_x = x[:split_ind] train_y = y[:split_ind] test_x = x[split_ind:] test_y = y[split_ind:] print 'training...' nb_cl = nb.NaiveBayes(list(train_x), list(train_y)) print 'predicting...' correct = 0
print "Incoming request" return str(rules.getMatchedRules(request.form["input"].split(","))) #return str(rules.getMatchedRules(["348831","89362005"])) @app.route('/getDiseasesAndSymptoms', methods=['POST']) def getDiseaseAndSymptomRecommendations(): print request.json if not request.json: print "An error occurred - Expecting JSON request with list of patient IDs" print print "Incoming request" print # recommender = recommendations.Recommender("testdata.csv") recommender = recommendations.Recommender("data.csv") diseases = recommender.get_disease_recommendations(request.json, limit=3) symptoms = recommender.get_symptom_recommendations(request.json, limit=5) response = {"diseases" : diseases, "symptoms" : symptoms} print "Logging results returned for disease recommendations : " print print jsonify(response) #return jsonify({}) return jsonify(response) if __name__ == "__main__": rules = apriori.Apriori(0.01) #Generate rules on startup app.run()
def main():
    """Build an Apriori object from the sample config file and run it."""
    config_path = '../sample_datasets/config.csv'
    apriori.Apriori(config_path).RunApriori()