def test_threshold6(self):
    t2 = Type2(euclidean_distance, separated_cosine_similarity, 0.25, 0.25, 0.5, math.inf, math.inf, 0.4)
    query = KeywordCoordinate(0, 0, ['keyword1', 'keyword2', 'keyword3'])
    kwc1 = KeywordCoordinate(0, 0, ['keyword1'])
    kwc2 = KeywordCoordinate(0, 0, ['keyword2'])
    data = [kwc1, kwc2]
    result = t2.solve(query, data)
    self.assertAlmostEqual(result, math.inf, delta=0.01)
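# Minimal sketch of the threshold behaviour exercised above, assuming Type2
# reports an infeasible subset as math.inf once a threshold is violated. Both
# distance thresholds are math.inf here, so only the keyword similarity
# threshold (0.4) can trip; this helper is hypothetical, not part of Type2:
def sketch_apply_keyword_threshold(cost: float, similarity: float, threshold: float = 0.4) -> float:
    # Keep the computed cost while the similarity clears the threshold,
    # otherwise mark the subset as infeasible with math.inf.
    return cost if similarity >= threshold else math.inf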
def test_general(self):
    ev = Evaluator()
    possible_keywords = ['family', 'food', 'outdoor', 'rest', 'indoor', 'sports', 'science', 'culture', 'history']
    dg = DataGenerator(possible_keywords)
    gen_query = dg.generate(1)[0]
    gen_data = dg.generate(10)
    cf1 = Type1(euclidean_distance, combined_cosine_similarity, 0.33, 0.33, 0.33, disable_thresholds=True)
    cf2 = Type2(euclidean_distance, combined_cosine_similarity, 0.33, 0.33, 0.33, disable_thresholds=True)
    cf3 = Type3(euclidean_distance, combined_cosine_similarity, 0.33, 0.33, 0.33, disable_thresholds=True)
    ns1 = NaiveSolver(gen_query, gen_data, cf1, result_length=10, max_subset_size=6)
    ns2 = NaiveSolver(gen_query, gen_data, cf2, result_length=10, max_subset_size=6)
    ns3 = NaiveSolver(gen_query, gen_data, cf3, result_length=10, max_subset_size=6)
    ev.add_solver(ns1)
    ev.add_solver(ns2)
    ev.add_solver(ns3)
    ev.evaluate()
    results = ev.get_results()
    self.assertEqual(len(results), 3)
    self.assertEqual(len(results[0]), 2)
    self.assertEqual(len(results[1]), 2)
    self.assertEqual(len(results[2]), 2)
    self.assertEqual(len(results[0][0]), 10)
    self.assertEqual(len(results[1][0]), 10)
    self.assertEqual(len(results[2][0]), 10)
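# Hedged sketch of how the nested structure asserted above can be unpacked.
# The exact payload of each entry is an assumption: elsewhere in this project
# the result entries behave like (cost, subset) pairs, ordered best-first.
def sketch_best_result(results):
    solver_output = results[0]  # one entry per registered solver (3 here)
    result_list, _second_entry = solver_output  # the 2 entries asserted per solver
    best_cost, best_subset = result_list[0]  # result_length=10 entries, best first
    return best_cost, best_subset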
def test_instantiation(self):
    t2 = Type2(euclidean_distance, separated_cosine_similarity, 0.3, 0.3, 0.4, 0.5, 0.6, 0.7, False)
    self.assertEqual(euclidean_distance.__get__, t2.distance_metric.__get__)
    self.assertEqual(separated_cosine_similarity.__get__, t2.similarity_metric.__get__)
    self.assertAlmostEqual(t2.alpha, 0.3, delta=0.01)
    self.assertAlmostEqual(t2.beta, 0.3, delta=0.01)
    self.assertAlmostEqual(t2.omega, 0.4, delta=0.01)
    self.assertAlmostEqual(t2.query_distance_threshold, 0.5, delta=0.01)
    self.assertAlmostEqual(t2.dataset_distance_threshold, 0.6, delta=0.01)
    self.assertAlmostEqual(t2.keyword_similarity_threshold, 0.7, delta=0.01)
    self.assertEqual(t2.disable_thresholds, False)
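# Positional order of the Type2 constructor, inferred from the distinct values
# checked above (an inference from this test, not a documented signature):
#   Type2(distance_metric, similarity_metric, alpha, beta, omega,
#         query_distance_threshold, dataset_distance_threshold,
#         keyword_similarity_threshold, disable_thresholds)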
def test_solve4(self):
    t2 = Type2(manhattan_distance, separated_cosine_similarity, 1, 0, 0, disable_thresholds=True)
    keywords_query = ['food', 'fun', 'outdoor', 'family']
    keywords_kwc1 = ['food', 'fun', 'outdoor']
    keywords_kwc2 = ['food', 'fun']
    keywords_kwc3 = ['food']
    query = KeywordCoordinate(0, 0, keywords_query)
    kwc1 = KeywordCoordinate(1, 1, keywords_kwc1)
    kwc2 = KeywordCoordinate(2, 2, keywords_kwc2)
    kwc3 = KeywordCoordinate(3, 3, keywords_kwc3)
    kwc4 = KeywordCoordinate(4, 4, keywords_kwc3)
    kwc5 = KeywordCoordinate(5, 5, keywords_kwc3)
    data = [kwc1, kwc2, kwc3, kwc4, kwc5]
    result = t2.solve(query, data)
    self.assertAlmostEqual(result, 10.0, delta=0.01)
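# Worked check of the expected value, as a hedged sketch: assuming the
# alpha-weighted term of the Type2 cost is the maximum query-to-POI distance
# over the subset, alpha=1 with beta=omega=0 reduces the cost to the distance
# of the farthest POI: manhattan_distance((0, 0), (5, 5)) = |5| + |5| = 10.0.
def sketch_expected_cost(query, data) -> float:
    # Assumes manhattan_distance accepts two coordinate objects, as its use
    # as a distance_metric above suggests.
    return max(manhattan_distance(query.coordinates, kwc.coordinates) for kwc in data)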
def main(argv):
    start_time = time.time()

    # Evaluator, instantiate it first for logging purposes
    ev = Evaluator()

    query: KeywordCoordinate = load_pickle('query.pickle')
    print('Query:', query)
    data: dataset_type = load_pickle('dataset.pickle')
    # print('Data:', dataset_comprehension(data))

    # Filter the dataset by user radius (kept for reference):
    # dataAux = sorted(data, key=lambda x: geographic_distance(x.coordinates, query.coordinates))
    # distances = [geographic_distance(x.coordinates, query.coordinates) >= RADIUS for x in dataAux]
    # print('------ Distances: ', distances)

    # Load precalculated values and models
    precalculated_inter_dataset_distances = load_pickle('precalculated_inter_dataset_distances.pickle')
    precalculated_query_dataset_distances = load_pickle('precalculated_query_dataset_distances.pickle')
    precalculated_query_dataset_keyword_similarities = load_pickle('precalculated_query_dataset_keyword_similarities.pickle')

    # **** ONLY FOR word2vec model executions
    precalculated_query_dataset_keyword_similarities_word2vec = load_pickle('precalculated_query_dataset_keyword_similarities_word2vec.pickle')
    word2vec_model = load_word2vec_model('word2vec_model.pickle')
    # ****

    # Define the CostFunctions. For all possible parameters refer to the documentation.
    cf1 = Type1(euclidean_distance, combined_cosine_similarity, 0.2, 0.1, 0.7)
    cf2 = Type2(euclidean_distance, word2vec_cosine_similarity, 0.2, 0.1, 0.7, model=word2vec_model)
    cf3 = Type1(euclidean_distance, combined_cosine_similarity, 0.2, 0.1, 0.7,
                precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
                precalculated_query_dataset_dict=precalculated_query_dataset_distances,
                precalculated_keyword_similarity_dict=precalculated_query_dataset_keyword_similarities)
    cf4 = Type2(euclidean_distance, word2vec_cosine_similarity, 0, 0, 1.0,
                precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
                precalculated_query_dataset_dict=precalculated_query_dataset_distances,
                precalculated_keyword_similarity_dict=precalculated_query_dataset_keyword_similarities_word2vec,
                model=word2vec_model)
    cf5 = Type3(euclidean_distance, word2vec_cosine_similarity, 0.1, 0.1, 0.8,
                precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
                precalculated_query_dataset_dict=precalculated_query_dataset_distances,
                precalculated_keyword_similarity_dict=precalculated_query_dataset_keyword_similarities_word2vec,
                model=word2vec_model)
    cf6 = Type1(euclidean_distance, word2vec_cosine_similarity, 0.2, 0.1, 0.7,
                precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
                precalculated_query_dataset_dict=precalculated_query_dataset_distances,
                precalculated_keyword_similarity_dict=precalculated_query_dataset_keyword_similarities,
                model=word2vec_model)

    map_name = argv[0]
    # map_name = 'London_mini'

    # Choose which Solvers to use. For all possible parameters refer to the documentation.
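    # Hedged note: a naive solver presumably enumerates every dataset subset up
    # to max_subset_size, along the lines of this illustrative generator (not
    # NaiveSolver's actual implementation):
    #
    #   from itertools import combinations
    #   candidate_subsets = (subset for size in range(1, max_subset_size + 1)
    #                        for subset in combinations(data, size))
    #
    # which is why max_subset_size is kept small (3) below.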
    max_number_of_processes = mp.cpu_count()
    ns1 = NaiveSolver(query, data, cf2, result_length=5, max_subset_size=3,
                      max_number_of_concurrent_processes=max_number_of_processes, _map=map_name)
    # ns2 = NaiveSolver(query, data, cf5, result_length=5, max_subset_size=3,
    #                   max_number_of_concurrent_processes=max_number_of_processes, _map=map_name)
    # ns3 = NaiveSolver(query, data, cf3, result_length=5, max_subset_size=6,
    #                   max_number_of_concurrent_processes=max_number_of_processes)
    # ns4 = NaiveSolver(query, data, cf6, result_length=5, max_subset_size=3,
    #                   max_number_of_concurrent_processes=max_number_of_processes, _map=map_name)

    # Add Solvers to Evaluator
    ev.add_solver(ns1)
    # ev.add_solver(ns2)
    # ev.add_solver(ns4)

    # Debug only: calculate and print physical distances between dataset items and the query location
    # distances = [geographic_distance(x.coordinates, query.coordinates) for x in data]
    # print('------ Distances: ', distances)

    # Run Evaluator and fetch results
    ev.evaluate()
    results = ev.get_results()
    timings = ev.get_timings()
    write_csv(map_name, results, timings)
    print('*** Solution -', solution_list_comprehension(results))
    # print('*** Timing -', timing_list_comprehension(timings))

    initialLat = []
    initialLon = []
    keywords = []
    gmap = gmplot.GoogleMapPlotter(query.coordinates.x, query.coordinates.y, 14)
    colors = ['red', 'blue', 'green', 'purple', 'orange']

    # Third dimension is the order of the solution (best: 0, second best: 1, ...)
    for i in range(5):
        lats = []
        lons = []
        for kwc in results[0][0][i][1]:
            lats.append(kwc.coordinates.x)
            lons.append(kwc.coordinates.y)
            keywords.append(kwc.keywords)
        for j in range(len(lats)):
            gmap.marker(lats[j], lons[j], color=colors[i])
        gmap.polygon(lats, lons, color='cornflowerblue', edge_width=7)

    # initialLat.append(query.coordinates.x)
    # initialLon.append(query.coordinates.y)
    # gmap.scatter(initialLat, initialLon, '#00FF00', size=70, marker=False)
    # gmap.scatter(lats, lons, '#FF0000', size=50, marker=False)
    # gmap.plot(lats, lons, 'cornflowerblue', edge_width=3.0)
    # gmap.polygon(lats, lons, color='cornflowerblue', edge_width=10)
    # gmap.scatter(lats, lons, color='#3B0B39', size=40, marker=False)

    # Your Google API key
    # gmap.apikey = "API_Key"

    # Save the map to HTML
    # gmap.scatter(lats, lons, '#FF0000', size=40, marker=True)
    gmap.marker(query.coordinates.x, query.coordinates.y, color='cornflowerblue', title='Query point')
    gmap.draw("graphic_results.html")

    print("--- %s seconds ---" % (time.time() - start_time))
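# Entry-point sketch (an assumed invocation, since main() reads the map name
# from argv[0] and therefore expects the script path to be stripped),
# e.g.: python main.py London_mini
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])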