Ejemplo n.º 1
0
 def test_threshold6(self):
     t2 = Type2(euclidean_distance, separated_cosine_similarity, 0.25, 0.25,
                0.5, math.inf, math.inf, 0.4)
     query = KeywordCoordinate(0, 0, ['keyword1', 'keyword2', 'keyword3'])
     kwc1 = KeywordCoordinate(0, 0, ['keyword1'])
     kwc2 = KeywordCoordinate(0, 0, ['keyword2'])
     data = [kwc1, kwc2]
     result = t2.solve(query, data)
     self.assertAlmostEqual(result, math.inf, delta=0.01)
Ejemplo n.º 2
0
 def test_general(self):
     ev = Evaluator()
     possible_keywords = [
         'family', 'food', 'outdoor', 'rest', 'indoor', 'sports', 'science',
         'culture', 'history'
     ]
     dg = DataGenerator(possible_keywords)
     gen_query = dg.generate(1)[0]
     gen_data = dg.generate(10)
     cf1 = Type1(euclidean_distance,
                 combined_cosine_similarity,
                 0.33,
                 0.33,
                 0.33,
                 disable_thresholds=True)
     cf2 = Type2(euclidean_distance,
                 combined_cosine_similarity,
                 0.33,
                 0.33,
                 0.33,
                 disable_thresholds=True)
     cf3 = Type3(euclidean_distance,
                 combined_cosine_similarity,
                 0.33,
                 0.33,
                 0.33,
                 disable_thresholds=True)
     ns1 = NaiveSolver(gen_query,
                       gen_data,
                       cf1,
                       result_length=10,
                       max_subset_size=6)
     ns2 = NaiveSolver(gen_query,
                       gen_data,
                       cf2,
                       result_length=10,
                       max_subset_size=6)
     ns3 = NaiveSolver(gen_query,
                       gen_data,
                       cf3,
                       result_length=10,
                       max_subset_size=6)
     ev.add_solver(ns1)
     ev.add_solver(ns2)
     ev.add_solver(ns3)
     ev.evaluate()
     results = ev.get_results()
     self.assertEqual(len(results), 3)
     self.assertEqual(len(results[0]), 2)
     self.assertEqual(len(results[1]), 2)
     self.assertEqual(len(results[2]), 2)
     self.assertEqual(len(results[0][0]), 10)
     self.assertEqual(len(results[1][0]), 10)
     self.assertEqual(len(results[2][0]), 10)
Ejemplo n.º 3
0
 def test_instantiation(self):
     t2 = Type2(euclidean_distance, separated_cosine_similarity, 0.3, 0.3,
                0.4, 0.5, 0.6, 0.7, False)
     self.assertEqual(euclidean_distance.__get__,
                      t2.distance_metric.__get__)
     self.assertEqual(separated_cosine_similarity.__get__,
                      t2.similarity_metric.__get__)
     self.assertAlmostEqual(t2.alpha, 0.3, delta=0.01)
     self.assertAlmostEqual(t2.beta, 0.3, delta=0.01)
     self.assertAlmostEqual(t2.omega, 0.4, delta=0.01)
     self.assertAlmostEqual(t2.query_distance_threshold, 0.5, delta=0.01)
     self.assertAlmostEqual(t2.dataset_distance_threshold, 0.6, delta=0.01)
     self.assertAlmostEqual(t2.keyword_similarity_threshold,
                            0.7,
                            delta=0.01)
     self.assertEqual(t2.disable_thresholds, False)
Ejemplo n.º 4
0
 def test_solve4(self):
     t2 = Type2(manhattan_distance,
                separated_cosine_similarity,
                1,
                0,
                0,
                disable_thresholds=True)
     keywords_query = ['food', 'fun', 'outdoor', 'family']
     keywords_kwc1 = ['food', 'fun', 'outdoor']
     keywords_kwc2 = ['food', 'fun']
     keywords_kwc3 = ['food']
     query = KeywordCoordinate(0, 0, keywords_query)
     kwc1 = KeywordCoordinate(1, 1, keywords_kwc1)
     kwc2 = KeywordCoordinate(2, 2, keywords_kwc2)
     kwc3 = KeywordCoordinate(3, 3, keywords_kwc3)
     kwc4 = KeywordCoordinate(4, 4, keywords_kwc3)
     kwc5 = KeywordCoordinate(5, 5, keywords_kwc3)
     data = [kwc1, kwc2, kwc3, kwc4, kwc5]
     result = t2.solve(query, data)
     self.assertAlmostEqual(result, 10.0, delta=0.01)
Ejemplo n.º 5
0
def main(argv):
    start_time = time.time()

    # Evaluator, instantiate it first for logging purposes
    ev = Evaluator()

    query: KeywordCoordinate = load_pickle('query.pickle')
    print('Query:', query)
    data: dataset_type = load_pickle('dataset.pickle')
    # print('Data:', dataset_comprehension(data))

    # Let's filter out by user radius
    # dataAux = sorted(data, key=lambda x: geographic_distance(x.coordinates, query.coordinates))
    # distances = [geographic_distance(x.coordinates, query.coordinates) >= RADIUS for x in dataAux]
    # print('------ Distances: ', distances)

    # Load precalculated values and models
    precalculated_inter_dataset_distances = load_pickle(
        'precalculated_inter_dataset_distances.pickle')
    precalculated_query_dataset_distances = load_pickle(
        'precalculated_query_dataset_distances.pickle')
    precalculated_query_dataset_keyword_similarities = load_pickle(
        'precalculated_query_dataset_keyword_similarities.pickle')

    # **** ONLY FOR word2vec model executions
    precalculated_query_dataset_keyword_similarities_word2vec = load_pickle(
        'precalculated_query_dataset_keyword_similarities_word2vec.pickle')
    word2vec_model = load_word2vec_model('word2vec_model.pickle')
    # ****

    # Define the CostFunctions. For all possible parameters refer to the documentation.
    cf1 = Type1(euclidean_distance, combined_cosine_similarity, 0.2, 0.1, 0.7)
    cf2 = Type2(euclidean_distance,
                word2vec_cosine_similarity,
                0.2,
                0.1,
                0.7,
                model=word2vec_model)
    cf3 = Type1(
        euclidean_distance,
        combined_cosine_similarity,
        0.2,
        0.1,
        0.7,
        precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
        precalculated_query_dataset_dict=precalculated_query_dataset_distances,
        precalculated_keyword_similarity_dict=
        precalculated_query_dataset_keyword_similarities)
    cf4 = Type2(
        euclidean_distance,
        word2vec_cosine_similarity,
        0,
        0,
        1.0,
        precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
        precalculated_query_dataset_dict=precalculated_query_dataset_distances,
        precalculated_keyword_similarity_dict=
        precalculated_query_dataset_keyword_similarities_word2vec,
        model=word2vec_model)
    cf5 = Type3(
        euclidean_distance,
        word2vec_cosine_similarity,
        0.1,
        0.1,
        0.8,
        precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
        precalculated_query_dataset_dict=precalculated_query_dataset_distances,
        precalculated_keyword_similarity_dict=
        precalculated_query_dataset_keyword_similarities_word2vec,
        model=word2vec_model)
    cf6 = Type1(
        euclidean_distance,
        word2vec_cosine_similarity,
        0.2,
        0.1,
        0.7,
        precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
        precalculated_query_dataset_dict=precalculated_query_dataset_distances,
        precalculated_keyword_similarity_dict=
        precalculated_query_dataset_keyword_similarities,
        model=word2vec_model)

    map_name = argv[0]
    # map_name = 'London_mini'
    # Choose which Solvers to use. For all possible parameters refer to the documentation.
    max_number_of_processes = mp.cpu_count()
    ns1 = NaiveSolver(
        query,
        data,
        cf2,
        result_length=5,
        max_subset_size=3,
        max_number_of_concurrent_processes=max_number_of_processes,
        _map=map_name)
    # ns2 = NaiveSolver(query, data, cf5, result_length=5, max_subset_size=3,
    # max_number_of_concurrent_processes=max_number_of_processes, _map = map_name)
    # ns3 = NaiveSolver(query, data, cf3, result_length=5, max_subset_size=6,
    #                   max_number_of_concurrent_processes=max_number_of_processes)
    # ns4 = NaiveSolver(query, data, cf6, result_length=5, max_subset_size=3,
    # max_number_of_concurrent_processes=max_number_of_processes, _map = map_name)

    # Add Solvers to Evaluator
    ev.add_solver(ns1)
    # ev.add_solver(ns2)
    # ev.add_solver(ns4)

    #Only for Debug: calculates and print physical distances between items in the dataset and the query location
    #distances = [geographic_distance(x.coordinates, query.coordinates) for x in data]
    # print('------ Distances: ', distances

    # Run Evaluator and fetch results
    ev.evaluate()
    results = ev.get_results()
    timings = ev.get_timings()

    write_csv(map_name, results, timings)

    print('*** Solution -', solution_list_comprehension(results))
    # print('*** Timing -', timing_list_comprehension(timings))

    initialLat = []
    initialLon = []

    keywords = []

    gmap = gmplot.GoogleMapPlotter(query.coordinates.x, query.coordinates.y,
                                   14)

    colors = ['red', 'blue', 'green', 'purple', 'orange']

    # Third dimension is the order of solution (Best: 0, Second best: 1...)
    for i in range(5):
        lats = []
        lons = []
        for kwc in results[0][0][i][1]:
            lats.append(kwc.coordinates.x)
            lons.append(kwc.coordinates.y)
            keywords.append(kwc.keywords)
        for j in range(len(lats)):
            gmap.marker(lats[j], lons[j], color=colors[i])
        gmap.polygon(lats, lons, color='cornflowerblue', edge_width=7)

        # initialLat.append(query.coordinates.x)
        # initialLon.append(query.coordinates.y)

        # gmap.scatter(initialLat, initialLon, '#00FF00', size = 70, marker = False)
        # gmap.scatter(lats, lons, '#FF0000',size = 50, marker = False )

        # gmap.plot(lats, lons, 'cornflowerblue', edge_width = 3.0)
        # gmap.polygon(lats, lons, color='cornflowerblue', edge_width=10)

        # gmap.scatter(lats, lons, color='#3B0B39', size=40, marker=False)

        #Your Google_API_Key
        #gmap.apikey = " API_Key”
        # save it to html
    # gmap.scatter(lats, lons, '#FF0000', size=40, marker=True)
    gmap.marker(query.coordinates.x,
                query.coordinates.y,
                color='cornflowerblue',
                title='Query point')
    gmap.draw(r"graphic_results.html")

    print("--- %s seconds ---" % (time.time() - start_time))