def test_get_maximum_for_dataset3(self):
     """get_maximum_for_dataset yields ~41.01 for six diagonal points.

     Uses euclidean_distance on points whose x and y are equal, taken from
     (6, 8, 9, 13, 24, 35).
     """
     shared_keywords = ['']  # keyword content is irrelevant to this check
     diagonal_values = (6, 8, 9, 13, 24, 35)
     dataset: dataset_type = [
         KeywordCoordinate(value, value, shared_keywords)
         for value in diagonal_values
     ]
     cost_function = CostFunction(euclidean_distance,
                                  separated_cosine_similarity,
                                  0.3, 0.3, 0.4)
     self.assertAlmostEqual(cost_function.get_maximum_for_dataset(dataset),
                            41.01, delta=0.01)
 def test_get_minimum_for_dataset2(self):
     """get_minimum_for_dataset yields ~2.0 with manhattan_distance.

     The dataset holds the diagonal points (5, 5) through (10, 10).
     """
     shared_keywords = ['']  # keyword content is irrelevant to this check
     dataset: dataset_type = [
         KeywordCoordinate(value, value, shared_keywords)
         for value in range(5, 11)
     ]
     cost_function = CostFunction(manhattan_distance,
                                  separated_cosine_similarity,
                                  0.3, 0.3, 0.4)
     minimum = cost_function.get_minimum_for_dataset(dataset)
     self.assertAlmostEqual(minimum, 2.0, delta=0.01)
 def test_get_maximum_for_dataset1(self):
     """get_maximum_for_dataset yields ~7.07 with euclidean_distance.

     The dataset holds the diagonal points (0, 0) through (5, 5).
     """
     shared_keywords = ['']  # keyword content is irrelevant to this check
     dataset: dataset_type = [
         KeywordCoordinate(value, value, shared_keywords)
         for value in range(6)
     ]
     cost_function = CostFunction(euclidean_distance,
                                  separated_cosine_similarity,
                                  0.3, 0.3, 0.4)
     maximum = cost_function.get_maximum_for_dataset(dataset)
     self.assertAlmostEqual(maximum, 7.07, delta=0.01)
 def test_get_minimum_for_dataset3(self):
     """get_minimum_for_dataset yields ~9.9 on widely spread diagonal points.

     Uses euclidean_distance; coordinate values range from 0 to 10000.
     """
     shared_keywords = ['']  # keyword content is irrelevant to this check
     diagonal_values = (0, 13, 20, 800, 9000, 10000)
     dataset: dataset_type = [
         KeywordCoordinate(value, value, shared_keywords)
         for value in diagonal_values
     ]
     cost_function = CostFunction(euclidean_distance,
                                  separated_cosine_similarity,
                                  0.3, 0.3, 0.4)
     minimum = cost_function.get_minimum_for_dataset(dataset)
     self.assertAlmostEqual(minimum, 9.9, delta=0.01)
 def test_get_minimum_for_query4(self):
     """get_minimum_for_query yields ~16.0 with manhattan_distance.

     The query sits at (0, 0); the dataset holds five diagonal points
     starting at (8, 8).
     """
     shared_keywords = ['']  # keyword content is irrelevant to this check
     query = KeywordCoordinate(0, 0, shared_keywords)
     dataset: dataset_type = [
         KeywordCoordinate(value, value, shared_keywords)
         for value in (8, 9, 13, 24, 35)
     ]
     cost_function = CostFunction(manhattan_distance,
                                  separated_cosine_similarity,
                                  0.3, 0.3, 0.4)
     minimum = cost_function.get_minimum_for_query(query, dataset)
     self.assertAlmostEqual(minimum, 16.0, delta=0.01)
Beispiel #6
0
 def test_get_min_inter_dataset_distance(self):
     """Solver.get_min_inter_dataset_distance maps each subset to its value.

     Builds a Solver (normalize=False) over three points and checks that the
     returned mapping has seven entries, one per non-empty subset listed
     below, each within 0.01 of the expected value.
     """
     query = KeywordCoordinate(0, 0, ['family', 'food', 'outdoor'])
     kwc1 = KeywordCoordinate(1, 1, ['family', 'food', 'outdoor'])
     kwc2 = KeywordCoordinate(2, 2, ['food'])
     kwc3 = KeywordCoordinate(3, 3, ['outdoor'])
     cost_function = CostFunction(euclidean_distance,
                                  combined_cosine_similarity,
                                  0.3, 0.3, 0.4)
     solver = Solver(query, [kwc1, kwc2, kwc3], cost_function,
                     normalize=False)
     # Ordered (subset, expected value) pairs; checked in this order.
     expected_by_subset = (
         (frozenset([kwc1]), 0.0),
         (frozenset([kwc2]), 0.0),
         (frozenset([kwc3]), 0.0),
         (frozenset([kwc1, kwc2]), 1.41),
         (frozenset([kwc1, kwc3]), 2.83),
         (frozenset([kwc2, kwc3]), 1.41),
         (frozenset([kwc1, kwc2, kwc3]), 1.41),
     )
     result = solver.get_min_inter_dataset_distance()
     self.assertEqual(len(result), 7)
     for subset, expected in expected_by_subset:
         self.assertAlmostEqual(result.get(subset), expected, delta=0.01)
 def test_get_maximum_keyword_distance4(self):
     """get_maximum_keyword_distance yields ~0.0 for identical keyword lists.

     The query and all three dataset entries carry the same keywords; every
     coordinate is 0 because position is irrelevant to this check.
     """
     origin = 0  # coordinates are irrelevant to this check
     query = KeywordCoordinate(origin, origin, ['food', 'fun', 'outdoor'])
     # Each entry gets its own keyword list, mirroring separate inputs.
     dataset: dataset_type = [
         KeywordCoordinate(origin, origin, ['food', 'fun', 'outdoor'])
         for _ in range(3)
     ]
     cost_function = CostFunction(euclidean_distance,
                                  separated_cosine_similarity,
                                  0.3, 0.3, 0.4)
     distance = cost_function.get_maximum_keyword_distance(query, dataset)
     self.assertAlmostEqual(distance, 0.0, delta=0.01)
Beispiel #8
0
def get_min_inter_dataset_distances(costfunction: CostFunction, subsets):
    """
    Worker routine executed inside every minimum inter-dataset distance process.
    Pairs each subset with the value of costfunction.get_minimum_for_dataset for it.
    :param costfunction: The CostFunction
    :param subsets: The subsets assigned to this process
    :return: A list of (cost, subset) tuples, in the order of the given subsets
    """
    return [(costfunction.get_minimum_for_dataset(candidate), candidate)
            for candidate in subsets]
Beispiel #9
0
def get_max_keyword_similarity(costfunction: CostFunction,
                               query: KeywordCoordinate, subsets):
    """
    Worker routine executed inside every maximum keyword similarity process.
    Pairs each subset with the value of
    costfunction.get_maximum_keyword_distance(query, subset).
    :param costfunction: The CostFunction
    :param query: The Query
    :param subsets: The subsets assigned to this process
    :return: A list of (cost, subset) tuples, in the order of the given subsets
    """
    return [(costfunction.get_maximum_keyword_distance(query, candidate),
             candidate)
            for candidate in subsets]
 def test_instantiation(self):
     """CostFunction exposes every constructor argument as an attribute.

     Constructs a CostFunction with explicit metrics, weights, thresholds
     and the disable flag, then checks each corresponding attribute.
     """
     cf = CostFunction(euclidean_distance, separated_cosine_similarity, 0.3,
                       0.3, 0.4, 0.5, 0.6, 0.7, False)
     # Assert identity directly: comparing the callables' ``__get__``
     # wrappers only succeeds when both sides are the same object anyway,
     # and assertIs produces a readable failure message.
     self.assertIs(cf.distance_metric, euclidean_distance)
     self.assertIs(cf.similarity_metric, separated_cosine_similarity)
     expected_numbers = (
         ('alpha', 0.3),
         ('beta', 0.3),
         ('omega', 0.4),
         ('query_distance_threshold', 0.5),
         ('dataset_distance_threshold', 0.6),
         ('keyword_similarity_threshold', 0.7),
     )
     for attribute, expected in expected_numbers:
         # msg names the attribute so a failure is attributable at a glance.
         self.assertAlmostEqual(getattr(cf, attribute), expected,
                                delta=0.01, msg=attribute)
     self.assertEqual(cf.disable_thresholds, False)
Beispiel #11
0
 def test_instantiation(self):
     """Solver exposes its constructor inputs and zeroed denormalize bounds.

     Builds a Solver with normalize=False and result_length=10 over three
     data points, then checks the stored query, data, cost function,
     options and the four denormalize_* attributes.
     """
     query_keywords = ['family', 'food', 'outdoor']
     # (coordinate value used for both x and y, keywords) per data point.
     expected_items = (
         (1, ['family', 'food', 'outdoor']),
         (2, ['food']),
         (3, ['outdoor']),
     )
     query = KeywordCoordinate(0, 0, query_keywords)
     data = [KeywordCoordinate(value, value, keywords)
             for value, keywords in expected_items]
     cf = CostFunction(euclidean_distance, separated_cosine_similarity, 0.3,
                       0.3, 0.4)
     so = Solver(query, data, cf, normalize=False, result_length=10)

     self.assertAlmostEqual(so.query.coordinates.x, 0, delta=0.01)
     self.assertAlmostEqual(so.query.coordinates.y, 0, delta=0.01)
     self.assertListEqual(so.data, data)
     for stored, (value, keywords) in zip(so.data, expected_items):
         self.assertAlmostEqual(stored.coordinates.x, value, delta=0.01)
         self.assertAlmostEqual(stored.coordinates.y, value, delta=0.01)
         # assertListEqual already compares element by element, so no
         # separate per-index loop is needed.
         self.assertListEqual(stored.keywords, keywords)

     # Assert identity directly: comparing the callables' ``__get__``
     # wrappers only succeeds when both sides are the same object anyway,
     # and assertIs produces a readable failure message.
     self.assertIs(so.cost_function.distance_metric, euclidean_distance)
     self.assertIs(so.cost_function.similarity_metric,
                   separated_cosine_similarity)
     self.assertAlmostEqual(so.cost_function.alpha, 0.3, delta=0.01)
     self.assertAlmostEqual(so.cost_function.beta, 0.3, delta=0.01)
     self.assertAlmostEqual(so.cost_function.omega, 0.4, delta=0.01)
     self.assertEqual(so.normalize_data, False)
     self.assertEqual(so.result_length, 10)
     for bound in ('denormalize_max_x', 'denormalize_min_x',
                   'denormalize_max_y', 'denormalize_min_y'):
         # msg names the attribute so a failure is attributable at a glance.
         self.assertAlmostEqual(getattr(so, bound), 0.0, delta=0.01,
                                msg=bound)