def test_ml_embedded_contour_similarity(self):
    seq = [[0, 2, 1], [0, 1, 3, 2], [0, 2, 1, 3, 4]]
    tup = list(map(tuple, seq))
    mc = Comparison(*[Contour(s) for s in seq])
    m = [[1, 9 / 13, 13 / 28],
         [9 / 13, 1, 29 / 35],
         [13 / 28, 29 / 35, 1]]
    df = pandas.DataFrame(m, index=tup, columns=tup)
    self.assertEqual(mc.embedded_contour_similarity().to_dict(), df.to_dict())
def test_ml_contour_similarity_crisp(self):
    seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
    tup = list(map(tuple, seq))
    mc = Comparison(*[Contour(s) for s in seq])
    m = [[1, 5 / 6, 5 / 6],
         [5 / 6, 1, 2 / 3],
         [5 / 6, 2 / 3, 1]]
    df = pandas.DataFrame(m, index=tup, columns=tup)
    self.assertEqual(mc.contour_similarity_crisp().to_dict(), df.to_dict())
def test_caseless_match_fail(self):
    a = "TestString"
    b = "testString1"
    comp = Comparison()
    comp.set_comparator("CaseInsensitiveMatch")
    res = comp.compare(a, b)
    self.assertEqual(res, 0)
def test_case_match_match(self):
    a = "TestString"
    b = "TestString"
    comp = Comparison()
    comp.set_comparator("CaseSensitiveMatch")
    res = comp.compare(a, b)
    self.assertEqual(res, 1)
def test_damerau_match_fail(self):
    a = "aabc"
    b = "aaaa"
    threshold = 0.7
    comp = Comparison()
    comp.set_comparator("DamerauLevenshtein")
    res = comp.compare(a, b)
    self.assertLess(res, threshold)
def test_damerau_match_perfect(self):
    a = "aaaa"
    b = "aaaa"
    threshold = 1
    comp = Comparison()
    comp.set_comparator("DamerauLevenshtein")
    res = comp.compare(a, b)
    self.assertEqual(res, threshold)
def test_damerau_match_success(self):
    a = "aaaa"
    b = "aaaa1"
    threshold = 0.7
    comp = Comparison()
    comp.set_comparator("DamerauLevenshtein")
    res = comp.compare(a, b)
    self.assertGreaterEqual(res, threshold)
def test_levenshtein_match_success(self):
    a = "aaaa"
    b = "aaaa1"
    threshold = 0.7
    comp = Comparison()
    comp.set_comparator("LevenshteinDistance")
    res = comp.compare(a, b)
    self.assertGreaterEqual(res, threshold)
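# Usage sketch (illustrative only, import path assumed): the string comparators
# exercised above follow one pattern: choose an algorithm by name, then compare
# two values to get a normalised similarity in [0, 1].
#
#     comp = Comparison()
#     comp.set_comparator("DamerauLevenshtein")
#     score = comp.compare("aaaa", "aaaa1")
#     is_match = score >= 0.7   # threshold used by the tests above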
def test_friedmann_vectors(self):
    seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
    tup = list(map(tuple, seq))
    mc = Comparison(*[Contour(s) for s in seq])
    fv = [[[3, 0], [10, 0], [6, 0], 1, 1, 1],
          [[2, 1], [9, 1], [5, 1], 1 / 3, 0.8, 2 / 3],
          [[2, 1], [9, 1], [5, 1], 1 / 3, 0.8, 2 / 3]]
    columns = ['CASV', 'CCVI', 'CCVII', 'ICASV', 'ICCVI', 'ICCVII']
    df = plot.ExtendedDataFrame(fv, index=tup, columns=columns)
    self.assertEqual(mc.friedmann_vectors().to_dict(), df.to_dict())
def test_reduction_all(self):
    seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
    mc = Comparison(*[Contour(s) for s in seq])
    c1 = Contour([0, 1])
    c2 = Contour([0, 2, 1])
    c3 = Contour([0, 2, 1, 3])
    r1 = [[c1, c1, c1, 1, 1, 3],
          [c2, c2, c2, 1, 1, 3],
          [c3, c1, c3, 0, 2, 3]]
    r2 = [[c1, c1, c1, 1, 1, 3, 4],
          [c2, c2, c2, 1, 1, 3, 4],
          [c3, c1, c3, 0, 2, 3, 4]]
    self.assertEqual(mc.reduction_all().as_matrix().tolist(), r1)
    self.assertEqual(mc.reduction_all(3, None, True).as_matrix().tolist(), r2)
class Reconciliation:
    def __init__(self):
        self.processor = Processing()
        self.mappings = None
        self.comparator = Comparison()
        # comparison_alg is assumed to be a module-level configuration value
        self.comparator.set_comparator(comparison_alg)

    def set_mappings(self, mappings: list):
        # mappings: list of dicts, each describing an attribute map
        self.mappings = []
        for mt in mappings:
            self.mappings.append(cast_from_dict(mt, AttributeMap))

    def set_mappings_from_json(self, mappings: str):
        self.set_mappings(json.loads(mappings))

    # Return a similarity level for two given datasets
    def similarity(self, dataset_a: Dataset, dataset_b: Dataset):
        if not isinstance(dataset_a, Dataset) \
                or not isinstance(dataset_b, Dataset):
            raise MissingOrBadParams(
                "Passed parameters are not Dataset objects")

        # Build the tuple set to compare
        compare_tuples = self.processor.transform(dataset_a, dataset_b,
                                                  self.mappings)
        if not compare_tuples:
            raise NoMatchingRules("No compare tuples could be generated")

        # Set the similarity of each tuple
        for ctuple in compare_tuples:
            ctuple.normalised_similarity = self.comparator.compare(
                ctuple.items[0], ctuple.items[1])

        # Calculate the length-dependent weight
        tuple_max_lengths = set()
        for ctuple in compare_tuples:
            # Length of the longest string in the tuple
            ctuple.max_length = len(max(ctuple.items, key=len))
            tuple_max_lengths.add(ctuple.max_length)

        # Maximum length over all tuples
        # tuples_max = max(tuple_max_lengths)

        # Sum of the maximum lengths of all tuples
        sum_lengths = 0
        for ctuple in compare_tuples:
            sum_lengths = sum_lengths + ctuple.max_length

        # Calculate the normalised length weight
        for ctuple in compare_tuples:
            ctuple.length_weight = 1.0
            if use_length_weight:
                # ctuple.length_weight = ctuple.max_length / tuples_max
                ctuple.length_weight = ctuple.max_length / sum_lengths

        # Calculate the normalised, weighted similarity of each tuple
        similarities = []
        for ctuple in compare_tuples:
            similarities.append(ctuple.normalised_similarity
                                * ctuple.weight
                                * ctuple.length_weight)

        # Dataset Similarity Coefficient: with length weighting the weights already
        # sum to 1, so the plain sum is the weighted mean; otherwise take the
        # unweighted average.
        if use_length_weight:
            sim = functools.reduce(lambda a, b: a + b, similarities)
        else:
            sim = functools.reduce(lambda a, b: a + b,
                                   similarities) / len(similarities)

        return sim
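# Worked example of the weighting above (illustrative values, not taken from any test):
# with use_length_weight enabled and two compare tuples whose longest strings have
# lengths 4 and 6, the normalised length weights are 4/10 = 0.4 and 6/10 = 0.6.
# If their normalised similarities are 1.0 and 0.5 with mapping weights of 1.0, the
# Dataset Similarity Coefficient is 1.0 * 0.4 + 0.5 * 0.6 = 0.7; without length
# weighting the unweighted average would be (1.0 + 0.5) / 2 = 0.75.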
def test_reduction_bor(self):
    seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
    mc = Comparison(*[Contour(s) for s in seq])
    reduced = [[Contour([0, 1]), 3],
               [Contour([0, 2, 1]), 3],
               [Contour([0, 2, 1, 3]), 3]]
    self.assertEqual(mc.reduction_bor().as_matrix().tolist(), reduced)
def test_schmuckler_oscillation(self):
    seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
    mc = Comparison(*[Contour(s) for s in seq])
    r = [[0, 0, 3, 3],
         [1, 0.25, 4, 2],
         [2, 0.5, 5, 5 / 3]]
    self.assertEqual(mc.schmuckler_oscillation().as_matrix().tolist(), r)
def test_direction(self):
    seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
    mc = Comparison(*[Contour(s) for s in seq])
    self.assertEqual(mc.direction().as_matrix().tolist(), [[1], [5 / 6], [5 / 6]])
def test_cseg_to_tuple(self):
    seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
    tup = list(map(tuple, seq))
    mc = Comparison(*[Contour(s) for s in seq])
    self.assertEqual(mc._csegs_to_tuple(), tup)
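# Usage sketch for the contour comparison API exercised above (illustrative only,
# import paths assumed):
#
#     csegs = [Contour([0, 1, 2, 3]), Contour([0, 2, 1, 3])]
#     mc = Comparison(*csegs)
#     mc.contour_similarity_crisp()   # pairwise similarity matrix, indexed by cseg tuples
#     mc.reduction_bor()              # Bor reduction of each cseg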