    def test_ml_embedded_contour_similarity(self):
        seq = [[0, 2, 1], [0, 1, 3, 2], [0, 2, 1, 3, 4]]
        tup = list(map(tuple, seq))
        mc = Comparison(*[Contour(s) for s in seq])
        m = [[1, 9 / 13, 13 / 28],
             [9 / 13, 1, 29 / 35],
             [13 / 28, 29 / 35, 1]]
        df = pandas.DataFrame(m, index=tup, columns=tup)
        self.assertEqual(mc.embedded_contour_similarity().to_dict(), df.to_dict())

    def test_ml_contour_similarity_crisp(self):
        seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
        tup = list(map(tuple, seq))
        mc = Comparison(*[Contour(s) for s in seq])
        m = [[1, 5 / 6, 5 / 6],
             [5 / 6, 1, 2 / 3],
             [5 / 6, 2 / 3, 1]]
        df = pandas.DataFrame(m, index=tup, columns=tup)
        self.assertEqual(mc.contour_similarity_crisp().to_dict(), df.to_dict())
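
For reference, the crisp values in the matrix above follow Marvin and Laprade's CSIM measure: the fraction of matching entries in the upper triangles of the two contours' COM matrices. A minimal standalone sketch, assuming that definition (the names cmp_sign and csim are illustrative, not part of the library's API):

import itertools

def cmp_sign(a, b):
    # Sign of the motion from a to b: 1 ascending, -1 descending, 0 repeat
    return (b > a) - (b < a)

def csim(c1, c2):
    # Fraction of matching COM-matrix entries for two equal-length csegs
    pairs = list(itertools.combinations(range(len(c1)), 2))
    matches = sum(cmp_sign(c1[i], c1[j]) == cmp_sign(c2[i], c2[j])
                  for i, j in pairs)
    return matches / len(pairs)

csim([0, 1, 2, 3], [0, 1, 3, 2])  # 5/6, matching the matrix above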
Example #3
    def test_caseless_match_fail(self):
        a = "TestString"
        b = "testString1"

        comp = Comparison()
        comp.set_comparator("CaseInsensitiveMatch")
        res = comp.compare(a, b)

        self.assertEqual(res, 0)
Example #4
    def test_case_match_match(self):
        a = "TestString"
        b = "TestString"

        comp = Comparison()
        comp.set_comparator("CaseSensitiveMatch")
        res = comp.compare(a, b)

        self.assertEqual(res, 1)
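
Both match comparators evidently return 1 on a match and 0 otherwise, with the caseless variant folding case first. A minimal sketch of that behaviour, assuming simple string equality (these standalone functions are illustrative, not the library's implementation):

def case_sensitive_match(a: str, b: str) -> int:
    # 1 on exact equality, 0 otherwise
    return 1 if a == b else 0

def case_insensitive_match(a: str, b: str) -> int:
    # Case-folded equality; still 0 for "TestString" vs "testString1"
    # because the trailing "1" differs
    return 1 if a.casefold() == b.casefold() else 0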
Example #5
    def test_damerau_match_fail(self):
        a = "aabc"
        b = "aaaa"
        threshold = 0.7

        comp = Comparison()
        comp.set_comparator("DamerauLevenshtein")
        res = comp.compare(a, b)

        self.assertLess(res, threshold)
Example #6
    def test_damerau_match_perfect(self):
        a = "aaaa"
        b = "aaaa"
        threshold = 1

        comp = Comparison()
        comp.set_comparator("DamerauLevenshtein")
        res = comp.compare(a, b)

        self.assertEqual(res, threshold)
Example #7
    def test_damerau_match_success(self):
        a = "aaaa"
        b = "aaaa1"
        threshold = 0.7

        comp = Comparison()
        comp.set_comparator("DamerauLevenshtein")
        res = comp.compare(a, b)

        self.assertGreaterEqual(res, threshold)
Example #8
    def test_levenshtein_match_success(self):
        a = "aaaa"
        b = "aaaa1"
        threshold = 0.7

        comp = Comparison()
        comp.set_comparator("LevenshteinDistance")
        res = comp.compare(a, b)

        self.assertGreaterEqual(res, threshold)
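
The thresholds in the last three examples are consistent with an edit-distance similarity normalised to [0, 1]: one minus the distance divided by the longer string's length. A minimal sketch using plain Levenshtein distance (Damerau-Levenshtein additionally counts adjacent transpositions as one edit); this is an assumption about the comparators, not the library's actual code:

def levenshtein_similarity(a: str, b: str) -> float:
    # 1 - edit_distance / max(len(a), len(b)); 1.0 for identical strings
    if not a and not b:
        return 1.0
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                # deletion
                           cur[j - 1] + 1,             # insertion
                           prev[j - 1] + (ca != cb)))  # substitution
        prev = cur
    return 1 - prev[-1] / max(len(a), len(b))

levenshtein_similarity("aaaa", "aaaa1")  # 0.8, above the 0.7 threshold
levenshtein_similarity("aabc", "aaaa")   # 0.5, below the 0.7 threshold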

    def test_friedmann_vectors(self):
        seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
        tup = list(map(tuple, seq))
        mc = Comparison(*[Contour(s) for s in seq])
        fv = [[[3, 0], [10, 0], [6, 0], 1, 1, 1],
              [[2, 1], [9, 1], [5, 1], 1 / 3, 0.8, 2 / 3],
              [[2, 1], [9, 1], [5, 1], 1 / 3, 0.8, 2 / 3]]
        columns = ['CASV', 'CCVI', 'CCVII', 'ICASV', 'ICCVI', 'ICCVII']
        df = plot.ExtendedDataFrame(fv, index=tup, columns=columns)
        self.assertEqual(mc.friedmann_vectors().to_dict(), df.to_dict())

    def test_reduction_all(self):
        seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
        mc = Comparison(*[Contour(s) for s in seq])
        c1 = Contour([0, 1])
        c2 = Contour([0, 2, 1])
        c3 = Contour([0, 2, 1, 3])
        r1 = [[c1, c1, c1, 1, 1, 3],
              [c2, c2, c2, 1, 1, 3],
              [c3, c1, c3, 0, 2, 3]]
        r2 = [[c1, c1, c1, 1, 1, 3, 4],
              [c2, c2, c2, 1, 1, 3, 4],
              [c3, c1, c3, 0, 2, 3, 4]]
        self.assertEqual(mc.reduction_all().as_matrix().tolist(), r1)
        self.assertEqual(mc.reduction_all(3, None, True).as_matrix().tolist(), r2)
Example #12
import functools
import json

# Processing, Comparison, Dataset, AttributeMap, cast_from_dict and the
# exception classes below come from the source project's own modules;
# comparison_alg and use_length_weight are module-level settings there.
class Reconciliation:
    def __init__(self):
        self.processor = Processing()
        self.mappings = None
        self.comparator = Comparison()
        self.comparator.set_comparator(comparison_alg)

    def set_mappings(self, mappings: list[dict]):
        self.mappings = []
        for mt in mappings:
            self.mappings.append(cast_from_dict(mt, AttributeMap))

    def set_mappings_from_json(self, mappings: str):
        self.set_mappings(json.loads(mappings))

    # Return a similarity score for two given datasets
    def similarity(self, dataset_a: Dataset, dataset_b: Dataset):
        if not isinstance(dataset_a, Dataset) \
                or not isinstance(dataset_b, Dataset):
            raise MissingOrBadParams(
                "Passed parameters are not Dataset objects")

        # Build the tuple set to compare
        compare_tuples = self.processor.transform(dataset_a, dataset_b,
                                                  self.mappings)
        if not compare_tuples:
            raise NoMatchingRules("No compare tuples could be generated")

        # Set the similarity of each tuple
        for ctuple in compare_tuples:
            ctuple.normalised_similarity = self.comparator.compare(
                ctuple.items[0], ctuple.items[1])

        # Calculate length-dependent weight
        tuple_max_lengths = set()
        for ctuple in compare_tuples:
            # Get length of the maximum length string in tuple
            ctuple.max_length = len(max(ctuple.items, key=len))
            tuple_max_lengths.add(ctuple.max_length)

        # Maximum length of all tuples
        # tuples_max = max(tuple_max_lengths)

        # Sum of the max lengths across all tuples
        sum_lengths = sum(ctuple.max_length for ctuple in compare_tuples)

        # Calculate normalised weight
        for ctuple in compare_tuples:
            ctuple.length_weight = 1.0
            if use_length_weight:
                # ctuple.length_weight = ctuple.max_length / tuples_max
                ctuple.length_weight = ctuple.max_length / sum_lengths

        # Calculate normalised-weighted similarity for each tuple
        similarities = []
        for ctuple in compare_tuples:
            similarities.append(ctuple.normalised_similarity * ctuple.weight *
                                ctuple.length_weight)

        # Calculate the Dataset Similarity Coefficient. When length weights
        # are used they already sum to 1, so a plain sum suffices;
        # otherwise average over the number of tuples.
        if use_length_weight:
            sim = functools.reduce(lambda a, b: a + b, similarities)
        else:
            sim = functools.reduce(lambda a, b: a + b,
                                   similarities) / len(similarities)

        return sim
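
To make the weighting concrete, consider a hypothetical run with two compare tuples (all numbers invented for illustration): tuple A with normalised_similarity 0.9, weight 1.0 and max_length 10, and tuple B with 0.5, 1.0 and 5. With use_length_weight enabled, sum_lengths = 15, so A's length_weight is 10/15 and B's is 5/15, giving sim = 0.9 * 10/15 + 0.5 * 5/15 ≈ 0.77. Without length weighting, sim = (0.9 + 0.5) / 2 = 0.7.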

    def test_reduction_bor(self):
        seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
        mc = Comparison(*[Contour(s) for s in seq])
        reduced = [[Contour([0, 1]), 3],
                   [Contour([0, 2, 1]), 3],
                   [Contour([0, 2, 1, 3]), 3]]
        self.assertEqual(mc.reduction_bor().as_matrix().tolist(), reduced)

    def test_schmuckler_oscillation(self):
        seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
        mc = Comparison(*[Contour(s) for s in seq])
        r = [[0, 0, 3, 3], [1, 0.25, 4, 2], [2, 0.5, 5, 5 / 3]]
        self.assertEqual(mc.schmuckler_oscillation().as_matrix().tolist(), r)

    def test_direction(self):
        seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
        mc = Comparison(*[Contour(s) for s in seq])
        self.assertEqual(mc.direction().as_matrix().tolist(), [[1], [5 / 6], [5 / 6]])
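
One definition of direction() consistent with all three expected values is the fraction of strictly ascending pairs in the COM matrix's upper triangle; a sketch under that assumption (the standalone direction function below is illustrative, not the library's code):

from itertools import combinations

def direction(cseg):
    # Fraction of strictly ascending element pairs in the cseg
    pairs = list(combinations(cseg, 2))
    return sum(a < b for a, b in pairs) / len(pairs)

direction([0, 1, 3, 2])  # 5/6, as expected above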

    def test_cseg_to_tuple(self):
        seq = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3]]
        tup = list(map(tuple, seq))
        mc = Comparison(*[Contour(s) for s in seq])
        self.assertEqual(mc._csegs_to_tuple(), tup)