コード例 #1
0
ファイル: processing.py プロジェクト: EC-SEAL/reconciliation
    def test_substitution_notfound(self):
        """Check substitution when one referenced attribute is not in the dataset.

        The template mixes a "$NotToBeFound" reference, the literal "ZZ" and a
        "$FamilyName" reference; the result is expected to be
        "ZZARAGO MONZONIS" (presumably the unknown reference contributes
        nothing -- confirm against Processing.substitute).
        """
        template = ["$NotToBeFound", "ZZ", "$FamilyName"]
        wanted = "ZZARAGO MONZONIS"

        processor = Processing()
        produced = processor.substitute(template, self.datasets[0])

        self.assertEqual(produced, wanted)
コード例 #2
0
ファイル: processing.py プロジェクト: EC-SEAL/reconciliation
    def test_substitution(self):
        """Check that "$"-prefixed references and literals are joined in order.

        Substituting ["$GivenName", " ", "$FamilyName"] against the first
        fixture dataset must produce "FRANCISCO JOSE ARAGO MONZONIS".
        """
        template = ["$GivenName", " ", "$FamilyName"]
        wanted = "FRANCISCO JOSE ARAGO MONZONIS"

        processor = Processing()
        produced = processor.substitute(template, self.datasets[0])

        self.assertEqual(produced, wanted)
コード例 #3
0
ファイル: processing.py プロジェクト: EC-SEAL/reconciliation
    def test_transform_exact_match(self):
        """Check that transform on fixtures 0 and 1 yields only identical pairs.

        At least one compare tuple must be produced, and in every tuple the
        first two items must be equal.
        """
        processor = Processing()
        pairs = processor.transform(
            self.datasets[0], self.datasets[1], self.mappings)

        self.assertGreaterEqual(len(pairs), 1)
        for pair in pairs:
            left, right = pair.items[0], pair.items[1]
            self.assertEqual(left, right)
コード例 #4
0
ファイル: processing.py プロジェクト: EC-SEAL/reconciliation
    def test_transform_similar_match_greek(self):
        """Check the single compare tuple produced from fixtures 2 and 3.

        Exactly one tuple is expected, holding "ANDREAS PETROU" and
        "ANDREAS PETRO" as its first and second items.
        """
        processor = Processing()
        pairs = processor.transform(
            self.datasets[2], self.datasets[3], self.mappings)

        self.assertEqual(len(pairs), 1)
        # Verify the items of the only tuple position by position.
        for position, wanted in enumerate(("ANDREAS PETROU", "ANDREAS PETRO")):
            self.assertEqual(pairs[0].items[position], wanted)
コード例 #5
0
ファイル: processing.py プロジェクト: EC-SEAL/reconciliation
 def test_no_match_set(self):
     """Check that match_set returns an empty set when no pairing applies.

     An AttributeMap is built with a single Pairing whose profile, issuer
     and categories are set to values that are not meant to match the
     first fixture dataset.
     """
     tr = AttributeMap()
     pr = Pairing()
     pr.profile = "fail"
     pr.issuer = "fail"
     pr.categories = ["None", "to", "be", "found"]
     tr.pairings = [pr]
     p = Processing()
     # "No match" is asserted as an empty set here, not as a raised
     # MapDatasetMatchNotFound.
     pairings = p.match_set(self.datasets[0], tr)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(pairings, set())
コード例 #6
0
ファイル: processing.py プロジェクト: EC-SEAL/reconciliation
 def test_clean_string(self):
     """Check that unwanted characters and surplus whitespace are removed.

     The input is first passed through Tools.clean_string with
     StringProcessor.unwanted_chars, then through Tools.clean_spaces; the
     result must be exactly "legitimate text".
     """
     input_string = "  legitimate-.,;:_·<>+\\|/'#@()\"\t\n\r!%&=?¡¿    text "
     expected_string = "legitimate text"
     # Only the Tools helpers are exercised; the previously created (and
     # never used) Processing instance has been dropped.
     output_string = Tools.clean_string(input_string,
                                        StringProcessor.unwanted_chars)
     output_string = Tools.clean_spaces(output_string)
     self.assertEqual(expected_string, output_string)
コード例 #7
0
 def __init__(self):
     """Create the processor and a comparator set up with comparison_alg.

     Mappings start out as None.
     """
     self.processor, self.mappings = Processing(), None
     comparator = Comparison()
     comparator.set_comparator(comparison_alg)
     self.comparator = comparator
コード例 #8
0
class Reconciliation:
    """Compute a similarity coefficient between two Dataset objects.

    A Processing instance turns the two datasets plus the configured
    attribute mappings into compare tuples; a Comparison instance scores
    each tuple; the scores are then combined into a single value.
    """

    def __init__(self):
        """Create the processor and a comparator set up with comparison_alg."""
        self.processor = Processing()
        self.mappings = None
        self.comparator = Comparison()
        self.comparator.set_comparator(comparison_alg)

    def set_mappings(self, mappings: [dict]):
        """Replace the mappings with AttributeMap objects cast from dicts.

        :param mappings: iterable of dicts, each cast via cast_from_dict.
        """
        # Build the full list before assigning so a failing cast does not
        # leave self.mappings half-populated.
        self.mappings = [cast_from_dict(mt, AttributeMap) for mt in mappings]

    def set_mappings_from_json(self, mappings: str):
        """Parse a JSON document and install the result via set_mappings.

        :param mappings: JSON text that decodes to the structure accepted
            by set_mappings.
        """
        self.set_mappings(json.loads(mappings))

    def similarity(self, dataset_a: Dataset, dataset_b: Dataset):
        """Return a similarity level for two given datasets.

        Each compare tuple gets a normalised similarity from the comparator.
        When ``use_length_weight`` (a setting defined outside this class) is
        true, every tuple is weighted by the length of its longest item
        relative to the sum of those lengths over all tuples, and the result
        is the weighted sum. Otherwise the result is the mean of
        ``normalised_similarity * weight`` over all tuples.

        :param dataset_a: first Dataset.
        :param dataset_b: second Dataset.
        :return: the combined similarity value.
        :raises MissingOrBadParams: if either argument is not a Dataset.
        :raises NoMatchingRules: if no compare tuples could be generated.
        """
        if not (isinstance(dataset_a, Dataset)
                and isinstance(dataset_b, Dataset)):
            raise MissingOrBadParams(
                "Passed parameters are not Dataset objects")

        # Build the tuple set to compare
        compare_tuples = self.processor.transform(dataset_a, dataset_b,
                                                  self.mappings)
        if not len(compare_tuples):
            raise NoMatchingRules("No compare tuples could be generated")

        # Set the similarity of each tuple and record the length of the
        # longest string it holds.
        for ctuple in compare_tuples:
            ctuple.normalised_similarity = self.comparator.compare(
                ctuple.items[0], ctuple.items[1])
            ctuple.max_length = len(max(ctuple.items, key=len))

        # Sum of the per-tuple maximum lengths
        sum_lengths = sum(ctuple.max_length for ctuple in compare_tuples)

        # Calculate normalised weight
        for ctuple in compare_tuples:
            if not use_length_weight:
                ctuple.length_weight = 1.0
            elif sum_lengths:
                ctuple.length_weight = ctuple.max_length / sum_lengths
            else:
                # Every compared string is empty: fall back to equal weights
                # (still summing to 1) instead of dividing by zero.
                ctuple.length_weight = 1.0 / len(compare_tuples)

        # Calculate normalised-weighted similarity for each tuple
        similarities = [
            ctuple.normalised_similarity * ctuple.weight
            * ctuple.length_weight
            for ctuple in compare_tuples
        ]

        # Dataset Similarity Coefficient: the length weights already sum to
        # one, so only the unweighted variant needs dividing by the count.
        sim = sum(similarities)
        if not use_length_weight:
            sim = sim / len(similarities)

        return sim
コード例 #9
0
ファイル: main.py プロジェクト: intv0id/TFJMBot
 def on_pubmsg(self, serv, ev):
     """Hand the first argument of the event to Processing.processMessage.

     The call also receives ``serv`` and the event's ``target``.
     """
     text = ev.arguments[0]
     Processing.processMessage(text, serv, ev.target)
コード例 #10
0
ファイル: processing.py プロジェクト: EC-SEAL/reconciliation
    def test_substitution_no_attrs(self):
        """Check that substituting an empty attribute list gives ""."""
        nothing_to_substitute = []
        processor = Processing()
        produced = processor.substitute(nothing_to_substitute,
                                        self.datasets[0])

        self.assertEqual(produced, "")
コード例 #11
0
ファイル: processing.py プロジェクト: EC-SEAL/reconciliation
 def test_no_getAttributeValue(self):
     """Check that getAttributeValue returns None for the name "fail".

     The lookup runs against the attribute list of the first fixture
     dataset.
     """
     missing_name = "fail"
     candidates = self.datasets[0].attributes
     processor = Processing()
     found = processor.getAttributeValue(missing_name, candidates)
     self.assertIsNone(found)
コード例 #12
0
ファイル: processing.py プロジェクト: EC-SEAL/reconciliation
 def test_clean_spaces(self):
     """Check that Tools.clean_spaces trims and collapses runs of spaces.

     "   a  b    c    d " must come back as "a b c d".
     """
     input_string = "   a  b    c    d "
     expected_string = "a b c d"
     # Only Tools.clean_spaces is exercised; the previously created (and
     # never used) Processing instance has been dropped.
     output_string = Tools.clean_spaces(input_string)
     self.assertEqual(expected_string, output_string)