Ejemplo n.º 1
0
    def test_dominating_name_weights(self):
        # The name weight (9) dwarfs the seats (0.5) and features (1.2) weights;
        # this pins the matching that is expected under that weighting.
        cars_a, cars_b = self._get_car_lists()

        matcher = ObjectListMatcher({
            (lambda car: car.seats): 0.5,
            (lambda car: car.name): 9,
            (lambda car: car.features): 1.2,
        })

        expected = [None, 2, 1, 0]
        actual = matcher.get_indices(cars_a, cars_b)
        assert actual == expected
Ejemplo n.º 2
0
    def test_matching_works(self):
        # Features (5) and seats (4) are weighted well above the name (1) here;
        # this pins the matching that is expected under that weighting.
        cars_a, cars_b = self._get_car_lists()

        matcher = ObjectListMatcher({
            (lambda car: car.seats): 4,
            (lambda car: car.name): 1,
            (lambda car: car.features): 5,
        })

        expected = [2, None, 1, 0]
        actual = matcher.get_indices(cars_a, cars_b)
        assert actual == expected
Ejemplo n.º 3
0
    def test_match_a_to_nothing(self):
        # With an empty b there is nothing to match against, so every element of a
        # is expected to be reported as None.
        songs = [('Great Song', 'Beach', 1), ('Great Sea', 'Low', 2)]
        matcher = ObjectListMatcher.for_sequence([7, 1])

        unmatched = [None] * len(songs)
        assert matcher.get_indices(songs, []) == unmatched
Ejemplo n.º 4
0
    def test_numeric_asymmetry(self):
        # Matching a against b and b against a are not mirror images of each other.
        first = [(3, 3), (8, 5), (9, 1)]
        second = [(9, 8), (3, 2), (3, 4)]

        matcher = ObjectListMatcher.for_sequence([0.6, 0.4])
        matcher.should_store_similarity_matrix = True

        # (9, 8) is the best match for (9, 1), which leaves (3, 4) for (8, 5)
        forward = matcher.get_indices(first, second)
        assert forward == [1, 2, 0], formatted_matrix(matcher)

        scores = matcher.similarity_matrix

        # scores[0] holds the match scores of (3, 3) from the first list against
        # every element of the second list; (3, 2) and (3, 4) must score the same
        scores_for_3_3 = scores[0]
        assert scores_for_3_3[1] == scores_for_3_3[2]

        # for (9, 1), both (9, 8) and (3, 2) must score higher than (3, 4)
        scores_for_9_1 = scores[2]
        assert scores_for_9_1[0] > scores_for_9_1[2]
        assert scores_for_9_1[1] > scores_for_9_1[2]

        # The reversed call may look surprising, but it follows from an asymmetry:
        # the best match in the second list for (9, 1) is (9, 8), yet the best match
        # in the first list for (9, 8) is (8, 5), not (9, 1). Comparing the weighted
        # deltas from (9, 8) shows this (a smaller delta means more similar):
        #   delta to (9, 1):    (9-9) * 0.6 + (8-1) * 0.4 = 2.8
        #   delta to (8, 5):    (9-8) * 0.6 + (8-5) * 0.4 = 1.8
        backward = matcher.get_indices(second, first)
        assert backward == [1, 0, 2], formatted_matrix(matcher)
Ejemplo n.º 5
0
    def test_simple_unbalanced(self):
        # b holds a single element, so at most one element of a gets an index and
        # the others are reported as None. Moving 'hell' within a moves the match.
        matcher = ObjectListMatcher.of_identity()
        targets = ['yell']

        cases = (
            (['hell', 'gel', 'shell'], [0, None, None]),
            (['gel', 'hell', 'shell'], [None, 0, None]),
        )
        for candidates, expected in cases:
            assert matcher.get_indices(candidates, targets) == expected
Ejemplo n.º 6
0
    def test_change_weights(self):
        # Starts from the sequence weights [3, 1.5, 2], then hands the matcher a
        # weight of 1 for element index 1 before matching.
        left = [('cacc', 'cacc', 2), ('bacc', 'baca', 1)]
        right = [('caba', 'bacc', 1), ('abaa', 'bcca', 2)]
        matcher = ObjectListMatcher.for_sequence([3, 1.5, 2])

        second_field_weight = {(lambda item: item[1]): 1}
        matcher.update_attr_to_weight(second_field_weight)

        indices = matcher.get_indices(left, right)
        assert indices == [0, 1]
Ejemplo n.º 7
0
    def test_nothing_to_match_b_to(self):
        # The result has one entry per element of a, so an empty a must give an
        # empty result whether b has elements or not.
        matcher = ObjectListMatcher.for_sequence([7, 1])
        songs = [('Great Song', 'Beach', 1), ('Great Sea', 'Low', 2)]

        for candidates in (songs, []):
            assert matcher.get_indices([], candidates) == []
Ejemplo n.º 8
0
    def test_other_now_barely_better(self):
        # The third field of the first tuple in `right` is 1 here; with that value
        # the crossed assignment [1, 0] is the expected outcome.
        left = [('cacc', 'cacc', 2), ('bacc', 'baca', 1)]
        right = [('caba', 'bacc', 1), ('abaa', 'bcca', 2)]
        matcher = ObjectListMatcher.for_sequence([3, 1.5, 2])
        crossed = [1, 0]

        # the same call is made twice; both calls must give the same answer
        for _ in range(2):
            assert matcher.get_indices(left, right) == crossed

        # swapping the arguments gives the same list here
        assert matcher.get_indices(right, left) == crossed
Ejemplo n.º 9
0
    def test_double_weight(self):
        # Only the second field of a's first tuple differs between the two runs:
        # changing "Law" to "Low" is expected to flip the assignment.
        matcher = ObjectListMatcher.for_sequence([4, 2])
        b = [('Great Song', 'Beach', 1), ('Great Sea', 'Low', 2)]

        for second_field, expected in (('Law', [0, 1]), ('Low', [1, 0])):
            a = [('Great Song', second_field, 2), ('Night Mix', 'Beach', 1)]
            assert matcher.get_indices(a, b) == expected
Ejemplo n.º 10
0
    def __init__(self, *args, **kwargs):
        """Set up the album/track matchers and export state, then build the submenu.

        All positional and keyword arguments are forwarded unchanged to the
        parent class initializer.
        """
        super().__init__(*args, **kwargs)
        self._export_collectors = []
        self._import_or_export_option_index = None

        # Maps attribute getters of an AlbumId to the weight used when matching.
        self._album_id_matcher: ObjectListMatcher[AlbumId] = ObjectListMatcher({
            lambda a: a.title: 9,  # title is the most reliable
            lambda a: a.artist: 4.5,
            lambda a: a.tracks: 1.2,
            # needed in case the album has no tags
            lambda a: a.last_directory_parts: 1,
            # multi disc albums sometimes become single disc
            lambda a: a.discs: 0.8,
            # is likely to change unless exact same album
            lambda a: a.id_value: 0.5,
        })
        # We want to check similarity afterwards, so it needs to be as accurate as
        # possible
        self._album_id_matcher.should_store_similarity_matrix = True
        self._album_id_matcher.should_go_through_every_attribute = True

        # Maps attribute getters of a TrackId to the weight used when matching.
        self._track_id_matcher: ObjectListMatcher[TrackId] = ObjectListMatcher({
            lambda t: t.title: 8,
            lambda t: t.artist: 3.5,
            lambda t: t.track: 1.2,
            # needed in case the track has no tags
            lambda t: t.file_stem: 1,
            lambda t: t.disc: 0.8,
        })
        # Same configuration as the album matcher. The second flag was previously
        # set on the album matcher again, leaving the track matcher without it.
        self._track_id_matcher.should_store_similarity_matrix = True
        self._track_id_matcher.should_go_through_every_attribute = True

        self._album_id_to_export_path = {}

        submenu = Gtk.Menu()
        self._init_collectors_and_menu(submenu)

        # Attach the submenu only if it received entries; otherwise grey out
        # this item.
        if submenu.get_children():
            self.set_submenu(submenu)
        else:
            self.set_sensitive(False)
Ejemplo n.º 11
0
    def test_all_elements_in_both_but_different_order(self):
        # Both lists contain the same words, so each element of `words` must be
        # matched to its identical counterpart in the shuffled list.
        matcher = ObjectListMatcher.of_identity()
        words = ['cookie', 'beach', 'house']

        shuffled = ['house', 'cookie', 'beach']
        indices = matcher.get_indices(words, shuffled)
        assert all(word == shuffled[idx] for word, idx in zip(words, indices))

        # a fully reversed list must give fully reversed indices
        backwards = ['house', 'beach', 'cookie']
        assert matcher.get_indices(words, backwards) == [2, 1, 0]
Ejemplo n.º 12
0
    def test_numeric_if_both_good_match_current_order_preferred(self):
        # The weights are written pre-normalized for readability; according to the
        # original author's note the matcher always normalizes them anyway.
        first = [(3, 2), (3, 4)]
        second = [(99, 99), (3, 3)]
        matcher = ObjectListMatcher.for_sequence([0.6, 0.4])

        # (3, 2) and (3, 4) have the same delta to (3, 3); the expected result gives
        # (3, 3) to (3, 2).
        # NOTE(review): the original comment attributes this tie-break to how close
        # the elements' indices are - confirm against the matcher implementation.
        crossed = [1, 0]
        assert matcher.get_indices(first, second) == crossed
        assert matcher.get_indices(second, first) == crossed
Ejemplo n.º 13
0
    def test_clear_match(self):
        # The in-order assignment [0, 1] is expected, stays the same on a repeated
        # call, and is also the result with the arguments swapped.
        left = [('cacc', 'cacc', 2), ('bacc', 'baca', 1)]
        right = [('caba', 'bacc', 2), ('abaa', 'bcca', 2)]
        matcher = ObjectListMatcher.for_sequence([3, 1.5, 2])
        in_order = [0, 1]

        # second iteration: repeating the same call must not change the result
        for _ in range(2):
            assert matcher.get_indices(left, right) == in_order

        # both lists have the same length, so swapping them gives the same list
        assert matcher.get_indices(right, left) == in_order
Ejemplo n.º 14
0
    def test_minimum_similarity(self):
        # Raising minimum_similarity_ratio turns more and more of the previously
        # matched entries into None.
        cars_a, cars_b = self._get_car_lists()

        matcher = ObjectListMatcher({
            (lambda car: car.seats): 3,
            (lambda car: car.features): 8,
        })
        matcher.should_store_similarity_matrix = True

        # result before any ratio is assigned in this test
        baseline = matcher.get_indices(cars_a, cars_b)
        assert baseline == [2, 0, 1, None], formatted_matrix(matcher)

        steps = (
            (0.71, [2, None, 1, None]),
            (0.9, [None, None, 1, None]),
        )
        for ratio, expected in steps:
            matcher.minimum_similarity_ratio = ratio
            indices = matcher.get_indices(cars_a, cars_b)
            assert indices == expected, formatted_matrix(matcher)
Ejemplo n.º 15
0
    def test_minimum_similarity(self):
        # Raising minimum_similarity_ratio turns more and more of the previously
        # matched entries into None, until nothing is matched at 0.6.
        words_a = ['mess', 'blessed', 'chess']
        words_b = ['pudding', 'xylophone', 'yes']

        matcher = ObjectListMatcher.of_identity()
        matcher.should_store_similarity_matrix = True

        # result before any ratio is assigned in this test
        baseline = matcher.get_indices(words_a, words_b)
        assert baseline == [2, 0, 1], formatted_matrix(matcher)

        steps = (
            (0.45, [2, None, 1]),
            (0.6, [None, None, None]),
        )
        for ratio, expected in steps:
            matcher.minimum_similarity_ratio = ratio
            indices = matcher.get_indices(words_a, words_b)
            assert indices == expected, formatted_matrix(matcher)
Ejemplo n.º 16
0
    def test_should_go_through_every_attribute(self):
        # Enabling should_go_through_every_attribute must leave the indices alone
        # but produce a different stored similarity matrix.
        matcher = ObjectListMatcher.for_sequence([0.7, 0.3])
        matcher.should_store_similarity_matrix = True

        # Per the original author's note: the first attribute already yields
        # undefeatable matches here, so by default the algorithm does not even
        # look at the second attribute.
        left = [('a', -8), ('very clear', 0), ('way', 33), ('forward', 2)]
        right = [('very clear', 33), ('forward', -1), ('a', 5), ('way', 2)]
        expected = [2, 0, 3, 1]

        assert matcher.get_indices(left, right) == expected
        partial_matrix = matcher.similarity_matrix

        matcher.should_go_through_every_attribute = True

        # The indices definitely shouldn't change
        indices = matcher.get_indices(left, right)
        assert indices == expected, formatted_matrix(matcher)

        # But the stored matrix should have changed compared to the partial run
        full_matrix = matcher.similarity_matrix
        assert full_matrix != partial_matrix, formatted_matrix(matcher)
Ejemplo n.º 17
0
    def test_more_in_b(self):
        # b offers two candidates for the single element of a; the second one
        # (index 1) is the expected match.
        needle = ('Great Sea', 'Light', 2)
        haystack = [('Great Song', 'Beach', 1), ('Great Sea', 'Low', 2)]
        matcher = ObjectListMatcher.for_sequence([7, 1])

        indices = matcher.get_indices([needle], haystack)
        assert indices == [1]
Ejemplo n.º 18
0
    def test_all_the_same(self):
        # When every element on both sides is identical, the elements are expected
        # to be matched in their existing order.
        x = (9, 9, 9)
        matcher = ObjectListMatcher.for_sequence([0.2, 0.7, 0.1])

        indices = matcher.get_indices([x] * 3, [x] * 3)
        assert indices == list(range(3))
Ejemplo n.º 19
0
 def test_empty_weight_not_allowed(self):
     # Constructing a matcher from an empty mapping must raise ValueError.
     with self.assertRaises(ValueError):
         ObjectListMatcher({})
Ejemplo n.º 20
0
 def test_negative_weights_not_allowed(self):
     # Constructing a matcher with a negative weight must raise ValueError.
     with self.assertRaises(ValueError):
         ObjectListMatcher({(lambda value: int(value)): -1})