Exemple #1
0
    def _find_suggestions(self):
        """Fill ``self.suggestions`` with known fields resembling ``self.field``.

        ``self.field`` may be a bare field name or an ``(ftype, fname)``
        tuple; a tuple whose second item is ``None`` is treated as a bare
        name taken from its first item. Candidates are read from
        ``self.ds.derived_field_list`` and collected in two passes:

        1. Fields whose name matches ``fname`` case-insensitively, and whose
           type differs from ``ftype`` when a type was given, are recorded
           with a distance of 0.
        2. Only when a field type was given, the lowercased ``str()`` of the
           requested field is compared with the lowercased ``str()`` of every
           known field using ``levenshtein_distance``; fields closer than the
           threshold are recorded with that distance.

        The result, stored on ``self.suggestions``, is a list of
        ``(ftype, fname)`` tuples ordered by increasing distance. Nothing is
        returned.
        """
        from yt.funcs import levenshtein_distance

        requested = self.field
        dataset = self.ds

        # Split the request into a (type, name) pair; the type is None when
        # the caller did not provide one.
        if isinstance(requested, tuple):
            if requested[1] is None:
                ftype, fname = None, requested[0]
            else:
                ftype, fname = requested
        else:
            ftype, fname = None, requested

        # Arbitrary cut-off on the edit distance, chosen so that small typos
        # still yield a hit (e.g. `densty` -> `density`) while unrelated
        # names are not proposed (e.g. `pressure` -> `density` is 6 edits
        # away).
        # NOTE(review): the comparison below is strict, so a field at a
        # distance of exactly 3 is *not* suggested -- confirm this is the
        # intended reading of "at most 3 edits".
        max_distance = 3

        # Maps (ftype, fname) -> distance; insertion order breaks ties when
        # sorting at the end.
        ranked = {}

        # Pass 1: same field name, under another (or any) field type.
        for known_type, known_name in dataset.derived_field_list:
            if known_name.lower() != fname.lower():
                continue
            if ftype is not None and known_type.lower() == ftype.lower():
                continue
            ranked[known_type, known_name] = 0

        # Pass 2: near misses on the textual form of the whole field tuple.
        if ftype is not None:
            lowered_known = {
                key: str(key).lower() for key in dataset.derived_field_list
            }
            lowered_requested = str(requested).lower()

            for (known_type, known_name), text in lowered_known.items():
                dist = levenshtein_distance(
                    lowered_requested, text, max_dist=max_distance
                )
                # Keep the distance recorded by pass 1 for fields already
                # seen.
                if dist < max_distance and (known_type, known_name) not in ranked:
                    ranked[known_type, known_name] = dist

        # Most likely candidates (smallest distance) come first.
        self.suggestions = sorted(ranked, key=ranked.get)
Exemple #2
0
def test_levenshtein():
    """Check ``levenshtein_distance`` against hand-computed expectations.

    The first table exercises the function without a ``max_dist`` argument;
    the second passes ``max_dist`` explicitly. Cases are evaluated in order
    and the first mismatch fails the test.
    """
    # (first string, second string, expected distance)
    unbounded_cases = (
        # Identical strings
        ("abcdef", "abcdef", 0),
        # Deletions / additions
        ("abcdef", "abcde", 1),
        ("abcdef", "abcd", 2),
        ("abcdef", "abc", 3),
        ("abcdf", "abcdef", 1),
        ("cdef", "abcdef", 2),
        ("bde", "abcdef", 3),
        # Substitutions
        ("abcd", "abc_", 1),
        ("abcd", "ab__", 2),
        ("abcd", "a___", 3),
        ("abcd", "____", 4),
        # Substitutions combined with a change in length
        ("abcd", "abc_z", 2),
        ("abcd", "ab__zz", 4),
        ("abcd", "a___zzz", 6),
        ("abcd", "____zzzz", 8),
    )
    for first, second, expected in unbounded_cases:
        assert_equal(levenshtein_distance(first, second), expected)

    # (first string, second string, max_dist, expected distance)
    # In these expectations the result never exceeds max_dist + 1, while
    # distances within the bound are reported as-is.
    bounded_cases = (
        ("abcd", "", 0, 1),
        ("abcd", "", 3, 4),
        ("abcd", "", 10, 4),
        ("abcd", "", 1, 2),
        ("abcd", "a", 2, 3),
        ("abcd", "ad", 2, 2),
        ("abcd", "abd", 2, 1),
        ("abcd", "abcd", 2, 0),
    )
    for first, second, limit, expected in bounded_cases:
        assert_equal(
            levenshtein_distance(first, second, max_dist=limit), expected
        )