コード例 #1
0
def test_search():
    """Check find_similar, find_partial and find_regex on a StenoDictionaryCollection."""
    collection = StenoDictionaryCollection()

    # Expected (translation, {keys}) result entries that recur in the assertions below.
    capitalized = ('Beautiful', {('PWAOUFL', )})
    lowercase = ('beautiful', {('WAOUFL', )})
    decorated = ('{^BEAUTIFUL}  ', {('PWAOUT', '-FL')})
    short_form = ('beau', {('PWAOU', )})
    adverb = ('beautifully', {('PWAOUFL', 'HREU')})
    ugly = ('ugly', {('ULG', )})

    # Two translations count as similar when they are equal once case is ignored
    # and special characters have been stripped from both ends.
    base = StenoDictionary()
    base[('WAOUFL', )] = 'beautiful'
    base[('PWAOUFL', )] = 'Beautiful'
    base[('PWAOUT', '-FL')] = '{^BEAUTIFUL}  '
    base[('ULG', )] = 'ugly'
    collection.set_dicts([base])
    every_variant = [capitalized, lowercase, decorated]
    assert collection.find_similar('beautiful') == every_variant
    assert collection.find_similar('{#BEAUtiful}{^}') == every_variant

    # A translation present in several dictionaries is reported once, with the
    # non-overlapping keys from each dictionary merged together.
    extra = StenoDictionary()
    del base[('PWAOUT', '-FL')]
    extra[('PW-FL', )] = 'beautiful'
    collection.set_dicts([base, extra])
    merged = ('beautiful', {('WAOUFL', ), ('PW-FL', )})
    assert collection.find_similar('beautiful') == [capitalized, merged]

    # When every key of a translation is overridden by another dictionary,
    # the translation must drop out of the results entirely.
    shadow = StenoDictionary()
    shadow[('PW-FL', )] = 'not beautiful'
    shadow[('WAOUFL', )] = 'not beautiful'
    collection.set_dicts([shadow, base, extra])
    assert collection.find_similar('beautiful') == [capitalized]

    # Partial search lists similar words first; if count allows more, it continues
    # with the following words in sorted order that are supersets of the query,
    # and it stops at the end of the dictionary.
    collection.set_dicts([base])
    base[('PWAOU', )] = 'beau'
    base[('PWAOUFL', 'HREU')] = 'beautifully'
    base[('UG', 'HREU', '-PBS')] = 'ugliness'
    assert collection.find_partial('beau', count=4) == [
        short_form, capitalized, lowercase, adverb
    ]
    assert collection.find_partial('UGLY', count=2) == [ugly]

    # A query that is not itself an entry still yields the words that follow the
    # position where it would have been found.
    assert collection.find_partial('beaut', count=3) == [
        capitalized, lowercase, adverb
    ]

    # Regex search returns, in order, up to count entries matching the pattern.
    # Without regex metacharacters it amounts to a case-sensitive starts-with search.
    assert collection.find_regex('beau', count=4) == [
        short_form, lowercase, adverb
    ]
    assert collection.find_regex('beautiful.?.?', count=2) == [lowercase, adverb]
    assert collection.find_regex(' beautiful', count=3) == []
    assert collection.find_regex('(b|u).{3}$', count=2) == [short_form, ugly]
    assert collection.find_regex('.*ly', count=5) == [adverb, ugly]

    # An invalid pattern does not raise when the search short-circuits because
    # nothing could match; otherwise the regex error propagates.
    assert collection.find_regex('an open group that doesn\'t raise(', count=5) == []
    with pytest.raises(re.error):
        print(collection.find_regex('beautiful...an open group(', count=1))