Example #1
def __test__ (ut) :
    from nlplib.core.model import Seq, Gram, Word
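    # <token>, <deletions>, <transpositions>, <replacements>, <insertions>,
    # <alterations>, and <similar> are expected to be defined at module level
    # alongside this test; they are not shown in this snippet.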

    introduce = 'abc'

    # Test an odd-length string.
    seq = 'hello'
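    # <token.halve> presumably splits the sequence at an index into a
    # (head, tail) pair, and <token.map_over_indexes> applies it at every
    # index, producing the split points the edit generators below work from.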
    halves = list(token.map_over_indexes(token.halve, seq))

    ut.assert_equal(list(deletions(halves)), ['ello', 'hllo', 'helo', 'helo', 'hell'])
    ut.assert_equal(list(transpositions(halves)), ['ehllo', 'hlelo', 'hello', 'helol'])
    ut.assert_equal(list(replacements(halves, introduce)),
                    ['aello', 'bello', 'cello', 'hallo', 'hbllo', 'hcllo', 'healo', 'heblo', 'heclo', 'helao', 'helbo',
                     'helco', 'hella', 'hellb', 'hellc'])
    ut.assert_equal(list(insertions(halves, introduce)),
                    ['ahello', 'bhello', 'chello', 'haello', 'hbello', 'hcello', 'heallo', 'hebllo', 'hecllo',
                     'helalo', 'helblo', 'helclo', 'hellao', 'hellbo', 'hellco', 'helloa', 'hellob', 'helloc'])
    ut.assert_equal(alterations(seq, introduce),
                    {'heblo', 'heallo', 'hellb', 'hellco', 'ehllo', 'hellao', 'hbello', 'hecllo', 'bello', 'helol',
                     'helao', 'hallo', 'hlelo', 'helblo', 'hell', 'ello', 'helloa', 'helo', 'helclo', 'heclo',
                     'chello', 'cello', 'haello', 'hellc', 'aello', 'hella', 'hellbo', 'hcllo', 'hello', 'helbo',
                     'hbllo', 'helalo', 'hcello', 'hellob', 'helco', 'bhello', 'hllo', 'helloc', 'hebllo', 'healo',
                     'ahello'})

    # Test an even-length string.
    seq = 'fish'
    halves = list(token.map_over_indexes(token.halve, seq))

    ut.assert_equal(list(deletions(halves)), ['ish', 'fsh', 'fih', 'fis'])
    ut.assert_equal(list(transpositions(halves)), ['ifsh', 'fsih', 'fihs'])
    ut.assert_equal(list(replacements(halves, introduce)),
                    ['aish', 'bish', 'cish', 'fash', 'fbsh', 'fcsh', 'fiah', 'fibh', 'fich', 'fisa', 'fisb', 'fisc'])
    ut.assert_equal(list(insertions(halves, introduce)),
                    ['afish', 'bfish', 'cfish', 'faish', 'fbish', 'fcish', 'fiash', 'fibsh', 'ficsh', 'fisah', 'fisbh',
                     'fisch', 'fisha', 'fishb', 'fishc'])
    ut.assert_equal(alterations(seq, introduce),
                    {'fcsh', 'aish', 'fsih', 'ifsh', 'fis', 'faish', 'fbsh', 'fih', 'fisbh', 'fibh', 'fisch', 'fishb',
                     'bish', 'afish', 'bfish', 'fbish', 'fsh', 'fishc', 'fcish', 'fisha', 'fibsh', 'ish', 'fiah',
                     'cfish', 'fisah', 'fich', 'fisb', 'fisc', 'fash', 'fisa', 'fiash', 'fihs', 'cish', 'ficsh'})

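    # <similar> accepts plain strings as well as Seq/Word/Gram instances;
    # judging by the assertions below, it returns every sequence reachable
    # within the given number of single-element edits (characters for strings,
    # words for grams), using only the introduced elements.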
    for hello in ['hello', Seq('hello'), Word('hello')] :
        ut.assert_equal(similar(hello, 'ab', 1),
                        {'haello', 'helblo', 'helbo', 'hallo', 'hellob', 'helo', 'helloa', 'healo', 'hell', 'helalo',
                         'hbllo', 'hellbo', 'ello', 'hella', 'hellb', 'hello', 'aello', 'heallo', 'ahello', 'hbello',
                         'bhello', 'bello', 'helol', 'hllo', 'helao', 'hebllo', 'heblo', 'hellao', 'ehllo', 'hlelo'})

    ut.assert_equal(similar('h', 'ab', 1), {'hb', 'ha', 'ah', 'a', 'b', 'bh'})

    # Test with mixed types introduced.
    correct_output = {Gram('hello a'), Gram('b'), Gram('a'), Gram('hello b'), Gram('a hello'), Gram('b hello')}

    for introduce in ['ab', ['a', 'b'], [Seq('a'), Seq('b')], [Word('a'), Word('b')], [Gram('a'), Gram('b')]] :
        ut.assert_equal(similar(Gram('hello'), introduce, 1), correct_output)

    for a in ['a', Seq('a'), Gram('a'), Word('a')] :
        for b in ['b', Seq('b'), Gram('b'), Word('b')] :
            ut.assert_equal(similar(Gram('hello'), [a, b], 1), correct_output)

    for introduce in [['ab'], [Seq('ab')], [Gram('ab')], [Word('ab')]] :
        ut.assert_equal(similar(Gram('hello'), introduce, 1), {Gram('ab'), Gram('ab hello'), Gram('hello ab')})

    # Test <similar> using the words inside a gram.
    ut.assert_equal(similar(Gram('a b'), Gram('c d'), 1),
                    {Gram('d a b'), Gram('d b'), Gram('a c'), Gram('a'), Gram('b'), Gram('a d b'), Gram('a d'),
                     Gram('a b d'), Gram('b a'), Gram('a c b'), Gram('c a b'), Gram('c b'), Gram('a b c')})

    # Test <similar> using whole grams.
    ut.assert_equal(similar(Gram('a b'), [Gram('c d'), Gram('e f')], 1),
                    {Gram('a c d'), Gram('a c d b'), Gram('b a'), Gram('c d a b'), Gram('a b e f'), Gram('c d b'),
                     Gram('a b c d'), Gram('a e f'), Gram('b'), Gram('a e f b'), Gram('a'), Gram('e f a b'),
                     Gram('e f b')})

    # Test deep changes using <similar>.
    ut.assert_equal(similar('a', 'bc', 2),
                    {'bab', 'bac', 'cba', 'a', 'cab', 'c', 'b', 'ba', 'acc', 'acb', 'cac', 'abc', 'cca', 'abb', 'bca',
                     'bba', 'ac', 'ab', 'ca', 'bc', 'cc', 'cb', 'bb'})
Example #2
def alterations (seq, introduce) :
    halves = list(token.map_over_indexes(token.halve, seq))
    return set().union(deletions(halves),
                       transpositions(halves),
                       replacements(halves, introduce),
                       insertions(halves, introduce))
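
A minimal usage sketch of <alterations>, assuming the four edit generators it
unions live in the same module as in Example #1; the literals below are taken
directly from that example's assertions for the even-length case:

# Every single-edit variant of 'fish' built from the characters 'abc'.
variants = alterations('fish', 'abc')
assert 'ish' in variants    # deletion
assert 'ifsh' in variants   # transposition
assert 'aish' in variants   # replacement
assert 'fisha' in variants  # insertion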