Example #1
0
 def find_first_word(haystack):
     '''
     Find which of ``original_words`` occurs earliest in ``haystack``.

     Every word is looked up with ``index_of`` (using the enclosing scope's
     ``lang``); a result greater than -1 counts as a hit. On a tie the word
     that was seen first is kept.

     :return: ``(position, word)`` of the earliest hit, or ``(-1, None)``
              when no word produced a hit.
     '''
     best_pos = -1
     best_word = None
     for candidate in original_words:
         pos = index_of(candidate, haystack, lang=lang)
         if pos <= -1:
             # This word was not found in haystack
             continue
         if best_pos != -1 and best_pos <= pos:
             # A hit at the same or an earlier position is already recorded
             continue
         best_pos = pos
         best_word = candidate
     return best_pos, best_word
Example #2
0
 def test_break_iterator(self):
     ' Check word splitting and whole-word lookup done by the break iterator '
     from calibre.spell.break_iterator import split_into_words as split, index_of, split_into_words_and_positions

     # Leading, trailing and repeated whitespace must not change the words found
     samples = ('one two three', ' one two three', 'one\ntwo  three ')
     for text in samples:
         words = split(unicode(text))
         self.ae(words, ['one', 'two', 'three'], 'Failed to split: %r' % text)

     # Apostrophes and inner hyphens stay inside a word, outer hyphens are dropped
     self.ae(split(u'I I\'m'), ['I', "I'm"])
     self.ae(split(u'out-of-the-box'), ['out-of-the-box'])
     self.ae(split(u'-one two-'), ['one', 'two'])

     # The expected start of the last word is one larger on narrow builds
     # (presumably because the non-BMP character needs two code units there)
     found = split_into_words_and_positions('one \U0001f431 three')
     last_word_start = 7 if icu.is_narrow_build else 6
     self.ae(found, [(0, 3), (last_word_start, 5)])

     # (word to look for, text to search, expected index or -1 when absent)
     cases = (
         ('word', 'a word b', 2),
         ('word', 'a word', 2),
         ('one-two', 'a one-two punch', 2),
         ('one-two', 'one-two punch', 0),
         ('one-two', 'one-two', 0),
         ('one', 'one-two one', 8),
         ('one-two', 'one-two-three one-two', 14),
         ('one', 'onet one', 5),
         ('two', 'one-two two', 8),
         ('i', 'i', 0),
         ('i', 'six i', 4),
         ('i', '', -1),
         ('', '', -1),
         ('', 'i', -1),
         ('i', 'six clicks', -1),
         ('i', '\U0001f431 i', (3 if icu.is_narrow_build else 2)),
     )
     for word, text, expected in cases:
         actual = index_of(word, text)
         message = 'Failed to find index of %r in %r (%d != %d)' % (word, text, expected, actual)
         self.ae(expected, actual, message)
Example #3
0
 def find_first_word(haystack):
     '''
     Return ``(position, word)`` for the word from ``original_words`` that
     ``index_of`` locates closest to the start of ``haystack``.

     Lookups use the ``lang`` of the enclosing scope. ``(-1, None)`` is
     returned when ``index_of`` reports no position greater than -1 for any
     word; when two words share a position the first one encountered wins.
     '''
     first_pos, first_word = -1, None
     for word in original_words:
         pos = index_of(word, haystack, lang=lang)
         if pos > -1:
             # Record the hit only if it is the first one or strictly earlier
             if first_pos == -1 or pos < first_pos:
                 first_pos, first_word = pos, word
     return first_pos, first_word
Example #4
0
 def test_break_iterator(self):
     ' Check splitting, counting and whole-word lookup done by the break iterator '
     from calibre.spell.break_iterator import split_into_words as split, index_of, split_into_words_and_positions, count_words

     # Leading, trailing and repeated whitespace must not change the words found
     samples = ('one two three', ' one two three', 'one\ntwo  three ')
     for text in samples:
         words = split(str(text))
         self.ae(words, ['one', 'two', 'three'], 'Failed to split: %r' % text)

     # Apostrophes and hyphens (inner, leading or trailing) are kept as part of the word
     self.ae(split('I I\'m'), ['I', "I'm"])
     self.ae(split('out-of-the-box'), ['out-of-the-box'])
     self.ae(split('-one two-'), ['-one', 'two-'])
     self.ae(split('-one a-b-c-d e'), ['-one', 'a-b-c-d', 'e'])
     self.ae(split('-one -a-b-c-d- e'), ['-one', '-a-b-c-d-', 'e'])

     # Each entry of the result is compared against a (start, length) style pair
     found = split_into_words_and_positions('one \U0001f431 three')
     self.ae(found, [(0, 3), (6, 5)])

     self.ae(count_words('a b c d e f'), 6)

     # (word to look for, text to search, expected index or -1 when absent)
     cases = (
         ('word', 'a word b', 2),
         ('word', 'a word', 2),
         ('one-two', 'a one-two punch', 2),
         ('one-two', 'one-two punch', 0),
         ('one-two', 'one-two', 0),
         ('one', 'one-two one', 8),
         ('one-two', 'one-two-three one-two', 14),
         ('one', 'onet one', 5),
         ('two', 'one-two two', 8),
         ('two', 'two-one two', 8),
         ('-two', 'one-two -two', 8),
         ('-two', 'two', -1),
         ('i', 'i', 0),
         ('i', 'six i', 4),
         ('i', '', -1),
         ('', '', -1),
         ('', 'i', -1),
         ('i', 'six clicks', -1),
         ('i', '\U0001f431 i', 2),
         ('-a', 'b -a', 2),
         ('a-', 'a-b a- d', 4),
         ('-a-', 'b -a -a-', 5),
         ('-a-', '-a-', 0),
         ('-a-', 'a-', -1),
         ('-a-', '-a', -1),
         ('-a-', 'a', -1),
         ('a-', 'a-', 0),
         ('-a', '-a', 0),
         ('a-b-c-', 'a-b-c-d', -1),
         ('a-b-c-', 'a-b-c-.', 0),
         ('a-b-c-', 'a-b-c-d a-b-c- d', 8),
     )
     for word, text, expected in cases:
         actual = index_of(word, text)
         message = 'Failed to find index of %r in %r (%d != %d)' % (word, text, expected, actual)
         self.ae(expected, actual, message)
Example #5
0
def replace(text, original_word, new_word, lang):
    '''
    Replace every occurrence of ``original_word`` that ``index_of`` finds in
    ``text`` with ``new_word``.

    All three strings are first passed through ``unicode_type``. Occurrences
    are located by repeatedly searching the part of the text that follows
    the previous hit.

    :return: ``(new_text, changed)`` where ``changed`` is True when at least
             one occurrence was replaced.
    '''
    original_word = unicode_type(original_word)
    new_word = unicode_type(new_word)
    text = unicode_type(text)
    word_len = len(original_word)

    hits = []  # absolute start positions of the occurrences, in ascending order
    cursor = 0  # where the not-yet-searched remainder of text begins
    pos = index_of(original_word, text, lang=lang)
    while pos != -1:
        hits.append(cursor + pos)
        cursor += pos + word_len
        pos = index_of(original_word, text[cursor:], lang=lang)

    # Splice from the back so the positions still to be processed stay valid
    for start in reversed(hits):
        text = text[:start] + new_word + text[start + word_len:]
    return text, bool(hits)
Example #6
0
 def test_break_iterator(self):
     ' Check word splitting and whole-word lookup done by the break iterator '
     from calibre.spell.break_iterator import split_into_words as split, index_of

     # Whitespace, hyphens and commas all separate the same three words
     samples = ('one two three', ' one two three', 'one\ntwo  three ', 'one-two,three')
     for text in samples:
         words = split(unicode(text))
         self.ae(words, ['one', 'two', 'three'], 'Failed to split: %r' % text)
     self.ae(split(u'I I\'m'), ['I', "I'm"])

     # (expected index or -1 when absent, word to look for, text to search)
     lookups = (
         (0, 'i', 'i'),
         (4, 'i', 'six i'),
         (-1, 'i', ''),
         (-1, '', ''),
         (-1, '', 'i'),
         (-1, 'i', 'six clicks'),
     )
     for expected, word, text in lookups:
         self.ae(expected, index_of(word, text))
Example #7
0
 def test_break_iterator(self):
     ' Verify how the break iterator splits text and locates whole words '
     from calibre.spell.break_iterator import split_into_words as split, index_of

     three_words = ['one', 'two', 'three']
     # Each of these inputs is expected to yield exactly the same three words
     for raw in ('one two three', ' one two three', 'one\ntwo  three ', 'one-two,three'):
         failure_note = 'Failed to split: %r' % raw
         self.ae(split(unicode(raw)), three_words, failure_note) if False else \
             self.ae(split(unicode(raw)), ['one', 'two', 'three'], 'Failed to split: %r' % raw)
     self.ae(split(u'I I\'m'), ['I', "I'm"])

     # (word to look for, text to search, expected index or -1 when absent)
     for needle, haystack, wanted in (
             ('i', 'i', 0),
             ('i', 'six i', 4),
             ('i', '', -1),
             ('', '', -1),
             ('', 'i', -1),
             ('i', 'six clicks', -1),
     ):
         self.ae(wanted, index_of(needle, haystack))
Example #8
0
 def test_break_iterator(self):
     ' Check splitting, word positions and whole-word lookup of the break iterator '
     from calibre.spell.break_iterator import split_into_words as split, index_of, split_into_words_and_positions

     # Whitespace, hyphens and commas all separate the same three words
     samples = ('one two three', ' one two three', 'one\ntwo  three ', 'one-two,three')
     for text in samples:
         words = split(unicode(text))
         self.ae(words, ['one', 'two', 'three'], 'Failed to split: %r' % text)
     self.ae(split(u'I I\'m'), ['I', "I'm"])

     # The expected start of the last word is 6 when sys.maxunicode covers the
     # full Unicode range and 7 otherwise
     found = split_into_words_and_positions('one \U0001f431 three')
     last_word_start = 6 if sys.maxunicode >= 0x10ffff else 7
     self.ae(found, [(0, 3), (last_word_start, 5)])

     # (expected index or -1 when absent, word to look for, text to search)
     lookups = (
         (0, 'i', 'i'),
         (4, 'i', 'six i'),
         (-1, 'i', ''),
         (-1, '', ''),
         (-1, '', 'i'),
         (-1, 'i', 'six clicks'),
     )
     for expected, word, text in lookups:
         self.ae(expected, index_of(word, text))