def test_unicode_blocks():
    """Check ``blockname``/``blocknum`` for three sample characters.

    The samples are ``a``, U+0B80, and code point 2048; the last one is
    expected to map to ``None`` for both lookups.
    """
    from whoosh.support.unicode import blocks, blockname, blocknum

    latin_a = u('a')
    tamil = unichr(0x0b80)
    unmapped = unichr(2048)

    # Block names, checked in a fixed order.
    expected_names = [
        (latin_a, 'Basic Latin'),
        (tamil, 'Tamil'),
        (unmapped, None),
    ]
    for char, name in expected_names:
        assert_equal(blockname(char), name)

    # Block numbers as literal values.
    expected_numbers = [
        (latin_a, 0),
        (tamil, 22),
        (unmapped, None),
    ]
    for char, number in expected_numbers:
        assert_equal(blocknum(char), number)

    # The same numbers must also be exposed as attributes of ``blocks``.
    assert_equal(blocknum(latin_a), blocks.Basic_Latin)  # @UndefinedVariable
    assert_equal(blocknum(tamil), blocks.Tamil)  # @UndefinedVariable
def test_unicode_blocks():
    """Check ``blockname``/``blocknum`` for three sample characters.

    The samples are ``a``, U+0B80, and code point 2048; the last one is
    expected to map to ``None`` for both lookups.
    """
    from whoosh.support.unicode import blocks, blockname, blocknum

    latin_a = u('a')
    tamil = unichr(0x0b80)
    unmapped = unichr(2048)

    # Lookups by block name.
    assert blockname(latin_a) == 'Basic Latin'
    assert blockname(tamil) == 'Tamil'
    assert blockname(unmapped) is None

    # Lookups by block number, as literal values.
    assert blocknum(latin_a) == 0
    assert blocknum(tamil) == 22
    assert blocknum(unmapped) is None

    # The same numbers must also be exposed as attributes of ``blocks``.
    assert blocknum(latin_a) == blocks.Basic_Latin  # @UndefinedVariable
    assert blocknum(tamil) == blocks.Tamil  # @UndefinedVariable
def __init__(self, dbfile, offset, expand=True):
    """Read one node record from ``dbfile`` starting at ``offset``.

    The first byte of the record is a flag field:

    * bit 1 -- the node is final (stored in ``self.final``);
    * bit 2 -- an edge table follows;
    * bit 4 -- every edge key is a single character stored as a number;
    * bit 8 -- that number is one byte wide (otherwise an unsigned short).

    :param dbfile: object providing ``seek``, ``read_byte``,
        ``read_varint``, ``read_array``, ``read_ushort`` and
        ``read_string``.
    :param offset: position of the record in ``dbfile``; also stored as
        ``self.id``.
    :param expand: when true, a key longer than one character is stored
        under its first character, pointing at a ``PatNode`` built from
        the remainder of the key and the edge pointer.
    """
    self.id = offset
    self.dbfile = dbfile

    dbfile.seek(offset)
    flags = dbfile.read_byte()
    self.final = bool(flags & 1)
    self._edges = {}

    if not flags & 2:
        # No edge table follows the flag byte.
        return

    single_chars = flags & 4
    one_byte_chars = flags & 8
    nkeys = dbfile.read_varint()
    ptrs = dbfile.read_array("I", nkeys)

    for index in xrange(nkeys):
        ptr = ptrs[index]

        if not single_chars:
            # Keys are stored as UTF-8 strings of arbitrary length.
            key = utf8decode(dbfile.read_string())[0]
            if len(key) > 1 and expand:
                self._edges[key[0]] = PatNode(dbfile, key[1:], ptr)
            else:
                self._edges[key] = ptr
            continue

        # Keys are single characters stored as their numeric code.
        charnum = (dbfile.read_byte() if one_byte_chars
                   else dbfile.read_ushort())
        self._edges[unichr(charnum)] = ptr
def find_matches(dfa, cur):
    """Yield every string on which ``dfa`` and the cursor ``cur`` agree.

    Starting from the cursor's current text, the loop repeatedly asks
    ``dfa.next_valid_string`` for a candidate, moves the cursor with
    ``cur.find(candidate)``, and yields the candidate when the cursor's
    text equals it. Iteration stops when ``cur.text()`` returns ``None``
    or when the DFA returns a falsy candidate.

    :param dfa: object providing ``next_valid_string(text)``.
    :param cur: object providing ``text()`` and ``find(text)``.
    """
    null_char = unichr(0)

    term = cur.text()
    while term is not None:
        candidate = dfa.next_valid_string(term)
        if not candidate:
            break

        cur.find(candidate)
        term = cur.text()
        if term is not None and candidate == term:
            yield candidate
            # Appending NUL makes the next query start just past the
            # string that was yielded.
            term += null_char
def find_next_edge(self, s, label, asbytes):
    """Return the next usable edge label of state ``s`` after ``label``.

    A candidate label is computed first: the lowest label when ``label``
    is ``None``, otherwise the successor of ``label``. The candidate is
    returned as-is when it is a key of ``self.transitions[s]`` or when
    ``s`` has an entry in ``self.defaults``. Otherwise the first label of
    ``s`` (in sorted order) that is not smaller than the candidate is
    returned, or ``None`` when there is none.

    The sorted label list of each state is cached in ``self.outlabels``.

    :param s: state whose outgoing labels are searched.
    :param label: previous label, or ``None`` to start from the lowest.
    :param asbytes: selects byte-style labels instead of characters.
    """
    # NOTE(review): in bytes mode the ``None`` case yields ``b"\x00"``
    # while the other case does integer arithmetic on ``label`` -- confirm
    # which label type callers actually use.
    if label is not None:
        candidate = (label + 1) if asbytes else unichr(ord(label) + 1)
    elif asbytes:
        candidate = b"\x00"
    else:
        candidate = u'\0'

    edges = self.transitions.get(s, {})
    if candidate in edges or s in self.defaults:
        return candidate

    try:
        ordered = self.outlabels[s]
    except KeyError:
        # First lookup for this state: build and remember the sorted labels.
        ordered = sorted(edges)
        self.outlabels[s] = ordered

    index = bisect_left(ordered, candidate)
    return ordered[index] if index < len(ordered) else None
from __future__ import print_function

import itertools
import operator
import sys
from bisect import bisect_left

from whoosh.compat import iteritems, next, text_type, unichr, xrange


# The character with code point 0.
unull = unichr(0)


# Marker constants

class Marker(object):
    """Named sentinel object whose ``repr`` is its name in angle brackets."""

    def __init__(self, name):
        # ``name`` is the text that ``__repr__`` displays.
        self.name = name

    def __repr__(self):
        return "<%s>" % self.name


# Module-level sentinels. NOTE(review): presumably special edge labels (an
# empty "epsilon" move and a match-anything label) -- confirm against the
# automaton code that uses them.
EPSILON = Marker("EPSILON")
ANY = Marker("ANY")


# Base class

class FSA(object):
    def __init__(self, initial):
from __future__ import print_function

import itertools
import operator
import sys
from bisect import bisect_left
from collections import defaultdict

from whoosh.compat import iteritems, next, text_type, unichr, xrange


# The character with code point 0.
unull = unichr(0)


# Marker constants

class Marker(object):
    """Named sentinel object whose ``repr`` is its name in angle brackets."""

    def __init__(self, name):
        # ``name`` is the text that ``__repr__`` displays.
        self.name = name

    def __repr__(self):
        return "<%s>" % self.name


# Module-level sentinels. NOTE(review): presumably special edge labels (an
# empty "epsilon" move and a match-anything label) -- confirm against the
# automaton code that uses them.
EPSILON = Marker("EPSILON")
ANY = Marker("ANY")


# Base class

class FSA(object):
def random_token():
    """Return a string of 19 random characters.

    Each character is drawn with ``random.randint`` from code points 0
    through 0xd7ff inclusive, which stays below the surrogate range.
    """
    highest_code_point = 0xd7ff
    chars = []
    for _ in xrange(1, 20):
        chars.append(unichr(random.randint(0, highest_code_point)))
    return "".join(chars)
def random_token():
    """Return a string of 19 random characters.

    The code points are chosen first, each with ``random.randint`` from
    0 through 0xd7ff inclusive (below the surrogate range), and then
    converted to characters and joined.
    """
    code_points = [random.randint(0, 0xd7ff) for _ in xrange(1, 20)]
    return "".join(map(unichr, code_points))