Example #1
0
def test_unicode_blocks():
    """Check ``blockname``/``blocknum`` lookups for three sample characters.

    The samples are ``'a'``, U+0B80 and code point 2048 (0x0800); the test
    expects the last one to map to no block at all (``None``).
    """
    from whoosh.support.unicode import blocks, blockname, blocknum

    # Sample characters, built once and reused by every assertion below.
    latin_a = u('a')
    tamil_char = unichr(0x0b80)
    no_block_char = unichr(2048)

    # Lookups by block name.
    assert_equal(blockname(latin_a), 'Basic Latin')
    assert_equal(blockname(tamil_char), 'Tamil')
    assert_equal(blockname(no_block_char), None)

    # Lookups by block number, as literal values...
    assert_equal(blocknum(latin_a), 0)
    assert_equal(blocknum(tamil_char), 22)
    assert_equal(blocknum(no_block_char), None)

    # ...and via the named attributes of ``blocks``.
    assert_equal(blocknum(latin_a), blocks.Basic_Latin)  #@UndefinedVariable
    assert_equal(blocknum(tamil_char), blocks.Tamil)  #@UndefinedVariable
Example #2
0
def test_unicode_blocks():
    """Check ``blockname``/``blocknum`` lookups for three sample characters.

    The samples are ``'a'``, U+0B80 and code point 2048 (0x0800); the test
    expects the last one to map to no block at all (``None``).
    """
    from whoosh.support.unicode import blocks, blockname, blocknum

    # Sample characters, built once and reused by every assertion below.
    latin_a = u('a')
    tamil_char = unichr(0x0b80)
    no_block_char = unichr(2048)

    # Lookups by block name.
    assert_equal(blockname(latin_a), 'Basic Latin')
    assert_equal(blockname(tamil_char), 'Tamil')
    assert_equal(blockname(no_block_char), None)

    # Lookups by block number, as literal values...
    assert_equal(blocknum(latin_a), 0)
    assert_equal(blocknum(tamil_char), 22)
    assert_equal(blocknum(no_block_char), None)

    # ...and via the named attributes of ``blocks``.
    assert_equal(blocknum(latin_a), blocks.Basic_Latin)  #@UndefinedVariable
    assert_equal(blocknum(tamil_char), blocks.Tamil)  #@UndefinedVariable
Example #3
0
def test_unicode_blocks():
    """Check ``blockname``/``blocknum`` lookups for three sample characters.

    The samples are ``'a'``, U+0B80 and code point 2048 (0x0800); the test
    expects the last one to map to no block at all (``None``).
    """
    from whoosh.support.unicode import blocks, blockname, blocknum

    # Sample characters, built once and reused by every assertion below.
    latin_a = u('a')
    tamil_char = unichr(0x0b80)
    no_block_char = unichr(2048)

    # Lookups by block name.
    assert blockname(latin_a) == 'Basic Latin'
    assert blockname(tamil_char) == 'Tamil'
    assert blockname(no_block_char) is None

    # Lookups by block number, as literal values...
    assert blocknum(latin_a) == 0
    assert blocknum(tamil_char) == 22
    assert blocknum(no_block_char) is None

    # ...and via the named attributes of ``blocks``.
    assert blocknum(latin_a) == blocks.Basic_Latin  # @UndefinedVariable
    assert blocknum(tamil_char) == blocks.Tamil  # @UndefinedVariable
Example #4
0
def test_unicode_blocks():
    """Check ``blockname``/``blocknum`` lookups for three sample characters.

    The samples are ``'a'``, U+0B80 and code point 2048 (0x0800); the test
    expects the last one to map to no block at all (``None``).
    """
    from whoosh.support.unicode import blocks, blockname, blocknum

    # Sample characters, built once and reused by every assertion below.
    latin_a = u('a')
    tamil_char = unichr(0x0b80)
    no_block_char = unichr(2048)

    # Lookups by block name.
    assert blockname(latin_a) == 'Basic Latin'
    assert blockname(tamil_char) == 'Tamil'
    assert blockname(no_block_char) is None

    # Lookups by block number, as literal values...
    assert blocknum(latin_a) == 0
    assert blocknum(tamil_char) == 22
    assert blocknum(no_block_char) is None

    # ...and via the named attributes of ``blocks``.
    assert blocknum(latin_a) == blocks.Basic_Latin  # @UndefinedVariable
    assert blocknum(tamil_char) == blocks.Tamil  # @UndefinedVariable
Example #5
0
    def __init__(self, dbfile, offset, expand=True):
        """Read the node stored at ``offset`` in ``dbfile``.

        The first byte read is a flags byte:

        * ``1`` -- the node is final (stored as ``self.final``).
        * ``2`` -- edge data follows; without it ``self._edges`` stays empty.
        * ``4`` -- every edge key is a single character stored as a number.
        * ``8`` -- those numbers are one byte each (otherwise read with
          ``read_ushort``).

        :param dbfile: file-like object providing ``seek``, ``read_byte``,
            ``read_varint``, ``read_array``, ``read_ushort`` and
            ``read_string``.
        :param offset: position of the node in ``dbfile``; also kept as
            ``self.id``.
        :param expand: when true, a multi-character key is stored under its
            first character as a ``PatNode`` built from the remaining
            characters and the edge's pointer; otherwise the whole key maps
            directly to the pointer.
        """
        self.id = offset
        self.dbfile = dbfile

        dbfile.seek(offset)
        flags = dbfile.read_byte()
        self.final = bool(flags & 1)
        self._edges = {}
        if not flags & 2:
            # No edge data was written for this node.
            return

        single_chars = flags & 4
        one_byte_codes = flags & 8

        nkeys = dbfile.read_varint()

        # All pointers come first, then the keys are read one at a time.
        ptrs = dbfile.read_array("I", nkeys)
        edges = self._edges
        for index in xrange(nkeys):
            target = ptrs[index]
            if not single_chars:
                # String key: stored as a UTF-8 encoded string.
                key = utf8decode(dbfile.read_string())[0]
                if len(key) > 1 and expand:
                    edges[key[0]] = PatNode(dbfile, key[1:], target)
                else:
                    edges[key] = target
                continue

            # Single-character key: stored as a character number.
            if one_byte_codes:
                charnum = dbfile.read_byte()
            else:
                charnum = dbfile.read_ushort()
            edges[unichr(charnum)] = target
Example #6
0
    def __init__(self, dbfile, offset, expand=True):
        """Read the node stored at ``offset`` in ``dbfile``.

        The first byte read is a flags byte:

        * ``1`` -- the node is final (stored as ``self.final``).
        * ``2`` -- edge data follows; without it ``self._edges`` stays empty.
        * ``4`` -- every edge key is a single character stored as a number.
        * ``8`` -- those numbers are one byte each (otherwise read with
          ``read_ushort``).

        :param dbfile: file-like object providing ``seek``, ``read_byte``,
            ``read_varint``, ``read_array``, ``read_ushort`` and
            ``read_string``.
        :param offset: position of the node in ``dbfile``; also kept as
            ``self.id``.
        :param expand: when true, a multi-character key is stored under its
            first character as a ``PatNode`` built from the remaining
            characters and the edge's pointer; otherwise the whole key maps
            directly to the pointer.
        """
        self.id = offset
        self.dbfile = dbfile

        dbfile.seek(offset)
        flags = dbfile.read_byte()
        self.final = bool(flags & 1)
        self._edges = {}
        if not flags & 2:
            # No edge data was written for this node.
            return

        single_chars = flags & 4
        one_byte_codes = flags & 8

        nkeys = dbfile.read_varint()

        # All pointers come first, then the keys are read one at a time.
        ptrs = dbfile.read_array("I", nkeys)
        edges = self._edges
        for index in xrange(nkeys):
            target = ptrs[index]
            if not single_chars:
                # String key: stored as a UTF-8 encoded string.
                key = utf8decode(dbfile.read_string())[0]
                if len(key) > 1 and expand:
                    edges[key[0]] = PatNode(dbfile, key[1:], target)
                else:
                    edges[key] = target
                continue

            # Single-character key: stored as a character number.
            if one_byte_codes:
                charnum = dbfile.read_byte()
            else:
                charnum = dbfile.read_ushort()
            edges[unichr(charnum)] = target
Example #7
0
    def find_matches(dfa, cur):
        """Yield every string accepted by ``dfa`` that ``cur`` lands on.

        Starting from the cursor's current text, the DFA is asked for its
        next valid string, the cursor is moved with ``cur.find`` and the
        string is yielded when the cursor's text equals it. Iteration stops
        when the cursor's text is ``None`` or the DFA returns a falsy value.

        :param dfa: object providing ``next_valid_string(text)``.
        :param cur: cursor object providing ``text()`` and ``find(text)``.
        """
        nul = unichr(0)

        term = cur.text()
        while term is not None:
            candidate = dfa.next_valid_string(term)
            if not candidate:
                break

            cur.find(candidate)
            term = cur.text()
            if term is not None and candidate == term:
                yield candidate
                # Append a null character so the next DFA query starts
                # strictly after the string that was just yielded.
                term += nul
Example #8
0
    def find_matches(dfa, cur):
        """Yield every string accepted by ``dfa`` that ``cur`` lands on.

        Starting from the cursor's current text, the DFA is asked for its
        next valid string, the cursor is moved with ``cur.find`` and the
        string is yielded when the cursor's text equals it. Iteration stops
        when the cursor's text is ``None`` or the DFA returns a falsy value.

        :param dfa: object providing ``next_valid_string(text)``.
        :param cur: cursor object providing ``text()`` and ``find(text)``.
        """
        nul = unichr(0)

        term = cur.text()
        while term is not None:
            candidate = dfa.next_valid_string(term)
            if not candidate:
                break

            cur.find(candidate)
            term = cur.text()
            if term is not None and candidate == term:
                yield candidate
                # Append a null character so the next DFA query starts
                # strictly after the string that was just yielded.
                term += nul
Example #9
0
    def find_next_edge(self, s, label, asbytes):
        """Return the first usable edge label of state ``s`` after ``label``.

        :param s: the state whose outgoing transitions are examined.
        :param label: the previous label, or ``None`` to start from the
            lowest possible label.
        :param asbytes: when true, a non-``None`` label is advanced with
            ``label + 1``; otherwise with ``unichr(ord(label) + 1)``.
        :returns: the advanced label itself if ``s`` has a transition on it
            or ``s`` has an entry in ``self.defaults``; otherwise the
            smallest transition label of ``s`` that is not less than the
            advanced label, or ``None`` if there is none.
        """
        if label is not None:
            # Step to the immediate successor of the previous label.
            if asbytes:
                label = label + 1
            else:
                label = unichr(ord(label) + 1)
        elif asbytes:
            label = b"\x00"
        else:
            label = u'\0'

        trans = self.transitions.get(s, {})
        if label in trans or s in self.defaults:
            return label

        # The sorted labels of each state are computed once and cached in
        # ``self.outlabels``.
        try:
            labels = self.outlabels[s]
        except KeyError:
            self.outlabels[s] = labels = sorted(trans)

        pos = bisect_left(labels, label)
        return labels[pos] if pos < len(labels) else None
Example #10
0
    def find_next_edge(self, s, label, asbytes):
        """Return the first usable edge label of state ``s`` after ``label``.

        :param s: the state whose outgoing transitions are examined.
        :param label: the previous label, or ``None`` to start from the
            lowest possible label.
        :param asbytes: when true, a non-``None`` label is advanced with
            ``label + 1``; otherwise with ``unichr(ord(label) + 1)``.
        :returns: the advanced label itself if ``s`` has a transition on it
            or ``s`` has an entry in ``self.defaults``; otherwise the
            smallest transition label of ``s`` that is not less than the
            advanced label, or ``None`` if there is none.
        """
        if label is not None:
            # Step to the immediate successor of the previous label.
            if asbytes:
                label = label + 1
            else:
                label = unichr(ord(label) + 1)
        elif asbytes:
            label = b"\x00"
        else:
            label = u'\0'

        trans = self.transitions.get(s, {})
        if label in trans or s in self.defaults:
            return label

        # The sorted labels of each state are computed once and cached in
        # ``self.outlabels``.
        try:
            labels = self.outlabels[s]
        except KeyError:
            self.outlabels[s] = labels = sorted(trans)

        pos = bisect_left(labels, label)
        return labels[pos] if pos < len(labels) else None
Example #11
0
from __future__ import print_function

import itertools
import operator
import sys
from bisect import bisect_left

from whoosh.compat import iteritems, next, text_type, unichr, xrange


# One-character string for code point 0, built once at import time.
unull = unichr(0)


# Marker constants

class Marker(object):
    """Named sentinel object whose ``repr`` is its name in angle brackets.

    :param name: the value shown by ``repr``; stored as ``self.name``.
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # Wrap the name in a 1-tuple: with a bare ``%`` operand, a tuple
        # name would be unpacked as several format arguments and raise
        # TypeError instead of being formatted as one value.
        return "<%s>" % (self.name,)


# Module-level Marker instances, each created once at import time.
# NOTE(review): presumably used as special transition labels by the FSA
# classes in this module -- confirm against their usage.
EPSILON = Marker("EPSILON")
ANY = Marker("ANY")


# Base class

class FSA(object):
    def __init__(self, initial):
Example #12
0
from __future__ import print_function

import itertools
import operator
import sys
from bisect import bisect_left
from collections import defaultdict

from whoosh.compat import iteritems, next, text_type, unichr, xrange


# One-character string for code point 0, built once at import time.
unull = unichr(0)


# Marker constants

class Marker(object):
    """Named sentinel object whose ``repr`` is its name in angle brackets.

    :param name: the value shown by ``repr``; stored as ``self.name``.
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # Wrap the name in a 1-tuple: with a bare ``%`` operand, a tuple
        # name would be unpacked as several format arguments and raise
        # TypeError instead of being formatted as one value.
        return "<%s>" % (self.name,)


# Module-level Marker instances, each created once at import time.
# NOTE(review): presumably used as special transition labels by the FSA
# classes in this module -- confirm against their usage.
EPSILON = Marker("EPSILON")
ANY = Marker("ANY")


# Base class

class FSA(object):
Example #13
0
 def random_token():
     """Return a random string of 19 characters.

     Each character is drawn independently from code points 0 through
     0xD7FF inclusive (0xD7FF is the last code point before the surrogate
     range).
     """
     chars = []
     for _ in xrange(1, 20):
         chars.append(unichr(random.randint(0, 0xd7ff)))
     return "".join(chars)
Example #14
0
 def random_token():
     """Return a random string of 19 characters.

     Each character is drawn independently from code points 0 through
     0xD7FF inclusive (0xD7FF is the last code point before the surrogate
     range).
     """
     chars = []
     for _ in xrange(1, 20):
         chars.append(unichr(random.randint(0, 0xd7ff)))
     return "".join(chars)