コード例 #1
0
def test_match():
    """Check FTS4 MATCH results on a table built with the registered tokenizer.

    Four rows are inserted (two ASCII, two Japanese). The test then checks
    the number of hits, and where a single row is expected its content, for
    terms that are shared by two rows, unique to one row, or absent.
    """
    c = apsw.Connection(":memory:")
    try:
        name = "simple"
        contents = [("abc def",), ("abc xyz",), ("あいうえお かきくけこ",), ("あいうえお らりるれろ",)]
        fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer()))
        c.cursor().execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name))
        c.cursor().executemany("INSERT INTO fts VALUES(?)", contents)
        r = c.cursor().execute("SELECT * FROM fts").fetchall()
        assert len(r) == 4
        r = c.cursor().execute("SELECT * FROM fts WHERE fts MATCH 'abc'").fetchall()
        assert len(r) == 2
        r = c.cursor().execute("SELECT content FROM fts WHERE fts MATCH 'def'").fetchall()
        assert len(r) == 1 and r[0][0] == contents[0][0]
        r = c.cursor().execute("SELECT content FROM fts WHERE fts MATCH 'xyz'").fetchall()
        assert len(r) == 1 and r[0][0] == contents[1][0]
        r = c.cursor().execute("SELECT * FROM fts WHERE fts MATCH 'zzz'").fetchall()
        assert len(r) == 0
        r = c.cursor().execute("SELECT * FROM fts WHERE fts MATCH 'あいうえお'").fetchall()
        assert len(r) == 2
        r = c.cursor().execute("SELECT content FROM fts WHERE fts MATCH 'かきくけこ'").fetchall()
        assert len(r) == 1 and r[0][0] == contents[2][0]
        r = c.cursor().execute("SELECT content FROM fts WHERE fts MATCH 'らりるれろ'").fetchall()
        assert len(r) == 1 and r[0][0] == contents[3][0]
        r = c.cursor().execute("SELECT * FROM fts WHERE fts MATCH 'まみむめも'").fetchall()
        assert len(r) == 0
    finally:
        # Close the connection even when one of the assertions above fails.
        c.close()
コード例 #2
0
ファイル: test_base.py プロジェクト: enquos/sqlite-fts-python
def test_match():
    """Run MATCH queries against an FTS4 table and check hit counts/content.

    Each case is ``(term, expected hit count, index into contents or None)``;
    when an index is given the single returned row must carry that content.
    """
    conn = sqlite3.connect(':memory:')
    conn.row_factory = sqlite3.Row
    name = 'simple'
    contents = [
        ('abc def', ),
        ('abc xyz', ),
        ('あいうえお かきくけこ', ),
        ('あいうえお らりるれろ', ),
    ]
    module = fts.make_tokenizer_module(SimpleTokenizer())
    fts.register_tokenizer(conn, name, module)
    conn.execute(
        "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name))

    inserted = conn.executemany('INSERT INTO fts VALUES(?)', contents)
    assert inserted.rowcount == 4
    everything = conn.execute("SELECT * FROM fts").fetchall()
    assert len(everything) == 4

    cases = [
        ('abc', 2, None),
        ('def', 1, 0),
        ('xyz', 1, 1),
        ('zzz', 0, None),
        ('あいうえお', 2, None),
        ('かきくけこ', 1, 2),
        ('らりるれろ', 1, 3),
        ('まみむめも', 0, None),
    ]
    for term, hits, source_index in cases:
        query = "SELECT * FROM fts WHERE fts MATCH '{}'".format(term)
        rows = conn.execute(query).fetchall()
        assert len(rows) == hits
        if source_index is not None:
            assert rows[0][str('content')] == contents[source_index][0]
    conn.close()
コード例 #3
0
def test_reginster_tokenizer(name, t):
    """Check that FTS3_TOKENIZER(name) returns the address of the module.

    The module built from ``t`` is registered under ``name``; the blob read
    back from SQLite is unpacked as a pointer and compared with the address
    of the module object.
    """
    conn = sqlite3.connect(':memory:')
    module = fts.make_tokenizer_module(t)
    fts.register_tokenizer(conn, name, module)
    row = conn.execute("SELECT FTS3_TOKENIZER(?)", (name,)).fetchone()
    pointer_blob = row[0]
    expected_address = ctypes.addressof(module)
    # "P" is a native pointer, so the unpacked tuple has a single element.
    registered_address = struct.unpack("P", pointer_blob)[0]
    assert expected_address == registered_address
    conn.close()
コード例 #4
0
def test_match():
    """Check MATCH hit counts and row content on an FTS4 table.

    Terms shared by two rows must return two hits, terms unique to one row
    must return exactly that row, and unknown terms must return nothing.
    """
    db = sqlite3.connect(':memory:')
    db.row_factory = sqlite3.Row
    name = 'simple'
    contents = [('abc def',),
                ('abc xyz',),
                ('あいうえお かきくけこ',),
                ('あいうえお らりるれろ',)]
    fts.register_tokenizer(
        db, name, fts.make_tokenizer_module(SimpleTokenizer()))
    db.execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name))

    cursor = db.executemany('INSERT INTO fts VALUES(?)', contents)
    assert cursor.rowcount == 4
    assert len(db.execute("SELECT * FROM fts").fetchall()) == 4

    def matching(term):
        # Same SQL text as a hand-written query for this term.
        sql = "SELECT * FROM fts WHERE fts MATCH '{}'".format(term)
        return db.execute(sql).fetchall()

    def check_count(term, count):
        assert len(matching(term)) == count

    def check_single(term, position):
        found = matching(term)
        assert len(found) == 1
        assert found[0][str('content')] == contents[position][0]

    check_count('abc', 2)
    check_single('def', 0)
    check_single('xyz', 1)
    check_count('zzz', 0)
    check_count('あいうえお', 2)
    check_single('かきくけこ', 2)
    check_single('らりるれろ', 3)
    check_count('まみむめも', 0)
    db.close()
コード例 #5
0
def test_make_tokenizer(c):
    """Check that every expected field of the tokenizer module is set.

    ``c`` is only closed here; it is not otherwise used by the test.
    """
    module = fts.make_tokenizer_module(SimpleTokenizer())
    expected_fields = ('iVersion', 'xClose', 'xCreate', 'xDestroy',
                       'xLanguageid', 'xNext', 'xOpen')
    for field in expected_fields:
        assert getattr(module, field) is not None
    c.close()
コード例 #6
0
def test_tokenizer_output(name, t):
    """Check token text, byte offsets and positions reported by fts3tokenize.

    The tokenizer built from ``t`` is registered as ``name`` and queried
    through an ``fts3tokenize`` virtual table, first with an English
    sentence and then with a Japanese sentence whose spaces are removed.

    The produced rows are compared pairwise with the expectation. Because
    ``zip`` stops at the shorter input, the row count is checked first so an
    empty or short result cannot pass without being compared. Extra trailing
    rows are still tolerated, as before.
    """
    with sqlite3.connect(":memory:") as c:
        fts.register_tokenizer(c, name, fts.make_tokenizer_module(t))
        c.execute("CREATE VIRTUAL TABLE tok1 USING fts3tokenize({})".format(name))

        def check(cursor, expect):
            # Materialise the rows so they can be counted before comparing.
            actual = list(cursor)
            assert len(actual) >= len(expect)
            for a, e in zip(actual, expect):
                assert e == a

        expect = [
            ("This", 0, 4, 0),
            ("is", 5, 7, 1),
            ("a", 8, 9, 2),
            ("test", 10, 14, 3),
            ("sentence", 15, 23, 4),
        ]
        check(
            c.execute(
                "SELECT token, start, end, position "
                "FROM tok1 WHERE input='This is a test sentence.'"
            ),
            expect,
        )

        s = "これ は テスト の 文 です"
        # The input has no spaces, so each token starts where the previous
        # one ended; offsets are counted in UTF-8 bytes.
        expect = []
        offset = 0
        for i, txt in enumerate(s.split()):
            end = offset + len(txt.encode("utf-8"))
            expect.append((txt, offset, end, i))
            offset = end
        check(
            c.execute(
                "SELECT token, start, end, position " "FROM tok1 WHERE input=?",
                [s.replace(" ", "")],
            ),
            expect,
        )
コード例 #7
0
ファイル: test_base.py プロジェクト: enquos/sqlite-fts-python
def test_tokenizer_output():
    """Check token text, byte offsets and positions reported by fts3tokenize.

    A ``SimpleTokenizer`` module is registered as ``simple`` and queried
    through an ``fts3tokenize`` virtual table with an English and a
    space-separated Japanese sentence.

    The produced rows are compared pairwise with the expectation. Because
    ``zip`` stops at the shorter input, the row count is checked first so an
    empty or short result cannot pass without being compared. Extra trailing
    rows are still tolerated, as before.
    """
    name = 'simple'
    with sqlite3.connect(':memory:') as c:
        fts.register_tokenizer(c, name,
                               fts.make_tokenizer_module(SimpleTokenizer()))
        c.execute("CREATE VIRTUAL TABLE tok1 USING fts3tokenize({})".format(
            name))

        def check(cursor, expect):
            # Materialise the rows so they can be counted before comparing.
            actual = list(cursor)
            assert len(actual) >= len(expect)
            for a, e in zip(actual, expect):
                assert e == a

        expect = [("This", 0, 4, 0), ("is", 5, 7, 1), ("a", 8, 9, 2),
                  ("test", 10, 14, 3), ("sentence", 15, 23, 4)]
        check(
            c.execute("SELECT token, start, end, position "
                      "FROM tok1 WHERE input='This is a test sentence.'"),
            expect)

        s = 'これ は テスト の 文 です'
        # Tokens are separated by one space (one byte), so each token starts
        # one byte after the previous end; offsets are in UTF-8 bytes.
        expect = []
        start = 0
        for i, t in enumerate(s.split()):
            end = start + len(t.encode('utf-8'))
            expect.append((t, start, end, i))
            start = end + 1
        check(
            c.execute("SELECT token, start, end, position "
                      "FROM tok1 WHERE input=?", [s]),
            expect)
コード例 #8
0
    def setUp(self):
        """Build an in-memory database with populated FTS3 and FTS4 tables.

        Registers a ``Tokenizer`` module as ``test``, creates the tables
        ``fts3`` and ``fts4`` with it, inserts the same rows into both,
        registers the SQL functions ``bm25`` and ``rank``, and stores the
        connection on ``self.testee``.
        """
        name = 'test'
        db = sqlite3.connect(':memory:')
        db.row_factory = sqlite3.Row

        module = fts.make_tokenizer_module(Tokenizer())
        fts.register_tokenizer(db, name, module)

        ddl_templates = (
            'CREATE VIRTUAL TABLE fts3 USING FTS3(tokenize={})',
            'CREATE VIRTUAL TABLE fts4 USING FTS4(tokenize={})',
        )
        for template in ddl_templates:
            db.execute(template.format(name))

        values = [
            ('Make thing I',),
            ('Some thing φχικλψ thing',),
            ('Fusce volutpat hendrerit sem. Fusce sit amet vulputate dui. '
             'Sed posuere mi a nisl aliquet tempor. Praesent tincidunt vel nunc ac pharetra.',),
            ('Nam molestie euismod leo id aliquam. In hac habitasse platea dictumst.',),
            ('Vivamus tincidunt feugiat tellus ac bibendum. In rhoncus dignissim suscipit.',),
            ('Pellentesque hendrerit nulla rutrum luctus rutrum. Fusce hendrerit fermentum nunc at posuere.',),
        ]
        expected_rows = len(values)
        for table in ('fts3', 'fts4'):
            insert_sql = 'INSERT INTO {0} VALUES(?)'.format(table)
            cursor = db.executemany(insert_sql, values)
            assert cursor.rowcount == expected_rows

        db.create_function('bm25', 2, ranking.bm25)
        db.create_function('rank', 1, ranking.simple)

        self.testee = db
コード例 #9
0
    def setUp(self):
        """Create an in-memory FTS4 table using a ``DebugTokenizer`` module.

        The connection is stored on ``self.testee``.
        """
        name = "test"
        db = sqlite3.connect(":memory:")

        module = fts.make_tokenizer_module(DebugTokenizer())
        fts.register_tokenizer(db, name, module)
        create_sql = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)
        db.execute(create_sql)

        self.testee = db
コード例 #10
0
ファイル: test_base.py プロジェクト: enquos/sqlite-fts-python
def test_register_tokenizer():
    """Check that FTS3_TOKENIZER(name) returns the registered module pointer.

    The blob returned by SQLite is unpacked as a native pointer and compared
    with the integer value of the module cast to ``intptr_t``.
    """
    name = 'simpe'
    conn = sqlite3.connect(':memory:')
    module = fts.make_tokenizer_module(SimpleTokenizer())
    fts.register_tokenizer(conn, name, module)
    row = conn.execute("SELECT FTS3_TOKENIZER(?)", (name, )).fetchone()
    pointer_blob = row[0]
    module_address = int(ffi.cast('intptr_t', module))
    registered_address = struct.unpack("P", pointer_blob)[0]
    assert module_address == registered_address
    conn.close()
コード例 #11
0
def db():
    """Return an in-memory connection holding an empty FTS4 table ``fts``.

    The table is created with a ``DebugTokenizer`` module registered under
    the name ``test``.
    """
    name = 'test'
    connection = sqlite3.connect(':memory:')

    module = fts.make_tokenizer_module(DebugTokenizer())
    fts.register_tokenizer(connection, name, module)
    create_sql = 'CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})'.format(name)
    connection.execute(create_sql)

    return connection
コード例 #12
0
def db():
    """Open an in-memory database and create the FTS4 table ``fts`` in it.

    A ``DebugTokenizer`` module is registered as ``test`` and used as the
    table's tokenizer. The open connection is returned to the caller.
    """
    tokenizer_name = 'test'
    handle = sqlite3.connect(':memory:')

    fts.register_tokenizer(
        handle,
        tokenizer_name,
        fts.make_tokenizer_module(DebugTokenizer()),
    )
    ddl = 'CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})'
    handle.execute(ddl.format(tokenizer_name))

    return handle
コード例 #13
0
def test_insert():
    """Insert one Japanese row into an FTS4 table and read it back."""
    conn = apsw.Connection(":memory:")
    name = "simple"
    content = "これは日本語で書かれています"
    module = fts.make_tokenizer_module(SimpleTokenizer())
    fts.register_tokenizer(conn, name, module)
    create_sql = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)
    conn.cursor().execute(create_sql)

    conn.cursor().execute("INSERT INTO fts VALUES(?)", (content,))
    assert conn.changes() == 1

    row = conn.cursor().execute("SELECT content FROM fts").fetchone()
    assert row[0] == content
    conn.close()
コード例 #14
0
def test_createtable(name, t):
    """Create an FTS4 table with tokenizer ``name`` and check sqlite_master.

    The catalogue row for ``fts`` must exist, describe a table, and hold the
    same CREATE statement (compared case-insensitively).
    """
    conn = sqlite3.connect(':memory:')
    conn.row_factory = sqlite3.Row
    sql = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)
    fts.register_tokenizer(conn, name, fts.make_tokenizer_module(t))
    conn.execute(sql)

    lookup = "SELECT * FROM sqlite_master WHERE type='table' AND name='fts'"
    entry = conn.execute(lookup).fetchone()
    assert entry
    for column, expected in (('type', 'table'), ('name', 'fts'), ('tbl_name', 'fts')):
        assert entry[str(column)] == expected
    assert entry[str('sql')].upper() == sql.upper()
    conn.close()
コード例 #15
0
def test_insert(name, t):
    """Insert one Japanese row into an FTS4 table and read it back.

    The table uses the tokenizer built from ``t`` and registered as ``name``.
    """
    conn = sqlite3.connect(':memory:')
    conn.row_factory = sqlite3.Row
    content = 'これは日本語で書かれています'
    module = fts.make_tokenizer_module(t)
    fts.register_tokenizer(conn, name, module)
    create_sql = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)
    conn.execute(create_sql)

    insert_cursor = conn.execute('INSERT INTO fts VALUES(?)', (content, ))
    assert insert_cursor.rowcount == 1

    stored = conn.execute("SELECT * FROM fts").fetchone()
    assert stored
    assert stored[str('content')] == content
    conn.close()
コード例 #16
0
def test_insert(name, t):
    """Store a single Japanese sentence in an FTS4 table and fetch it again.

    ``t`` is turned into a tokenizer module and registered under ``name``
    before the table is created.
    """
    db = sqlite3.connect(":memory:")
    db.row_factory = sqlite3.Row
    content = "これは日本語で書かれています"
    fts.register_tokenizer(db, name, fts.make_tokenizer_module(t))
    ddl = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})"
    db.execute(ddl.format(name))

    params = (content,)
    result = db.execute("INSERT INTO fts VALUES(?)", params)
    assert result.rowcount == 1

    first_row = db.execute("SELECT * FROM fts").fetchone()
    assert first_row
    assert first_row[str("content")] == content
    db.close()
コード例 #17
0
def test_make_tokenizer(c):
    """Check that each listed field of the tokenizer module is not None.

    ``c`` is closed at the end and not otherwise used.
    """
    module = fts.make_tokenizer_module(SimpleTokenizer())
    field_names = (
        "iVersion",
        "xClose",
        "xCreate",
        "xDestroy",
        "xLanguageid",
        "xNext",
        "xOpen",
    )
    for field_name in field_names:
        assert getattr(module, field_name) is not None
    c.close()
コード例 #18
0
def test_insert():
    """Insert one Japanese row through apsw and check it can be read back."""
    db = apsw.Connection(':memory:')
    name = 'simple'
    content = 'これは日本語で書かれています'
    module = fts.make_tokenizer_module(SimpleTokenizer())
    fts.register_tokenizer(db, name, module)
    ddl = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})"
    db.cursor().execute(ddl.format(name))

    db.cursor().execute('INSERT INTO fts VALUES(?)', (content, ))
    assert db.changes() == 1

    fetched = db.cursor().execute("SELECT content FROM fts").fetchone()
    assert fetched[0] == content
    db.close()
コード例 #19
0
def test_createtable():
    """Create an FTS4 table through apsw and check its sqlite_master row."""
    conn = apsw.Connection(':memory:')
    name = 'simple'
    sql = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)
    module = fts.make_tokenizer_module(SimpleTokenizer())
    fts.register_tokenizer(conn, name, module)
    conn.cursor().execute(sql)

    lookup = "SELECT type, name, tbl_name, sql FROM sqlite_master WHERE type='table' AND name='fts'"
    entry = conn.cursor().execute(lookup).fetchone()
    expected = ('table', 'fts', 'fts', sql)
    assert entry == expected
    conn.close()
コード例 #20
0
ファイル: test_base.py プロジェクト: enquos/sqlite-fts-python
def test_insert():
    """Insert one Japanese row into an FTS4 table and read it back by name."""
    db = sqlite3.connect(':memory:')
    db.row_factory = sqlite3.Row
    name = 'simple'
    content = 'これは日本語で書かれています'
    module = fts.make_tokenizer_module(SimpleTokenizer())
    fts.register_tokenizer(db, name, module)
    ddl = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})"
    db.execute(ddl.format(name))

    outcome = db.execute('INSERT INTO fts VALUES(?)', (content, ))
    assert outcome.rowcount == 1

    record = db.execute("SELECT * FROM fts").fetchone()
    assert record
    assert record[str('content')] == content
    db.close()
コード例 #21
0
def test_createtable(name, t):
    """Create an FTS4 table and confirm how sqlite_master records it.

    The tokenizer built from ``t`` is registered as ``name``. The catalogue
    row must exist, name the table ``fts`` and contain the CREATE statement
    (compared ignoring case).
    """
    db = sqlite3.connect(':memory:')
    db.row_factory = sqlite3.Row
    sql = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)
    module = fts.make_tokenizer_module(t)
    fts.register_tokenizer(db, name, module)
    db.execute(sql)

    master_row = db.execute(
        "SELECT * FROM sqlite_master WHERE type='table' AND name='fts'"
    ).fetchone()
    assert master_row
    assert master_row[str('type')] == 'table'
    assert master_row[str('name')] == 'fts'
    assert master_row[str('tbl_name')] == 'fts'
    stored_sql = master_row[str('sql')]
    assert stored_sql.upper() == sql.upper()
    db.close()
コード例 #22
0
def test_tokenizer_output(name, t):
    """Check token text, byte offsets and positions reported by fts3tokenize.

    The tokenizer built from ``t`` is registered as ``name`` and queried
    through an ``fts3tokenize`` virtual table, first with an English
    sentence and then with a Japanese sentence whose spaces are removed.

    The produced rows are compared pairwise with the expectation. Because
    ``zip`` stops at the shorter input, the row count is checked first so an
    empty or short result cannot pass without being compared. Extra trailing
    rows are still tolerated, as before.
    """
    with sqlite3.connect(':memory:') as c:
        fts.register_tokenizer(c, name, fts.make_tokenizer_module(t))
        c.execute("CREATE VIRTUAL TABLE tok1 USING fts3tokenize({})".format(name))

        def check(cursor, expect):
            # Materialise the rows so they can be counted before comparing.
            actual = list(cursor)
            assert len(actual) >= len(expect)
            for a, e in zip(actual, expect):
                assert e == a

        expect = [("This", 0, 4, 0), ("is", 5, 7, 1),
                  ("a", 8, 9, 2), ("test", 10, 14, 3), ("sentence", 15, 23, 4)]
        check(c.execute("SELECT token, start, end, position "
                        "FROM tok1 WHERE input='This is a test sentence.'"), expect)

        s = 'これ は テスト の 文 です'
        # The input has no spaces, so each token starts where the previous
        # one ended; offsets are counted in UTF-8 bytes.
        expect = []
        offset = 0
        for i, txt in enumerate(s.split()):
            end = offset + len(txt.encode('utf-8'))
            expect.append((txt, offset, end, i))
            offset = end
        check(c.execute("SELECT token, start, end, position "
                        "FROM tok1 WHERE input=?", [s.replace(' ', '')]), expect)
コード例 #23
0
def test_match(name, t):
    """Check MATCH results for Japanese text with the tokenizer from ``t``.

    Each case is ``(term, expected hit count, index into contents or None)``;
    when an index is given the single returned row must carry that content.
    """
    conn = sqlite3.connect(":memory:")
    conn.row_factory = sqlite3.Row
    contents = [("これは日本語で書かれています",), (" これは 日本語の文章を 全文検索するテストです",)]
    module = fts.make_tokenizer_module(t)
    fts.register_tokenizer(conn, name, module)
    conn.execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name))

    inserted = conn.executemany("INSERT INTO fts VALUES(?)", contents)
    assert inserted.rowcount == 2
    assert len(conn.execute("SELECT * FROM fts").fetchall()) == 2

    cases = [
        ("日本語", 2, None),
        ("ます", 1, 0),
        ("テスト", 1, 1),
        ("コレは", 0, None),
    ]
    for term, hits, source_index in cases:
        query = "SELECT * FROM fts WHERE fts MATCH '{}'".format(term)
        rows = conn.execute(query).fetchall()
        assert len(rows) == hits
        if source_index is not None:
            assert rows[0][str("content")] == contents[source_index][0]
    conn.close()
コード例 #24
0
def test_match(name, t):
    """Run MATCH queries on two Japanese rows and check hits and content.

    The FTS4 table uses the tokenizer built from ``t`` and registered as
    ``name``.
    """
    db = sqlite3.connect(':memory:')
    db.row_factory = sqlite3.Row
    contents = [('これは日本語で書かれています', ), (' これは 日本語の文章を 全文検索するテストです', )]
    fts.register_tokenizer(db, name, fts.make_tokenizer_module(t))
    db.execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name))

    cursor = db.executemany('INSERT INTO fts VALUES(?)', contents)
    assert cursor.rowcount == 2
    all_rows = db.execute("SELECT * FROM fts").fetchall()
    assert len(all_rows) == 2

    def matching(term):
        # Same SQL text as a hand-written query for this term.
        sql = "SELECT * FROM fts WHERE fts MATCH '{}'".format(term)
        return db.execute(sql).fetchall()

    def check_single(term, position):
        found = matching(term)
        assert len(found) == 1
        assert found[0][str('content')] == contents[position][0]

    assert len(matching('日本語')) == 2
    check_single('ます', 0)
    check_single('テスト', 1)
    assert len(matching('コレは')) == 0
    db.close()
コード例 #25
0
def test_full_text_index_queries():
    """Check term and prefix MATCH queries on an FTS4 table of Latin text.

    An ``OUWordTokenizer('latin')`` module is registered as ``oulatin`` and
    used for a two-column (title, body) FTS4 table holding three documents.
    Exact terms and ``*`` prefix queries are then checked for hit counts.

    The test runs unattended: it contains no debugger breakpoint.
    """
    name = 'oulatin'
    docs = [('README', 'huius commentarii pertinebit fortassis et ad successorem utilitas,'
             ' sed cum inter initia administrationis meae scriptus sit,'
             ' in primis ad meam institutionem regulamque proficie'),
            ("tesy", 'this is a test sentence'),
            ('LICENSE', 'Cum omnis res ab imperatore delegata intentiorem exigat curam,'
             ' et me seu naturalis sollicitudo seu fides sedula non ad'
             ' diligentiam modo verum ad amorem quoque commissae rei instigent sitque nunc'
             ' mihi ab Nerva Augusto, nescio diligentiore an amantiore rei publicae'
             ' imperatore, aquarum iniunctum officium ad usum, tum ad salubritatem atque'
             ' etiam securitatem urbis pertinens, administratum per principes semper civitatis'
             ' nostrae viros, primum ac potissimum existimo, sicut in ceteris negotiis'
             ' institueram, nosse quod suscepi.')
            ]
    with apsw.Connection(':memory:') as connection:
        c = connection.cursor()
        # Report the SQLite version in the test output.
        r = c.execute("SELECT sqlite_version()").fetchall()
        for i in r:
            print(i)
        # NOTE(review): a cursor, not the connection, is passed here, as in
        # the original; confirm fts.register_tokenizer accepts a cursor.
        fts.register_tokenizer(c, name, fts.make_tokenizer_module(OUWordTokenizer('latin')))
        c.execute("CREATE VIRTUAL TABLE docs USING FTS4(title, body, tokenize={})".format(name))
        c.executemany("INSERT INTO docs(title, body) VALUES(?, ?)", docs)

        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'huius'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'sed'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'sed*'").fetchall()
        assert len(r) == 2
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'comm'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'commi*'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'comm*'").fetchall()
        assert len(r) == 2
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'test'").fetchall()
        assert len(r) >= 1
コード例 #26
0
def db():
    """Return an in-memory connection with populated FTS3 and FTS4 tables.

    Registers a ``Tokenizer`` module as ``test``, creates the tables ``fts3``
    and ``fts4`` with it, inserts the same rows into both, and registers the
    SQL functions ``bm25`` and ``rank``.
    """
    name = 'test'
    connection = sqlite3.connect(':memory:')
    connection.row_factory = sqlite3.Row

    module = fts.make_tokenizer_module(Tokenizer())
    fts.register_tokenizer(connection, name, module)

    ddl_templates = (
        'CREATE VIRTUAL TABLE fts3 USING FTS3(tokenize={})',
        'CREATE VIRTUAL TABLE fts4 USING FTS4(tokenize={})',
    )
    for template in ddl_templates:
        connection.execute(template.format(name))

    values = [
        ['Make thing I'],
        ['Some thing φχικλψ thing'],
        ['Fusce volutpat hendrerit sem. Fusce sit amet vulputate dui. '
         'Sed posuere mi a nisl aliquet tempor. Praesent tincidunt vel nunc ac pharetra.'],
        ['Nam molestie euismod leo id aliquam. In hac habitasse platea dictumst.'],
        ['Vivamus tincidunt feugiat tellus ac bibendum. In rhoncus dignissim suscipit.'],
        ['Pellentesque hendrerit nulla rutrum luctus rutrum. Fusce hendrerit fermentum nunc at posuere.'],
    ]
    expected_rows = len(values)
    for table in ('fts3', 'fts4'):
        insert_sql = 'INSERT INTO {0} VALUES(?)'.format(table)
        cursor = connection.executemany(insert_sql, values)
        assert cursor.rowcount == expected_rows

    connection.create_function('bm25', 2, ranking.bm25)
    connection.create_function('rank', 1, ranking.simple)

    return connection
コード例 #27
0
def db():
    """Build and return an in-memory database for ranking tests.

    The database holds the tables ``fts3`` and ``fts4``, both created with a
    ``Tokenizer`` module registered as ``test`` and filled with the same
    rows. The SQL functions ``bm25`` and ``rank`` are registered on the
    returned connection.
    """
    name = "test"
    handle = sqlite3.connect(":memory:")
    handle.row_factory = sqlite3.Row

    fts.register_tokenizer(handle, name, fts.make_tokenizer_module(Tokenizer()))

    create_fts3 = "CREATE VIRTUAL TABLE fts3 USING FTS3(tokenize={})".format(name)
    create_fts4 = "CREATE VIRTUAL TABLE fts4 USING FTS4(tokenize={})".format(name)
    handle.execute(create_fts3)
    handle.execute(create_fts4)

    values = [
        ["Make thing I"],
        ["Some thing φχικλψ thing"],
        ["Fusce volutpat hendrerit sem. Fusce sit amet vulputate dui. "
         "Sed posuere mi a nisl aliquet tempor. Praesent tincidunt vel nunc ac pharetra."],
        ["Nam molestie euismod leo id aliquam. In hac habitasse platea dictumst."],
        ["Vivamus tincidunt feugiat tellus ac bibendum. In rhoncus dignissim suscipit."],
        ["Pellentesque hendrerit nulla rutrum luctus rutrum. Fusce hendrerit fermentum nunc at posuere."],
    ]
    row_total = len(values)
    for table_name in ("fts3", "fts4"):
        statement = "INSERT INTO {0} VALUES(?)".format(table_name)
        outcome = handle.executemany(statement, values)
        assert outcome.rowcount == row_total

    handle.create_function("bm25", 2, ranking.bm25)
    handle.create_function("rank", 1, ranking.simple)

    return handle
コード例 #28
0
def test_tokenizer_output():
    """Check token text, byte offsets and positions reported by fts3tokenize.

    A ``SimpleTokenizer`` module is registered as ``simple`` on an apsw
    connection and queried through an ``fts3tokenize`` virtual table with an
    English and a space-separated Japanese sentence.

    The produced rows are compared pairwise with the expectation. Because
    ``zip`` stops at the shorter input, the row count is checked first so an
    empty or short result cannot pass without being compared. Extra trailing
    rows are still tolerated, as before.
    """
    name = 'simple'
    with apsw.Connection(':memory:') as c:
        fts.register_tokenizer(c, name,
                               fts.make_tokenizer_module(SimpleTokenizer()))
        c.cursor().execute(
            "CREATE VIRTUAL TABLE tok1 USING fts3tokenize({})".format(name))

        def check(cursor, expect):
            # Materialise the rows so they can be counted before comparing.
            actual = list(cursor)
            assert len(actual) >= len(expect)
            for a, e in zip(actual, expect):
                assert e == a

        expect = [("This", 0, 4, 0), ("is", 5, 7, 1), ("a", 8, 9, 2),
                  ("test", 10, 14, 3), ("sentence", 15, 23, 4)]
        check(c.cursor().execute(
            "SELECT token, start, end, position "
            "FROM tok1 WHERE input='This is a test sentence.'"), expect)

        s = 'これ は テスト の 文 です'
        # Tokens are separated by one space (one byte), so each token starts
        # one byte after the previous end; offsets are in UTF-8 bytes.
        expect = []
        start = 0
        for i, t in enumerate(s.split()):
            end = start + len(t.encode('utf-8'))
            expect.append((t, start, end, i))
            start = end + 1
        check(c.cursor().execute(
            "SELECT token, start, end, position "
            "FROM tok1 WHERE input=?", [s]), expect)
コード例 #29
0
def test_make_tokenizer():
    """Check the type of the object returned by ``fts.make_tokenizer_module``.

    An in-memory connection is opened and closed around the check but is not
    used by it.
    """
    conn = sqlite3.connect(':memory:')
    module = fts.make_tokenizer_module(SimpleTokenizer())
    expected_type = fts.tokenizer.sqlite3_tokenizer_module
    actual_type = type(module)
    assert expected_type == actual_type
    conn.close()
コード例 #30
0
def register_tokenizer(sqlite_connection):
    """Register a ``SnowballRussianTokenizer`` module on the connection.

    The module is registered under ``SnowballRussianTokenizer.name``.
    """
    russian_module = fts.make_tokenizer_module(SnowballRussianTokenizer())
    registered_name = SnowballRussianTokenizer.name
    fts.register_tokenizer(sqlite_connection, registered_name, russian_module)
コード例 #31
0
def tokenizer_module():
    """Return a tokenizer module built from a new ``SimpleFTS5Tokenizer``."""
    tokenizer = SimpleFTS5Tokenizer()
    module = make_tokenizer_module(tokenizer)
    return module
コード例 #32
0
def test_full_text_index_queries():
    """Exercise the FTS4 query syntax on a three-document (title, body) table.

    Covers plain terms, column filters (``title:``/``body:`` and
    ``<column> MATCH``), implicit AND, prefix queries, phrase queries and
    NEAR queries, for both ASCII and Japanese text. Each table entry pairs a
    WHERE clause with the number of rows it must return.
    """
    name = 'simple'
    docs = [(
        'README',
        'sqlitefts-python provides binding for tokenizer of SQLite Full-Text search(FTS3/4). It allows you to write tokenizers in Python.'
    ), ('LICENSE',
        '''Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:'''),
            ('日本語', 'あいうえお かきくけこ さしすせそ たちつてと なにぬねの')]

    basic_counts = [
        ("docs MATCH 'Python'", 1),
        ("docs MATCH 'bind'", 0),
        ("docs MATCH 'binding'", 1),
        ("docs MATCH 'to'", 2),
        ("docs MATCH 'あいうえお'", 1),
        ("docs MATCH 'らりるれろ'", 0),
    ]
    # Pairs of clauses whose first result row must be identical.
    same_first_row = [
        ("docs MATCH 'binding'", "body MATCH 'binding'"),
        ("body MATCH 'binding'", "docs MATCH 'body:binding'"),
        ("docs MATCH 'あいうえお'", "body MATCH 'あいうえお'"),
        ("body MATCH 'かきくけこ'", "docs MATCH 'body:かきくけこ'"),
    ]
    advanced_counts = [
        # Column filters.
        ("docs MATCH 'title:bind'", 0),
        ("docs MATCH 'title:README'", 1),
        ("docs MATCH 'title:日本語'", 1),
        ("title MATCH 'bind'", 0),
        ("title MATCH 'README'", 1),
        ("title MATCH '日本語'", 1),
        # Implicit AND and prefix queries.
        ("docs MATCH 'to in'", 2),
        ("docs MATCH 'Py*'", 1),
        ("docs MATCH 'Z*'", 0),
        ("docs MATCH 'あ*'", 1),
        ("docs MATCH 'ん*'", 0),
        # Term sets versus phrases.
        ("docs MATCH 'tokenizer SQLite'", 1),
        ("docs MATCH '\"tokenizer SQLite\"'", 0),
        ("docs MATCH 'あいうえお たちつてと'", 1),
        ("docs MATCH '\"あいうえお たちつてと\"'", 0),
        ("docs MATCH '\"tok* SQL*\"'", 0),
        ("docs MATCH '\"tok* of SQL*\"'", 1),
        ("docs MATCH '\"あ* さ*\"'", 0),
        ("docs MATCH '\"あ* かきくけこ さ*\"'", 1),
        # NEAR queries.
        ("docs MATCH 'tokenizer NEAR SQLite'", 1),
        ("docs MATCH 'binding NEAR/2 SQLite'", 0),
        ("docs MATCH 'binding NEAR/3 SQLite'", 1),
        ("docs MATCH 'あいうえお NEAR たちつてと'", 1),
        ("docs MATCH 'あいうえお NEAR/2 たちつてと'", 1),
        ("docs MATCH 'あいうえお NEAR/3 たちつてと'", 1),
    ]

    with apsw.Connection(':memory:') as c:
        fts.register_tokenizer(c, name,
                               fts.make_tokenizer_module(SimpleTokenizer()))
        c.cursor().execute(
            "CREATE VIRTUAL TABLE docs USING FTS4(title, body, tokenize={})".
            format(name))
        c.cursor().executemany("INSERT INTO docs(title, body) VALUES(?, ?)",
                               docs)

        def search(where):
            # A fresh cursor per query, with the clause appended verbatim.
            sql = "SELECT * FROM docs WHERE " + where
            return c.cursor().execute(sql).fetchall()

        for where, expected in basic_counts:
            assert len(search(where)) == expected

        for left, right in same_first_row:
            assert search(left)[0] == search(right)[0]

        for where, expected in advanced_counts:
            assert len(search(where)) == expected
コード例 #33
0
def tokenizer_module():
    """Return a tokenizer module built from a new ``SimpleTokenizer``."""
    tokenizer = SimpleTokenizer()
    module = fts.make_tokenizer_module(tokenizer)
    return module
コード例 #34
0
def test_make_tokenizer():
    """Check the type of the module built from ``word_tokenizer``.

    A connection to ``ouLatin.db`` is opened and closed around the check but
    is not used by it.
    """
    conn = apsw.Connection('ouLatin.db')
    module = fts.make_tokenizer_module(word_tokenizer)
    expected_type = fts.tokenizer.sqlite3_tokenizer_module
    actual_type = type(module)
    assert expected_type == actual_type
    conn.close()
コード例 #35
0
def create_table(c):
    """Register the ``igo`` tokenizers on ``c`` and create both FTS tables.

    An ``IgoTokenizer`` module is registered through ``fts`` and an
    ``IgoTokenizer5`` tokenizer through ``fts5``, both under the name
    ``igo``. The tables ``fts`` (FTS4) and ``fts5`` (FTS5, column ``w``) are
    then created with that tokenizer.
    """
    fts4_module = fts.make_tokenizer_module(IgoTokenizer())
    fts.register_tokenizer(c, 'igo', fts4_module)
    fts5_tokenizer = fts5.make_fts5_tokenizer(IgoTokenizer5())
    fts5.register_tokenizer(c, 'igo', fts5_tokenizer)
    statements = (
        "CREATE VIRTUAL TABLE fts USING FTS4(tokenize=igo)",
        "CREATE VIRTUAL TABLE fts5 USING FTS5(w, tokenize=igo)",
    )
    for statement in statements:
        c.execute(statement)
コード例 #36
0
def test_full_text_index_queries():
    """Run FTS4 ``MATCH`` queries against a small corpus indexed with ``SimpleTokenizer``.

    Three rows (an English README sentence, an English licence excerpt and a
    Japanese row) are inserted into a two-column (``title``, ``body``) FTS4
    table.  Single-term, ``column:term``, prefix (``*``), quoted-phrase and
    ``NEAR`` queries are then checked by the number of rows they return, and
    a few equivalent query spellings are checked to return the same first row.

    The connection is closed explicitly in a ``finally`` clause: using a
    ``sqlite3.Connection`` as a context manager only commits or rolls back the
    transaction and leaves the connection open.
    """
    name = 'simple'
    docs = [('README', 'sqlitefts-python provides binding for tokenizer of SQLite Full-Text search(FTS3/4). It allows you to write tokenizers in Python.'),
            ('LICENSE', '''Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:'''),
            ('日本語', 'あいうえお かきくけこ さしすせそ たちつてと なにぬねの')]

    # (column, MATCH expression, expected row count), in the order they are run.
    counts_before_equivalence = [
        ('docs', 'Python', 1),
        ('docs', 'bind', 0),
        ('docs', 'binding', 1),
        ('docs', 'to', 2),
        ('docs', 'あいうえお', 1),
        ('docs', 'らりるれろ', 0),
    ]

    # Pairs of (column, MATCH expression) that must yield the same first row.
    equivalent_queries = [
        (('docs', 'binding'), ('body', 'binding')),
        (('body', 'binding'), ('docs', 'body:binding')),
        (('docs', 'あいうえお'), ('body', 'あいうえお')),
        (('body', 'かきくけこ'), ('docs', 'body:かきくけこ')),
    ]

    counts_after_equivalence = [
        # Column-restricted terms, written both as "title:term" and as a MATCH on the column.
        ('docs', 'title:bind', 0),
        ('docs', 'title:README', 1),
        ('docs', 'title:日本語', 1),
        ('title', 'bind', 0),
        ('title', 'README', 1),
        ('title', '日本語', 1),
        # Several terms in one expression.
        ('docs', 'to in', 2),
        # Prefix queries.
        ('docs', 'Py*', 1),
        ('docs', 'Z*', 0),
        ('docs', 'あ*', 1),
        ('docs', 'ん*', 0),
        # Bare terms versus double-quoted phrases.
        ('docs', 'tokenizer SQLite', 1),
        ('docs', '"tokenizer SQLite"', 0),
        ('docs', 'あいうえお たちつてと', 1),
        ('docs', '"あいうえお たちつてと"', 0),
        ('docs', '"tok* SQL*"', 0),
        ('docs', '"tok* of SQL*"', 1),
        ('docs', '"あ* さ*"', 0),
        ('docs', '"あ* かきくけこ さ*"', 1),
        # NEAR queries, with and without an explicit distance.
        ('docs', 'tokenizer NEAR SQLite', 1),
        ('docs', 'binding NEAR/2 SQLite', 0),
        ('docs', 'binding NEAR/3 SQLite', 1),
        ('docs', 'あいうえお NEAR たちつてと', 1),
        ('docs', 'あいうえお NEAR/2 たちつてと', 1),
        ('docs', 'あいうえお NEAR/3 たちつてと', 1),
    ]

    c = sqlite3.connect(':memory:')
    try:
        c.row_factory = sqlite3.Row
        fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer()))
        c.execute("CREATE VIRTUAL TABLE docs USING FTS4(title, body, tokenize={})".format(name))
        c.executemany("INSERT INTO docs(title, body) VALUES(?, ?)", docs)

        def matching_rows(column, expression):
            """Return every row of ``docs`` where ``column`` MATCHes ``expression``."""
            # The expressions are fixed test constants, so the formatted SQL is
            # character-for-character the statement that used to be written inline.
            sql = "SELECT * FROM docs WHERE {} MATCH '{}'".format(column, expression)
            return c.execute(sql).fetchall()

        for column, expression, expected in counts_before_equivalence:
            assert len(matching_rows(column, expression)) == expected, (column, expression)

        for left, right in equivalent_queries:
            assert matching_rows(*left)[0] == matching_rows(*right)[0], (left, right)

        for column, expression, expected in counts_after_equivalence:
            assert len(matching_rows(column, expression)) == expected, (column, expression)
    finally:
        # The in-memory database is discarded here, so no commit is needed.
        c.close()
コード例 #37
0
def tokenizer_module():
    """Return the module produced by ``fts.make_tokenizer_module`` for a fresh ``SimpleTokenizer``."""
    tokenizer = SimpleTokenizer()
    built = fts.make_tokenizer_module(tokenizer)
    return built