# Imports assumed by the tests in this section; sqlitefts is imported as
# `fts`, and apsw- and sqlite3-based variants of the same tests appear
# side by side (exact import paths are assumptions):
import ctypes
import re
import sqlite3
import struct

import apsw

import sqlitefts as fts
from sqlitefts import fts5, ranking
from sqlitefts.tokenizer import ffi  # cffi FFI object; module path assumed

# Tokenizer classes other than SimpleTokenizer (Tokenizer, DebugTokenizer,
# IgoTokenizer, IgoTokenizer5, OUWordTokenizer, SnowballRussianTokenizer,
# SimpleFTS5Tokenizer, word_tokenizer) are assumed to be defined elsewhere
# in the test suite.


def test_match():
    # apsw variant: apsw reports the insert count via Connection.changes(),
    # so the executemany result itself is not checked here.
    c = apsw.Connection(":memory:")
    name = "simple"
    contents = [
        ("abc def",),
        ("abc xyz",),
        ("あいうえお かきくけこ",),
        ("あいうえお らりるれろ",),
    ]
    fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer()))
    c.cursor().execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name))
    c.cursor().executemany("INSERT INTO fts VALUES(?)", contents)
    r = c.cursor().execute("SELECT * FROM fts").fetchall()
    assert len(r) == 4
    r = c.cursor().execute("SELECT * FROM fts WHERE fts MATCH 'abc'").fetchall()
    assert len(r) == 2
    r = c.cursor().execute("SELECT content FROM fts WHERE fts MATCH 'def'").fetchall()
    assert len(r) == 1 and r[0][0] == contents[0][0]
    r = c.cursor().execute("SELECT content FROM fts WHERE fts MATCH 'xyz'").fetchall()
    assert len(r) == 1 and r[0][0] == contents[1][0]
    r = c.cursor().execute("SELECT * FROM fts WHERE fts MATCH 'zzz'").fetchall()
    assert len(r) == 0
    r = c.cursor().execute("SELECT * FROM fts WHERE fts MATCH 'あいうえお'").fetchall()
    assert len(r) == 2
    r = c.cursor().execute("SELECT content FROM fts WHERE fts MATCH 'かきくけこ'").fetchall()
    assert len(r) == 1 and r[0][0] == contents[2][0]
    r = c.cursor().execute("SELECT content FROM fts WHERE fts MATCH 'らりるれろ'").fetchall()
    assert len(r) == 1 and r[0][0] == contents[3][0]
    r = c.cursor().execute("SELECT * FROM fts WHERE fts MATCH 'まみむめも'").fetchall()
    assert len(r) == 0
    c.close()
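# The tests in this section assume a SimpleTokenizer that is not defined
# here. A minimal sketch of the sqlitefts FTS3/4 Tokenizer protocol, where
# tokenize() yields (token, start, end) tuples with UTF-8 *byte* offsets;
# the class body is an assumption modeled on the sqlitefts README example:
class SimpleTokenizer(fts.Tokenizer):
    _p = re.compile(r"\w+", re.UNICODE)

    def tokenize(self, text):
        for m in self._p.finditer(text):
            s, e = m.span()
            token = text[s:e]
            # offsets are into the UTF-8 encoding of the input, not
            # character indices
            start = len(text[:s].encode("utf-8"))
            yield token, start, start + len(token.encode("utf-8"))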
def test_match():
    # sqlite3 variant of the test above; rowcount is available here.
    c = sqlite3.connect(':memory:')
    c.row_factory = sqlite3.Row
    name = 'simple'
    contents = [
        ('abc def',),
        ('abc xyz',),
        ('あいうえお かきくけこ',),
        ('あいうえお らりるれろ',),
    ]
    fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer()))
    c.execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name))
    r = c.executemany('INSERT INTO fts VALUES(?)', contents)
    assert r.rowcount == 4
    r = c.execute("SELECT * FROM fts").fetchall()
    assert len(r) == 4
    r = c.execute("SELECT * FROM fts WHERE fts MATCH 'abc'").fetchall()
    assert len(r) == 2
    r = c.execute("SELECT * FROM fts WHERE fts MATCH 'def'").fetchall()
    assert len(r) == 1 and r[0]['content'] == contents[0][0]
    r = c.execute("SELECT * FROM fts WHERE fts MATCH 'xyz'").fetchall()
    assert len(r) == 1 and r[0]['content'] == contents[1][0]
    r = c.execute("SELECT * FROM fts WHERE fts MATCH 'zzz'").fetchall()
    assert len(r) == 0
    r = c.execute("SELECT * FROM fts WHERE fts MATCH 'あいうえお'").fetchall()
    assert len(r) == 2
    r = c.execute("SELECT * FROM fts WHERE fts MATCH 'かきくけこ'").fetchall()
    assert len(r) == 1 and r[0]['content'] == contents[2][0]
    r = c.execute("SELECT * FROM fts WHERE fts MATCH 'らりるれろ'").fetchall()
    assert len(r) == 1 and r[0]['content'] == contents[3][0]
    r = c.execute("SELECT * FROM fts WHERE fts MATCH 'まみむめも'").fetchall()
    assert len(r) == 0
    c.close()
def test_register_tokenizer(name, t):
    c = sqlite3.connect(':memory:')
    tokenizer_module = fts.make_tokenizer_module(t)
    fts.register_tokenizer(c, name, tokenizer_module)
    # The registered value is the address of the tokenizer module struct,
    # packed as a native pointer; round-trip it and compare (ctypes variant).
    v = c.execute("SELECT FTS3_TOKENIZER(?)", (name,)).fetchone()[0]
    assert ctypes.addressof(tokenizer_module) == struct.unpack("P", v)[0]
    c.close()
def test_make_tokenizer(c):
    tm = fts.make_tokenizer_module(SimpleTokenizer())
    assert all(
        getattr(tm, x) is not None
        for x in ('iVersion', 'xClose', 'xCreate', 'xDestroy', 'xLanguageid',
                  'xNext', 'xOpen'))
    c.close()
def test_tokenizer_output(name, t):
    with sqlite3.connect(":memory:") as c:
        fts.register_tokenizer(c, name, fts.make_tokenizer_module(t))
        c.execute("CREATE VIRTUAL TABLE tok1 USING fts3tokenize({})".format(name))
        expect = [
            ("This", 0, 4, 0),
            ("is", 5, 7, 1),
            ("a", 8, 9, 2),
            ("test", 10, 14, 3),
            ("sentence", 15, 23, 4),
        ]
        for a, e in zip(
                c.execute(
                    "SELECT token, start, end, position "
                    "FROM tok1 WHERE input='This is a test sentence.'"),
                expect):
            assert e == a
        # Build the expected (token, byte_start, byte_end, position) rows
        # for the whitespace-stripped input: offsets are contiguous.
        s = "これ は テスト の 文 です"
        expect = [(None, 0, 0, 0)]
        for i, txt in enumerate(s.split()):
            expect.append(
                (txt, expect[-1][2], expect[-1][2] + len(txt.encode("utf-8")), i))
        expect = expect[1:]
        for a, e in zip(
                c.execute(
                    "SELECT token, start, end, position FROM tok1 WHERE input=?",
                    [s.replace(" ", "")]),
                expect):
            assert e == a
def test_tokenizer_output():
    name = 'simple'
    with sqlite3.connect(':memory:') as c:
        fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer()))
        c.execute("CREATE VIRTUAL TABLE tok1 USING fts3tokenize({})".format(name))
        expect = [("This", 0, 4, 0), ("is", 5, 7, 1), ("a", 8, 9, 2),
                  ("test", 10, 14, 3), ("sentence", 15, 23, 4)]
        for a, e in zip(
                c.execute("SELECT token, start, end, position "
                          "FROM tok1 WHERE input='This is a test sentence.'"),
                expect):
            assert e == a
        # Here the single-byte spaces stay in the input, so each token
        # starts one byte after the previous token's end.
        s = 'これ は テスト の 文 です'
        expect = [(None, 0, -1, 0)]
        for i, t in enumerate(s.split()):
            expect.append((t, expect[-1][2] + 1,
                           expect[-1][2] + 1 + len(t.encode('utf-8')), i))
        expect = expect[1:]
        for a, e in zip(
                c.execute("SELECT token, start, end, position "
                          "FROM tok1 WHERE input=?", [s]),
                expect):
            assert e == a
def setUp(self):
    name = 'test'
    conn = sqlite3.connect(':memory:')
    conn.row_factory = sqlite3.Row
    fts.register_tokenizer(conn, name, fts.make_tokenizer_module(Tokenizer()))
    conn.execute('CREATE VIRTUAL TABLE fts3 USING FTS3(tokenize={})'.format(name))
    conn.execute('CREATE VIRTUAL TABLE fts4 USING FTS4(tokenize={})'.format(name))
    values = [
        ('Make thing I',),
        ('Some thing φχικλψ thing',),
        ('Fusce volutpat hendrerit sem. Fusce sit amet vulputate dui. '
         'Sed posuere mi a nisl aliquet tempor. Praesent tincidunt vel nunc ac pharetra.',),
        ('Nam molestie euismod leo id aliquam. In hac habitasse platea dictumst.',),
        ('Vivamus tincidunt feugiat tellus ac bibendum. In rhoncus dignissim suscipit.',),
        ('Pellentesque hendrerit nulla rutrum luctus rutrum. Fusce hendrerit fermentum nunc at posuere.',),
    ]
    for n in ('fts3', 'fts4'):
        result = conn.executemany('INSERT INTO {0} VALUES(?)'.format(n), values)
        assert result.rowcount == len(values)
    conn.create_function('bm25', 2, ranking.bm25)
    conn.create_function('rank', 1, ranking.simple)
    self.testee = conn
def setUp(self): name = "test" conn = sqlite3.connect(":memory:") fts.register_tokenizer(conn, name, fts.make_tokenizer_module(DebugTokenizer())) conn.execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)) self.testee = conn
def test_register_tokenizer():
    name = 'simple'
    c = sqlite3.connect(':memory:')
    tokenizer_module = fts.make_tokenizer_module(SimpleTokenizer())
    fts.register_tokenizer(c, name, tokenizer_module)
    # cffi variant of the pointer round-trip check above.
    v = c.execute("SELECT FTS3_TOKENIZER(?)", (name,)).fetchone()[0]
    assert int(ffi.cast('intptr_t', tokenizer_module)) == struct.unpack("P", v)[0]
    c.close()
def db():
    name = 'test'
    conn = sqlite3.connect(':memory:')
    fts.register_tokenizer(conn, name, fts.make_tokenizer_module(DebugTokenizer()))
    conn.execute('CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})'.format(name))
    return conn
def test_insert(): c = apsw.Connection(":memory:") name = "simple" content = "これは日本語で書かれています" fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer())) c.cursor().execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)) r = c.cursor().execute("INSERT INTO fts VALUES(?)", (content,)) assert c.changes() == 1 r = c.cursor().execute("SELECT content FROM fts").fetchone() assert r[0] == content c.close()
def test_createtable(name, t):
    c = sqlite3.connect(':memory:')
    c.row_factory = sqlite3.Row
    sql = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)
    fts.register_tokenizer(c, name, fts.make_tokenizer_module(t))
    c.execute(sql)
    r = c.execute(
        "SELECT * FROM sqlite_master WHERE type='table' AND name='fts'").fetchone()
    assert r
    assert r['type'] == 'table' and r['name'] == 'fts' and r['tbl_name'] == 'fts'
    assert r['sql'].upper() == sql.upper()
    c.close()
def test_insert(name, t):
    c = sqlite3.connect(':memory:')
    c.row_factory = sqlite3.Row
    content = 'これは日本語で書かれています'
    fts.register_tokenizer(c, name, fts.make_tokenizer_module(t))
    c.execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name))
    r = c.execute('INSERT INTO fts VALUES(?)', (content,))
    assert r.rowcount == 1
    r = c.execute("SELECT * FROM fts").fetchone()
    assert r
    assert r['content'] == content
    c.close()
def test_insert(name, t): c = sqlite3.connect(":memory:") c.row_factory = sqlite3.Row content = "これは日本語で書かれています" fts.register_tokenizer(c, name, fts.make_tokenizer_module(t)) c.execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)) r = c.execute("INSERT INTO fts VALUES(?)", (content,)) assert r.rowcount == 1 r = c.execute("SELECT * FROM fts").fetchone() assert r assert r[str("content")] == content c.close()
def test_createtable():
    c = apsw.Connection(':memory:')
    name = 'simple'
    sql = "CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)
    fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer()))
    c.cursor().execute(sql)
    r = c.cursor().execute(
        "SELECT type, name, tbl_name, sql FROM sqlite_master "
        "WHERE type='table' AND name='fts'").fetchone()
    assert r == ('table', 'fts', 'fts', sql)
    c.close()
def test_insert():
    c = sqlite3.connect(':memory:')
    c.row_factory = sqlite3.Row
    name = 'simple'
    content = 'これは日本語で書かれています'
    fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer()))
    c.execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name))
    r = c.execute('INSERT INTO fts VALUES(?)', (content,))
    assert r.rowcount == 1
    r = c.execute("SELECT * FROM fts").fetchone()
    assert r
    assert r['content'] == content
    c.close()
def test_match(name, t): c = sqlite3.connect(":memory:") c.row_factory = sqlite3.Row contents = [("これは日本語で書かれています",), (" これは 日本語の文章を 全文検索するテストです",)] fts.register_tokenizer(c, name, fts.make_tokenizer_module(t)) c.execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize={})".format(name)) r = c.executemany("INSERT INTO fts VALUES(?)", contents) assert r.rowcount == 2 r = c.execute("SELECT * FROM fts").fetchall() assert len(r) == 2 r = c.execute("SELECT * FROM fts WHERE fts MATCH '日本語'").fetchall() assert len(r) == 2 r = c.execute("SELECT * FROM fts WHERE fts MATCH 'ます'").fetchall() assert len(r) == 1 and r[0][str("content")] == contents[0][0] r = c.execute("SELECT * FROM fts WHERE fts MATCH 'テスト'").fetchall() assert len(r) == 1 and r[0][str("content")] == contents[1][0] r = c.execute("SELECT * FROM fts WHERE fts MATCH 'コレは'").fetchall() assert len(r) == 0 c.close()
def test_full_text_index_queries():
    name = 'oulatin'
    docs = [
        ('README',
         'huius commentarii pertinebit fortassis et ad successorem utilitas,'
         ' sed cum inter initia administrationis meae scriptus sit,'
         ' in primis ad meam institutionem regulamque proficie'),
        ('tesy', 'this is a test sentence'),
        ('LICENSE',
         'Cum omnis res ab imperatore delegata intentiorem exigat curam,'
         ' et me seu naturalis sollicitudo seu fides sedula non ad'
         ' diligentiam modo verum ad amorem quoque commissae rei instigent sitque nunc'
         ' mihi ab Nerva Augusto, nescio diligentiore an amantiore rei publicae'
         ' imperatore, aquarum iniunctum officium ad usum, tum ad salubritatem atque'
         ' etiam securitatem urbis pertinens, administratum per principes semper civitatis'
         ' nostrae viros, primum ac potissimum existimo, sicut in ceteris negotiis'
         ' institueram, nosse quod suscepi.'),
    ]
    with apsw.Connection(':memory:') as connection:
        c = connection.cursor()
        fts.register_tokenizer(
            c, name, fts.make_tokenizer_module(OUWordTokenizer('latin')))
        c.execute("CREATE VIRTUAL TABLE docs USING FTS4(title, body, tokenize={})".format(name))
        c.executemany("INSERT INTO docs(title, body) VALUES(?, ?)", docs)
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'huius'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'sed'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'sed*'").fetchall()
        assert len(r) == 2
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'comm'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'commi*'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'comm*'").fetchall()
        assert len(r) == 2
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'test'").fetchall()
        assert len(r) >= 1
def db():
    name = 'test'
    conn = sqlite3.connect(':memory:')
    conn.row_factory = sqlite3.Row
    fts.register_tokenizer(conn, name, fts.make_tokenizer_module(Tokenizer()))
    conn.execute('CREATE VIRTUAL TABLE fts3 USING FTS3(tokenize={})'.format(name))
    conn.execute('CREATE VIRTUAL TABLE fts4 USING FTS4(tokenize={})'.format(name))
    values = [
        ['Make thing I'],
        ['Some thing φχικλψ thing'],
        ['Fusce volutpat hendrerit sem. Fusce sit amet vulputate dui. '
         'Sed posuere mi a nisl aliquet tempor. Praesent tincidunt vel nunc ac pharetra.'],
        ['Nam molestie euismod leo id aliquam. In hac habitasse platea dictumst.'],
        ['Vivamus tincidunt feugiat tellus ac bibendum. In rhoncus dignissim suscipit.'],
        ['Pellentesque hendrerit nulla rutrum luctus rutrum. Fusce hendrerit fermentum nunc at posuere.'],
    ]
    for n in ('fts3', 'fts4'):
        result = conn.executemany('INSERT INTO {0} VALUES(?)'.format(n), values)
        assert result.rowcount == len(values)
    conn.create_function('bm25', 2, ranking.bm25)
    conn.create_function('rank', 1, ranking.simple)
    return conn
def db(): name = "test" conn = sqlite3.connect(":memory:") conn.row_factory = sqlite3.Row fts.register_tokenizer(conn, name, fts.make_tokenizer_module(Tokenizer())) conn.execute( "CREATE VIRTUAL TABLE fts3 USING FTS3(tokenize={})".format(name)) conn.execute( "CREATE VIRTUAL TABLE fts4 USING FTS4(tokenize={})".format(name)) values = [ ["Make thing I"], ["Some thing φχικλψ thing"], [ "Fusce volutpat hendrerit sem. Fusce sit amet vulputate dui. " "Sed posuere mi a nisl aliquet tempor. Praesent tincidunt vel nunc ac pharetra." ], [ "Nam molestie euismod leo id aliquam. In hac habitasse platea dictumst." ], [ "Vivamus tincidunt feugiat tellus ac bibendum. In rhoncus dignissim suscipit." ], [ "Pellentesque hendrerit nulla rutrum luctus rutrum. Fusce hendrerit fermentum nunc at posuere." ], ] for n in ("fts3", "fts4"): result = conn.executemany("INSERT INTO {0} VALUES(?)".format(n), values) assert result.rowcount == len(values) conn.create_function("bm25", 2, ranking.bm25) conn.create_function("rank", 1, ranking.simple) return conn
def test_tokenizer_output():
    name = 'simple'
    with apsw.Connection(':memory:') as c:
        fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer()))
        c.cursor().execute("CREATE VIRTUAL TABLE tok1 USING fts3tokenize({})".format(name))
        expect = [("This", 0, 4, 0), ("is", 5, 7, 1), ("a", 8, 9, 2),
                  ("test", 10, 14, 3), ("sentence", 15, 23, 4)]
        for a, e in zip(
                c.cursor().execute("SELECT token, start, end, position "
                                   "FROM tok1 WHERE input='This is a test sentence.'"),
                expect):
            assert e == a
        s = 'これ は テスト の 文 です'
        expect = [(None, 0, -1, 0)]
        for i, t in enumerate(s.split()):
            expect.append((t, expect[-1][2] + 1,
                           expect[-1][2] + 1 + len(t.encode('utf-8')), i))
        expect = expect[1:]
        for a, e in zip(
                c.cursor().execute("SELECT token, start, end, position "
                                   "FROM tok1 WHERE input=?", [s]),
                expect):
            assert e == a
def test_make_tokenizer():
    c = sqlite3.connect(':memory:')
    tokenizer_module = fts.make_tokenizer_module(SimpleTokenizer())
    assert fts.tokenizer.sqlite3_tokenizer_module == type(tokenizer_module)
    c.close()
def register_tokenizer(sqlite_connection):
    tokenizer_module = fts.make_tokenizer_module(SnowballRussianTokenizer())
    fts.register_tokenizer(sqlite_connection, SnowballRussianTokenizer.name,
                           tokenizer_module)
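# Hedged usage sketch for the helper above: once registered, the tokenizer
# is referenced by name in the table DDL. SnowballRussianTokenizer and its
# `name` attribute are assumed to be defined elsewhere in the suite, and
# make_russian_fts() is not part of the original code.
def make_russian_fts(conn):
    register_tokenizer(conn)
    conn.execute("CREATE VIRTUAL TABLE ru USING FTS4(tokenize={})".format(
        SnowballRussianTokenizer.name))
    return conn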
def tokenizer_module():
    return make_tokenizer_module(SimpleFTS5Tokenizer())
def test_full_text_index_queries():
    name = 'simple'
    docs = [
        ('README',
         'sqlitefts-python provides binding for tokenizer of SQLite Full-Text '
         'search(FTS3/4). It allows you to write tokenizers in Python.'),
        ('LICENSE',
         'Permission is hereby granted, free of charge, to any person obtaining a copy '
         'of this software and associated documentation files (the "Software"), to deal '
         'in the Software without restriction, including without limitation the rights '
         'to use, copy, modify, merge, publish, distribute, sublicense, and/or sell '
         'copies of the Software, and to permit persons to whom the Software is '
         'furnished to do so, subject to the following conditions:'),
        ('日本語', 'あいうえお かきくけこ さしすせそ たちつてと なにぬねの'),
    ]
    with apsw.Connection(':memory:') as c:
        fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer()))
        cur = c.cursor()
        cur.execute(
            "CREATE VIRTUAL TABLE docs USING FTS4(title, body, tokenize={})".format(name))
        cur.executemany("INSERT INTO docs(title, body) VALUES(?, ?)", docs)
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'Python'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'bind'").fetchall()
        assert len(r) == 0
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'binding'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'to'").fetchall()
        assert len(r) == 2
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'らりるれろ'").fetchall()
        assert len(r) == 0
        # Matching the whole table, a named column, or a column-qualified
        # query should return the same row.
        assert (cur.execute("SELECT * FROM docs WHERE docs MATCH 'binding'").fetchall()[0] ==
                cur.execute("SELECT * FROM docs WHERE body MATCH 'binding'").fetchall()[0])
        assert (cur.execute("SELECT * FROM docs WHERE body MATCH 'binding'").fetchall()[0] ==
                cur.execute("SELECT * FROM docs WHERE docs MATCH 'body:binding'").fetchall()[0])
        assert (cur.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお'").fetchall()[0] ==
                cur.execute("SELECT * FROM docs WHERE body MATCH 'あいうえお'").fetchall()[0])
        assert (cur.execute("SELECT * FROM docs WHERE body MATCH 'かきくけこ'").fetchall()[0] ==
                cur.execute("SELECT * FROM docs WHERE docs MATCH 'body:かきくけこ'").fetchall()[0])
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'title:bind'").fetchall()
        assert len(r) == 0
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'title:README'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'title:日本語'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE title MATCH 'bind'").fetchall()
        assert len(r) == 0
        r = cur.execute("SELECT * FROM docs WHERE title MATCH 'README'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE title MATCH '日本語'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'to in'").fetchall()
        assert len(r) == 2
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'Py*'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'Z*'").fetchall()
        assert len(r) == 0
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'あ*'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'ん*'").fetchall()
        assert len(r) == 0
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'tokenizer SQLite'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH '\"tokenizer SQLite\"'").fetchall()
        assert len(r) == 0
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお たちつてと'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH '\"あいうえお たちつてと\"'").fetchall()
        assert len(r) == 0
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH '\"tok* SQL*\"'").fetchall()
        assert len(r) == 0
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH '\"tok* of SQL*\"'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH '\"あ* さ*\"'").fetchall()
        assert len(r) == 0
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH '\"あ* かきくけこ さ*\"'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'tokenizer NEAR SQLite'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'binding NEAR/2 SQLite'").fetchall()
        assert len(r) == 0
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'binding NEAR/3 SQLite'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお NEAR たちつてと'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお NEAR/2 たちつてと'").fetchall()
        assert len(r) == 1
        r = cur.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお NEAR/3 たちつてと'").fetchall()
        assert len(r) == 1
def tokenizer_module():
    return fts.make_tokenizer_module(SimpleTokenizer())
def test_make_tokenizer():
    c = apsw.Connection('ouLatin.db')
    tokenizer_module = fts.make_tokenizer_module(word_tokenizer)
    assert fts.tokenizer.sqlite3_tokenizer_module == type(tokenizer_module)
    c.close()
def create_table(c):
    # Register the same morphological analyzer for both FTS4 and FTS5;
    # FTS5 uses its own registration and factory API.
    fts.register_tokenizer(c, 'igo', fts.make_tokenizer_module(IgoTokenizer()))
    fts5.register_tokenizer(c, 'igo', fts5.make_fts5_tokenizer(IgoTokenizer5()))
    c.execute("CREATE VIRTUAL TABLE fts USING FTS4(tokenize=igo)")
    c.execute("CREATE VIRTUAL TABLE fts5 USING FTS5(w, tokenize=igo)")
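# Hedged smoke-test sketch for create_table() above: FTS4 and FTS5 tables
# are queried the same way via MATCH. smoke_igo() is not part of the
# original suite, and the assumption that igo segments this sentence so
# that 'テスト' is a token is an assumption about the analyzer.
def smoke_igo(c):
    create_table(c)
    c.execute("INSERT INTO fts VALUES(?)", ('これはテストです',))
    c.execute("INSERT INTO fts5(w) VALUES(?)", ('これはテストです',))
    assert c.execute("SELECT * FROM fts WHERE fts MATCH 'テスト'").fetchall()
    assert c.execute("SELECT * FROM fts5 WHERE fts5 MATCH 'テスト'").fetchall()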
def test_full_text_index_queries():
    name = 'simple'
    docs = [
        ('README',
         'sqlitefts-python provides binding for tokenizer of SQLite Full-Text '
         'search(FTS3/4). It allows you to write tokenizers in Python.'),
        ('LICENSE',
         'Permission is hereby granted, free of charge, to any person obtaining a copy '
         'of this software and associated documentation files (the "Software"), to deal '
         'in the Software without restriction, including without limitation the rights '
         'to use, copy, modify, merge, publish, distribute, sublicense, and/or sell '
         'copies of the Software, and to permit persons to whom the Software is '
         'furnished to do so, subject to the following conditions:'),
        ('日本語', 'あいうえお かきくけこ さしすせそ たちつてと なにぬねの'),
    ]
    with sqlite3.connect(':memory:') as c:
        c.row_factory = sqlite3.Row
        fts.register_tokenizer(c, name, fts.make_tokenizer_module(SimpleTokenizer()))
        c.execute("CREATE VIRTUAL TABLE docs USING FTS4(title, body, tokenize={})".format(name))
        c.executemany("INSERT INTO docs(title, body) VALUES(?, ?)", docs)
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'Python'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'bind'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'binding'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'to'").fetchall()
        assert len(r) == 2
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'らりるれろ'").fetchall()
        assert len(r) == 0
        assert (c.execute("SELECT * FROM docs WHERE docs MATCH 'binding'").fetchall()[0] ==
                c.execute("SELECT * FROM docs WHERE body MATCH 'binding'").fetchall()[0])
        assert (c.execute("SELECT * FROM docs WHERE body MATCH 'binding'").fetchall()[0] ==
                c.execute("SELECT * FROM docs WHERE docs MATCH 'body:binding'").fetchall()[0])
        assert (c.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお'").fetchall()[0] ==
                c.execute("SELECT * FROM docs WHERE body MATCH 'あいうえお'").fetchall()[0])
        assert (c.execute("SELECT * FROM docs WHERE body MATCH 'かきくけこ'").fetchall()[0] ==
                c.execute("SELECT * FROM docs WHERE docs MATCH 'body:かきくけこ'").fetchall()[0])
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'title:bind'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'title:README'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'title:日本語'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE title MATCH 'bind'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE title MATCH 'README'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE title MATCH '日本語'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'to in'").fetchall()
        assert len(r) == 2
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'Py*'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'Z*'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'あ*'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'ん*'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'tokenizer SQLite'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH '\"tokenizer SQLite\"'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお たちつてと'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH '\"あいうえお たちつてと\"'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH '\"tok* SQL*\"'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH '\"tok* of SQL*\"'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH '\"あ* さ*\"'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH '\"あ* かきくけこ さ*\"'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'tokenizer NEAR SQLite'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'binding NEAR/2 SQLite'").fetchall()
        assert len(r) == 0
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'binding NEAR/3 SQLite'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお NEAR たちつてと'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお NEAR/2 たちつてと'").fetchall()
        assert len(r) == 1
        r = c.execute("SELECT * FROM docs WHERE docs MATCH 'あいうえお NEAR/3 たちつてと'").fetchall()
        assert len(r) == 1