Python Tokenizer Examples

Programming Language: Python

Namespace/Package Name: BM25F.ja

Class/Type: Tokenizer

Examples at hotexamples.com: 6

Python Tokenizer - 6 examples found. These are the top-rated real-world Python examples of BM25F.ja.Tokenizer, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Tokenizer(6)

tokenize_smartly(3)

Frequently Used Methods

Tokenizer (6)

tokenize_smartly (3)

Example #1

Show file

 def setUpClass(cls):
     """Build the shared fixture: four documents in a bag_jag plus a query.

     Documents '0' through '3' carry progressively fewer text fields
     (title/body/anchor, title/body, body only, none). Each parsed document
     is stored on the class as ``bd0`` .. ``bd3`` and appended to ``cls.bj``
     in that order. The query maps two terms to 1 each.
     """
     tok = Tokenizer()
     cls.bj = bag_jag()
     raw_documents = (
         {
             '_id': '0',
             'title': 'テストデータ',
             'body': 'テスト',
             'anchor': 'モニタ',
         },
         {
             '_id': '1',
             'title': 'テストデータ',
             'body': 'テスト',
         },
         {
             '_id': '2',
             'body': 'テスト',
         },
         {
             '_id': '3',
         },
     )
     for index, fields in enumerate(raw_documents):
         parsed = bag_dict().read(tok, fields)
         # Keep each parsed document reachable as cls.bd0 .. cls.bd3.
         setattr(cls, 'bd%d' % index, parsed)
         cls.bj.append(parsed)
     query = bag_of_words()
     query['テスト'] = 1
     query['モニタ'] = 1
     cls.query = query

Example #2

Show file

 def test_weight_continuous(self):
     """Check that ``weight`` equals each document's '~pv' value times 1.0.

     Three documents containing only a '~pv' field (1, 10, 100) are added
     to one bag_jag; the queried term appears in none of them.
     """
     tok = Tokenizer()
     jag = bag_jag()
     cases = []
     for pv in (1, 10, 100):
         doc = bag_dict().read(tok, {'~pv': pv})
         jag.append(doc)
         cases.append((pv, doc))
     # Assert only after every document is in the collection, since the
     # collection is passed to weight() alongside each document.
     for pv, doc in cases:
         self.assertEqual((pv * 1.0), weight('ダミー', doc, jag))

Example #3

Show file

 def setUpClass(cls):
     """Create a shared tokenizer and a bag_jag holding four documents.

     The documents carry progressively fewer fields (title/body/anchor,
     title/body, body only, empty). All four are parsed first and then
     appended in order through a chain of ``append`` calls.
     """
     cls.tokenizer = Tokenizer()
     cls.bj = bag_jag()
     field_sets = [
         {
             'title': 'テストデータ',
             'body': 'テスト',
             'anchor': 'モニタ',
         },
         {
             'title': 'テストデータ',
             'body': 'テスト',
         },
         {
             'body': 'テスト',
         },
         {},
     ]
     parsed_docs = [
         bag_dict().read(cls.tokenizer, fields) for fields in field_sets
     ]
     # Rebind to each append() result so the calls stay chained, exactly
     # as in ``a.append(x).append(y)``.
     target = cls.bj
     for doc in parsed_docs:
         target = target.append(doc)

Example #4

Show file

File: test_ja.py Project: ftartarus/BM25F-master

 def test_tokenizer_with_pos_filter(self):
     """Tokenize 'テストのデータ' with ``self.pos_filter`` applied.

     The expected result holds two (text, label) pairs; the 'の' between
     them in the input is absent from the output.
     """
     tokenizer = Tokenizer(pos_filter=self.pos_filter)
     expected = [
         ('テスト', '名詞-サ変接続'),
         ('データ', '名詞-一般'),
     ]
     actual = tokenizer.tokenize_smartly('テストのデータ')
     self.assertEqual(expected, actual)

Example #5

Show file

File: test_ja.py Project: ftartarus/BM25F-master

 def test_tokenizer_with_stem_filter(self):
     """Tokenize 'その他テストデータ' with ``self.stem_filter`` applied.

     The expected result holds two (text, label) pairs; the leading
     'その他' of the input is absent from the output.
     """
     tokenizer = Tokenizer(stem_filter=self.stem_filter)
     expected = [
         ('テスト', '名詞-サ変接続'),
         ('データ', '名詞-一般'),
     ]
     actual = tokenizer.tokenize_smartly('その他テストデータ')
     self.assertEqual(expected, actual)

Example #6

Show file

File: test_ja.py Project: ftartarus/BM25F-master

 def test_tokenizer(self):
     """Tokenize 'テストデータ' with a default-constructed Tokenizer.

     The input is expected to split into two (text, label) pairs.
     """
     tokenizer = Tokenizer()
     expected = [
         ('テスト', '名詞-サ変接続'),
         ('データ', '名詞-一般'),
     ]
     actual = tokenizer.tokenize_smartly('テストデータ')
     self.assertEqual(expected, actual)