def _get_sentence_tokenizer(self, language):
    """Return the sentence tokenizer to use for *language*.

    Languages listed in ``self.SPECIAL_SENTENCE_TOKENIZERS`` get the
    tokenizer registered there. Every other language is loaded from the
    NLTK resource ``tokenizers/punkt/<language>.pickle``.

    Raises:
        LookupError: if loading the NLTK resource fails with
            ``LookupError`` or ``zipfile.BadZipfile``; the message
            contains the command that downloads the missing data.
    """
    special_tokenizers = self.SPECIAL_SENTENCE_TOKENIZERS
    if language in special_tokenizers:
        return special_tokenizers[language]

    try:
        path_template = to_string("tokenizers/punkt/%s.pickle")
        resource_path = path_template % to_string(language)
        tokenizer = nltk.data.load(resource_path)
    except (LookupError, zipfile.BadZipfile):
        # Both errors are reported the same way: as missing NLTK data,
        # with a hint on how to install it.
        raise LookupError(
            "NLTK tokenizers are missing. Download them by following command: "
            '''python -c "import nltk; nltk.download('punkt')"''')

    return tokenizer
def expand_resource_path(path):
    """Return the location of resource *path* inside the ``data`` directory.

    The result is built from the absolute directory of this file, the
    ``data`` directory name and *path*; the latter two are passed through
    ``to_string`` before joining.
    """
    module_directory = abspath(dirname(__file__))
    data_directory_name = to_string("data")
    resource_name = to_string(path)
    return join(module_directory, data_directory_name, resource_name)
def test_to_string():
    """Check ``compat.to_string`` on an ``O`` instance against ``NATIVE_STRING``."""
    instance = O()
    converted = compat.to_string(instance)
    _assert_strings_equal(NATIVE_STRING, converted)
# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals

import pytest

from sumy import _compat as compat


# The same sample text in three forms: UTF-8 encoded bytes, unicode text,
# and whatever ``compat.to_string`` produces from the unicode text.
BYTES_STRING = "ľščťžáýíééäúňô €đ€Ł¤".encode("utf-8")
UNICODE_STRING = "ľščťžáýíééäúňô €đ€Ł¤"
NATIVE_STRING = compat.to_string(UNICODE_STRING)


@compat.unicode_compatible
class O(object):
    """Minimal fixture that defines only ``__unicode__``.

    NOTE(review): presumably ``compat.unicode_compatible`` derives the other
    string conversions from ``__unicode__`` - confirm in ``sumy._compat``.
    """

    def __unicode__(self):
        return UNICODE_STRING


def _assert_strings_equal(str1, str2):
    """Assert that both strings have exactly the same type and are equal."""
    # The type is checked first so a bytes/unicode mix-up is reported as a
    # type mismatch rather than as unequal values.
    assert type(str1) is type(str2)
    assert str1 == str2


@pytest.mark.skipif(not compat.PY3, reason="Python 2 doesn't support method `__bytes__`")
def test_native_bytes():
    """``bytes()`` of an ``O`` instance must equal ``BYTES_STRING``."""
    instance = O()
    encoded = bytes(instance)
    _assert_strings_equal(BYTES_STRING, encoded)
def test_to_string(self):
    """Check ``compat.to_string`` on the fixture ``self.o`` against ``NATIVE_STRING``."""
    fixture = self.o
    converted = compat.to_string(fixture)
    self.assertStringsEqual(NATIVE_STRING, converted)
# -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import division, print_function, unicode_literals import unittest import pytest from sumy import _compat as compat BYTES_STRING = "ľščťžáýíééäúňô €đ€Ł¤".encode("utf-8") UNICODE_STRING = "ľščťžáýíééäúňô €đ€Ł¤" NATIVE_STRING = compat.to_string(UNICODE_STRING) @compat.unicode_compatible class O(object): def __unicode__(self): return UNICODE_STRING class TestObject(unittest.TestCase): def setUp(self): self.o = O() def assertStringsEqual(self, str1, str2, *args): self.assertEqual(type(str1), type(str2), *args) self.assertEqual(str1, str2, *args) def test_native_bytes(self):