def test_en_simple(self):
    # Trigger reference-counting bug that was fixed in PyICU 1.0.1:
    # https://github.com/ovalhub/pyicu/commit/515e076682e29d806aeb5f6b1016b799d03d92a9
    icu = get_icu()
    # The test needs a usable ICU binding; fail loudly when get_icu()
    # returns None.
    assert_not_equal(icu, None)
    # The Locale object is deliberately passed as a temporary.
    # NOTE(review): presumably that is what exercised the refcount bug -- confirm
    # before refactoring this call.
    t = list(word_break_iterator('eggs', icu.Locale('en')))
    # A single word yields exactly one break position, at its end.
    assert_equal(t, [4])
def test_invalid_signo(self):
    # signal.NSIG is guaranteed not to be a valid signal number, so the
    # expected message carries the bare number.
    bogus_signo = signal.NSIG
    error = ipc.CalledProcessInterrupted(bogus_signo, 'eggs')
    expected = "Command 'eggs' was interrupted by signal {0}".format(bogus_signo)
    assert_equal(str(error), expected)
    assert_false(error.by_user)
def test_en(self):
    # Break positions expected for the module-level sample ``text`` when the
    # English ICU locale is used.
    icu = get_icu()
    assert_not_equal(icu, None)
    breaks = list(word_break_iterator(text, icu.Locale('en')))
    expected = [
        1, 7, 8, 9, 10, 15, 16, 24, 25, 26,
        30, 31, 32, 33, 37, 38, 43, 44, 45, 46,
        47, 48, 54, 55, 57, 58, 61, 62, 65, 66,
        67,
    ]
    assert_equal(breaks, expected)
    # Sanity check of the fixture itself: the final break is the end of text.
    assert_equal(expected[-1], len(text))
def test_nolocale(self):
    # Break positions expected for the module-level sample ``text`` when no
    # locale is supplied.
    breaks = list(word_break_iterator(text))
    expected = [
        9, 10, 15, 16, 25, 26, 30, 31, 32, 33,
        37, 38, 44, 45, 47, 48, 54, 55, 57, 58,
        61, 62, 67,
    ]
    assert_equal(breaks, expected)
    # Sanity check of the fixture itself: the final break is the end of text.
    assert_equal(expected[-1], len(text))
def _test_signal(self, name):
    # Kill a child with the signal called ``name`` and check that wait()
    # reports it under that symbolic name.
    # Any long-standing process would do.
    victim = ipc.Subprocess(['cat'], stdin=ipc.PIPE)
    signo = getattr(signal, name)
    os.kill(victim.pid, signo)
    with assert_raises(ipc.CalledProcessInterrupted) as ecm:
        victim.wait()
    expected = "Command 'cat' was interrupted by signal " + name
    assert_equal(str(ecm.exception), expected)
def test1(self):
    # ``false`` exits with status 1, which wait() must turn into
    # CalledProcessError.
    failing = ipc.Subprocess(['false'])
    with assert_raises(ipc.CalledProcessError) as ecm:
        failing.wait()
    text_of_error = str(ecm.exception)
    # subprocess32 >= 3.5 terminates the message with a full stop; drop it so
    # one expected string serves both variants.
    message = text_of_error[:-1] if text_of_error[-1] == '.' else text_of_error
    assert_equal(message, "Command 'false' returned non-zero exit status 1")
def test_help():
    # --help: nothing on stderr, exit code 0, some text on stdout.
    out = io.BytesIO()
    err = io.BytesIO()
    argv = ['', '--help']
    with interim(sys, stdout=out, stderr=err):
        rc = try_run(hocr2djvused.main, argv)
    assert_equal(err.getvalue(), '')
    assert_equal(rc, 0)
    assert_not_equal(out.getvalue(), '')
def test_bad_options():
    # An unknown option: fatal exit code, a message on stderr, silent stdout.
    out = io.BytesIO()
    err = io.BytesIO()
    argv = ['', '--bad-option']
    with interim(sys, stdout=out, stderr=err):
        rc = try_run(hocr2djvused.main, argv)
    assert_equal(rc, errors.EXIT_FATAL)
    assert_not_equal(err.getvalue(), '')
    assert_equal(out.getvalue(), '')
def test_help():
    # --help: exit code 0, nothing on stderr, some text on stdout.
    out = io.BytesIO()
    err = io.BytesIO()
    argv = ['', '--help']
    with interim(sys, stdout=out, stderr=err):
        rc = try_run(djvu2hocr.main, argv)
    assert_equal(rc, 0)
    assert_equal(err.getvalue(), '')
    assert_not_equal(out.getvalue(), '')
def test_bad_options():
    # Running with no arguments at all: fatal exit code, a message on stderr,
    # silent stdout.
    out = io.BytesIO()
    err = io.BytesIO()
    argv = ['']
    with interim(sys, stdout=out, stderr=err):
        rc = try_run(djvu2hocr.main, argv)
    assert_equal(rc, errors.EXIT_FATAL)
    assert_not_equal(err.getvalue(), '')
    assert_equal(out.getvalue(), '')
def _test_list_languages(engine):
    # --list-languages for ``engine``: nothing on stderr, exit code 0, and a
    # non-empty listing on stdout.
    out = io.BytesIO()
    err = io.BytesIO()
    argv = ['', '--engine', engine, '--list-languages']
    with interim(sys, stdout=out, stderr=err):
        rc = try_run(ocrodjvu.main, argv)
    assert_equal(err.getvalue(), '')
    assert_equal(rc, 0)
    assert_not_equal(out.getvalue(), '')
def test_list_engines():
    # --list-engines must succeed quietly; its output lines are stored in the
    # module-level ``engines`` variable.
    global engines
    out = io.BytesIO()
    err = io.BytesIO()
    argv = ['', '--list-engines']
    with interim(sys, stdout=out, stderr=err):
        rc = try_run(ocrodjvu.main, argv)
    assert_equal(err.getvalue(), '')
    assert_equal(rc, 0)
    listing = out.getvalue()
    engines = listing.splitlines()
def test_fail(self):
    # require() on a missing command must raise OSError carrying ENOENT and a
    # "command not found" message that quotes the command name.
    missing = 'ocrodjvu-nonexistent'
    with assert_raises(OSError) as ecm:
        ipc.require(missing)
    template = "[Errno {errno.ENOENT}] command not found: {cmd!r}"
    expected = template.format(errno=errno, cmd=missing)
    assert_equal(str(ecm.exception), expected)
def test_version():
    # https://bugs.debian.org/573496
    # --version: nothing on stderr, exit code 0, some text on stdout.
    out = io.BytesIO()
    err = io.BytesIO()
    argv = ['', '--version']
    with interim(sys, stdout=out, stderr=err):
        rc = try_run(hocr2djvused.main, argv)
    assert_equal(err.getvalue(), '')
    assert_equal(rc, 0)
    assert_not_equal(out.getvalue(), '')
def test1(self):
    # A variable set through interim_environ() must be visible to a child
    # process started while the override is active.
    command = ['sh', '-c', 'printf $ocrodjvu']
    with interim_environ(ocrodjvu='42'):
        child = ipc.Subprocess(command, stdout=ipc.PIPE, stderr=ipc.PIPE)
        out, err = child.communicate()
        assert_equal(out, '42')
        assert_equal(err, '')
def _test_list_languages(engine):
    # --list-languages for ``engine``: exit code 0, nothing on stderr, and a
    # non-empty listing on stdout.
    out = io.BytesIO()
    err = io.BytesIO()
    argv = ['', '--engine', engine, '--list-languages']
    with interim(sys, stdout=out, stderr=err):
        rc = try_run(ocrodjvu.main, argv)
    assert_equal(rc, 0)
    assert_equal(err.getvalue(), '')
    assert_not_equal(out.getvalue(), '')
def test_list_engines():
    # --list-engines must succeed quietly; its output lines are stored in the
    # module-level ``engines`` variable.
    global engines
    out = io.BytesIO()
    err = io.BytesIO()
    argv = ['', '--list-engines']
    with interim(sys, stdout=out, stderr=err):
        rc = try_run(ocrodjvu.main, argv)
    assert_equal(rc, 0)
    assert_equal(err.getvalue(), '')
    listing = out.getvalue()
    engines = listing.splitlines()
def test_version():
    # https://bugs.debian.org/573496
    # --version: exit code 0, nothing on stderr, some text on stdout.
    out = io.BytesIO()
    err = io.BytesIO()
    argv = ['', '--version']
    with interim(sys, stdout=out, stderr=err):
        rc = try_run(djvu2hocr.main, argv)
    assert_equal(rc, 0)
    assert_equal(err.getvalue(), '')
    assert_not_equal(out.getvalue(), '')
def test_init_exc():
    # https://bugs.python.org/issue32490
    # Starting a missing program must raise an error whose text is the
    # ENOENT message followed by the repr of the program name.
    missing = 'ocrodjvu-nonexistent'
    with assert_raises(EnvironmentError) as ecm:
        ipc.Subprocess([missing])
    code = errno.ENOENT
    expected = '[Errno {err}] {strerr}: {prog!r}'.format(
        err=code,
        strerr=os.strerror(code),
        prog=missing,
    )
    assert_equal(str(ecm.exception), expected)
def test_init_exc():
    # https://bugs.python.org/issue32490
    # Starting a missing program must raise an error whose text is the
    # ENOENT message followed by the repr of the program name.
    missing = 'ocrodjvu-nonexistent'
    with assert_raises(EnvironmentError) as ecm:
        ipc.Subprocess([missing])
    code = errno.ENOENT
    expected = '[Errno {err}] {strerr}: {prog!r}'.format(
        err=code,
        strerr=os.strerror(code),
        prog=missing,
    )
    assert_equal(str(ecm.exception), expected)
def test_print_sexpr():
    # The expected rendering depends on the python-djvulibre version: before
    # 0.4 the non-ASCII bytes are expected as octal escapes, later verbatim.
    source = 'jeż'
    escaped = python_djvulibre_version < V('0.4')
    expected = r'"je\305\274"' if escaped else '"jeż"'
    buf = io.BytesIO()
    expression = text_zones.sexpr.Expression(source)
    text_zones.print_sexpr(expression, buf)
    buf.seek(0)
    assert_equal(buf.getvalue(), expected)
def t():
    # enhance_import_error() is called with None as its third argument; the
    # re-raised ImportError must then advertise the PyNonexistent package
    # together with its URL.
    expected = (
        'No module named nonexistent; '
        'please install the PyNonexistent package <http://pynonexistent.example.net/>'
    )
    with assert_raises(ImportError) as ecm:
        try:
            import nonexistent
        except ImportError as ex:
            enhance_import_error(
                ex,
                'PyNonexistent',
                None,
                'http://pynonexistent.example.net/',
            )
            raise
        nonexistent.f()  # quieten pyflakes
    assert_equal(str(ecm.exception), expected)
def test_debian(self):
    # With lib.utils.debian forced to True, the enhanced message must name the
    # 'python-nonexistent' package and omit the URL.
    expected = (
        'No module named nonexistent; '
        'please install the python-nonexistent package'
    )
    with interim(lib.utils, debian=True):
        with assert_raises(ImportError) as ecm:
            try:
                import nonexistent
            except ImportError as ex:
                enhance_import_error(
                    ex,
                    'PyNonexistent',
                    'python-nonexistent',
                    'http://pynonexistent.example.net/',
                )
                raise
            nonexistent.f()  # quieten pyflakes
        assert_equal(str(ecm.exception), expected)
def _rough_test_from_file(base_filename, args):
    # Smoke test: feed <base_filename>.html to hocr2djvused on stdin and only
    # require a zero exit code and non-empty output.
    argv = ['#'] + shlex.split(args)
    needs_page_size = base_filename.endswith(('cuneiform0.7', 'cuneiform0.8'))
    if needs_page_size:
        # Add dummy page-size information
        argv.append('--page-size=1000x1000')
    full_base = os.path.join(here, base_filename)
    html_filename = '{base}.html'.format(base=full_base)
    with contextlib.closing(io.BytesIO()) as captured:
        with open(html_filename, 'rb') as html_file:
            with interim(sys, stdin=html_file, stdout=captured):
                rc = try_run(hocr2djvused.main, argv)
        assert_equal(rc, 0)
        # Must be read before the buffer is closed on leaving the block.
        output = captured.getvalue()
    assert_not_equal(output, '')
def test_control_characters(self):
    # Every byte below 0x20 except TAB, LF and CR must come back as U+FFFD,
    # and each warning raised along the way must mention a control character.
    def show(message, category, filename, lineno, file=None, line=None):
        # Installed as warnings.showwarning: re-raise the warning so that its
        # type and text can be checked.
        with assert_raises_regex(EncodingWarning, '.*control character.*'):
            raise message
    raw = ''.join(chr(code) for code in xrange(32))
    with warnings.catch_warnings():
        warnings.showwarning = show
        sanitized = sanitize_utf8(raw)
        decoded = sanitized.decode('UTF-8')
    expected = (
        u'\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD'
        u'\uFFFD\t\n\uFFFD\uFFFD\r\uFFFD\uFFFD'
        u'\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD'
        u'\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD'
    )
    assert_equal(decoded, expected)
def test_get_thread_limit():
    # Check get_thread_limit() for every combination of 0..9 items and 1..9
    # jobs.
    #
    # With no items the limit must be 1.  Otherwise, with
    # npitems = min(nitems, njobs), the limit must be the largest integer
    # whose product with npitems does not exceed njobs.
    #
    # (A nested helper that was defined here but never called has been
    # removed; it had no effect on the test.)
    for nitems in range(0, 10):
        for njobs in range(1, 10):
            lim = lib.utils.get_thread_limit(nitems, njobs)
            assert_is_instance(lim, int)
            if nitems == 0:
                assert_equal(lim, 1)
            else:
                npitems = min(nitems, njobs)
                # lim * npitems <= njobs < (lim + 1) * npitems
                assert_less_equal(lim * npitems, njobs)
                assert_greater((lim + 1) * npitems, njobs)
def test_non_utf8(self):
    # An ISO-8859-2 encoded letter inside otherwise valid UTF-8 must be
    # replaced by U+FFFD, with a warning about an invalid continuation byte.
    def show(message, category, filename, lineno, file=None, line=None):
        # Installed as warnings.showwarning: re-raise the warning so that its
        # type and text can be checked.
        with assert_raises_regex(EncodingWarning, '.* invalid continuation byte'):
            raise message
    pangram = 'Jeżu klątw, spłódź Finom część gry hańb'
    good = 'ó'
    bad = good.decode('UTF-8').encode('ISO-8859-2')
    replacement = u'\N{REPLACEMENT CHARACTER}'.encode('UTF-8')
    damaged = pangram.replace(good, bad)
    expected = pangram.replace(good, replacement)
    with warnings.catch_warnings():
        warnings.showwarning = show
        sanitized = sanitize_utf8(damaged)
    assert_equal(expected, sanitized)
def _test_ocr(engine, layers):
    # Run ocrodjvu on data/alice.djvu with the given engine and render mode,
    # saving into a temporary bundled file.  The run must be silent and
    # return 0.  The test is skipped when the engine executable is not found.
    if not distutils.spawn.find_executable(engine):
        raise SkipTest('{cmd} not found'.format(cmd=engine))
    remove_logging_handlers('ocrodjvu.')
    here = os.path.abspath(os.path.dirname(__file__))
    path = os.path.join(here, '..', 'data', 'alice.djvu')
    out = io.BytesIO()
    err = io.BytesIO()
    with temporary.directory() as tmpdir:
        tmp_path = os.path.join(tmpdir, 'tmp.djvu')
        argv = [
            '',
            '--engine', engine,
            '--render', layers,
            '--save-bundled', tmp_path,
            path,
        ]
        with interim(sys, stdout=out, stderr=err):
            rc = try_run(ocrodjvu.main, argv)
    assert_multi_line_equal(err.getvalue(), '')
    assert_equal(rc, 0)
    assert_multi_line_equal(out.getvalue(), '')
def test_nonascii_path():
    # djvu2hocr must cope with an input path containing non-ASCII characters;
    # data/empty.djvu is reached through a symlink with a Cyrillic name.
    require_locale_encoding('UTF-8')  # djvused breaks otherwise
    remove_logging_handlers('ocrodjvu.')
    here = os.path.abspath(os.path.dirname(__file__))
    path = os.path.join(here, '..', 'data', 'empty.djvu')
    out = io.BytesIO()
    err = io.BytesIO()
    with temporary.directory() as tmpdir:
        tmp_path = os.path.join(tmpdir, 'тмп.djvu')
        os.symlink(path, tmp_path)
        argv = ['', tmp_path]
        with interim(sys, stdout=out, stderr=err):
            rc = try_run(djvu2hocr.main, argv)
    assert_equal(err.getvalue(), '')
    assert_equal(rc, 0)
    assert_not_equal(out.getvalue(), '')
def _test_from_file(base_filename, index, extra_args):
    # Data-driven test.  <base>.test<index> holds a command line on its first
    # line (starting with '#') and the expected djvused script after it;
    # <base>.html is fed to hocr2djvused on stdin.
    full_base = os.path.join(here, base_filename)
    test_filename = '{base}.test{i}'.format(base=full_base, i=index)
    html_filename = '{base}.html'.format(base=full_base)
    with open(test_filename, 'rb') as test_file:
        commandline = test_file.readline()
        expected_output = test_file.read()
    args = shlex.split(commandline)
    args = args + shlex.split(extra_args)
    assert_equal(args[0], '#')
    with contextlib.closing(io.BytesIO()) as captured:
        with open(html_filename, 'rb') as html_file:
            with interim(sys, stdin=html_file, stdout=captured):
                rc = try_run(hocr2djvused.main, args)
        assert_equal(rc, 0)
        # Must be read before the buffer is closed on leaving the block.
        output = captured.getvalue()
    expected_normalized = normalize_djvused(expected_output)
    actual_normalized = normalize_djvused(output)
    assert_multi_line_equal(expected_normalized, actual_normalized)
def test_path(self):
    # A command that exists only in a directory prepended to PATH through
    # interim_environ() must be found and run by ipc.Subprocess.
    search_dirs = os.getenv('PATH').split(':')
    with temporary.directory() as tmpdir:
        command_name = temporary.name(dir=tmpdir)
        command_path = os.path.join(tmpdir, command_name)
        with open(command_path, 'wt') as script:
            print('#!/bin/sh', file=script)
            print('printf 42', file=script)
        os.chmod(command_path, stat.S_IRWXU)
        search_dirs.insert(0, tmpdir)
        new_path = ':'.join(search_dirs)
        with interim_environ(PATH=new_path):
            child = ipc.Subprocess(
                [command_name],
                stdout=ipc.PIPE,
                stderr=ipc.PIPE,
            )
            out, err = child.communicate()
            assert_equal(out, '42')
            assert_equal(err, '')
def _test_ocr(engine, layers):
    # Run ocrodjvu on data/alice.djvu with the given engine and render mode,
    # saving into a temporary bundled file.  The run must be silent and
    # return 0.  The test is skipped when the engine executable is not found.
    if not distutils.spawn.find_executable(engine):
        raise SkipTest('{cmd} not found'.format(cmd=engine))
    remove_logging_handlers('ocrodjvu.')
    here = os.path.abspath(os.path.dirname(__file__))
    path = os.path.join(here, '..', 'data', 'alice.djvu')
    out = io.BytesIO()
    err = io.BytesIO()
    with temporary.directory() as tmpdir:
        tmp_path = os.path.join(tmpdir, 'tmp.djvu')
        argv = [
            '',
            '--engine', engine,
            '--render', layers,
            '--save-bundled', tmp_path,
            path,
        ]
        with interim(sys, stdout=out, stderr=err):
            rc = try_run(ocrodjvu.main, argv)
    assert_multi_line_equal(err.getvalue(), '')
    assert_equal(rc, 0)
    assert_multi_line_equal(out.getvalue(), '')
def _test_from_file(base_filename, index, extra_args):
    # Data-driven test.  <base>.test<index> holds a command line on its first
    # line (starting with '#') and the expected djvused script after it;
    # <base>.html is fed to hocr2djvused on stdin.
    full_base = os.path.join(here, base_filename)
    test_filename = '{base}.test{i}'.format(base=full_base, i=index)
    html_filename = '{base}.html'.format(base=full_base)
    with open(test_filename, 'rb') as test_file:
        commandline = test_file.readline()
        expected_output = test_file.read()
    args = shlex.split(commandline)
    args = args + shlex.split(extra_args)
    assert_equal(args[0], '#')
    with contextlib.closing(io.BytesIO()) as captured:
        with open(html_filename, 'rb') as html_file:
            with interim(sys, stdin=html_file, stdout=captured):
                rc = try_run(hocr2djvused.main, args)
        assert_equal(rc, 0)
        # Must be read before the buffer is closed on leaving the block.
        output = captured.getvalue()
    expected_normalized = normalize_djvused(expected_output)
    actual_normalized = normalize_djvused(output)
    assert_multi_line_equal(expected_normalized, actual_normalized)
def test_path(self):
    # A command that exists only in a directory prepended to PATH through
    # interim_environ() must be found and run by ipc.Subprocess.
    search_dirs = os.getenv('PATH').split(':')
    with temporary.directory() as tmpdir:
        command_name = temporary.name(dir=tmpdir)
        command_path = os.path.join(tmpdir, command_name)
        with open(command_path, 'wt') as script:
            print('#!/bin/sh', file=script)
            print('printf 42', file=script)
        os.chmod(command_path, stat.S_IRWXU)
        search_dirs.insert(0, tmpdir)
        new_path = ':'.join(search_dirs)
        with interim_environ(PATH=new_path):
            child = ipc.Subprocess(
                [command_name],
                stdout=ipc.PIPE,
                stderr=ipc.PIPE,
            )
            out, err = child.communicate()
            assert_equal(out, '42')
            assert_equal(err, '')
def test_encoded_string(self):
    # smart_repr() output must evaluate back to the original byte string.
    # Only literals defined in this test are ever passed to eval().
    plain_samples = ('', '\f', 'eggs', '''e'gg"s''')
    accented_samples = ('jeż', '''j'e"ż''')
    for sample in plain_samples:
        for encoding in ('ASCII', 'UTF-8'):
            assert_equal(eval(smart_repr(sample, encoding)), sample)
    # For ASCII the representation itself must be pure ASCII.
    for sample in accented_samples:
        rendered = smart_repr(sample, 'ASCII')
        assert_is_instance(rendered, str)
        rendered.decode('ASCII')  # raises if a non-ASCII byte is present
        assert_equal(eval(rendered), sample)
    # For UTF-8 the accented letter must be kept verbatim.
    for sample in accented_samples:
        rendered = smart_repr(sample, 'UTF-8')
        assert_is_instance(rendered, str)
        assert_in('ż', rendered)
        assert_equal(eval(rendered), sample)
def _test_from_file(base_filename, index):
    # Data-driven test for djvu2hocr.
    #
    # <base>.test<index> holds a command line on its first line (starting
    # with '#') followed by the expected, xmllint-formatted output;
    # <base>.djvused is a djvused script applied to a copy of data/empty.djvu
    # before djvu2hocr is run on that copy.
    base_filename = os.path.join(here, base_filename)
    test_filename = '{base}.test{i}'.format(base=base_filename, i=index)
    djvused_filename = base_filename + '.djvused'
    with open(test_filename, 'rb') as file:
        commandline = file.readline()
        expected_output = file.read()
    args = shlex.split(commandline)
    assert_equal(args[0], '#')
    with temporary.directory() as tmpdir:
        # Work on a private copy so that the shared fixture is never modified.
        djvu_filename = os.path.join(tmpdir, 'empty.djvu')
        args += [djvu_filename]
        shutil.copy(
            os.path.join(os.path.dirname(__file__), '..', 'data', 'empty.djvu'),
            djvu_filename)
        # Run the djvused script against the copy (-s saves the result).
        ipc.Subprocess(
            ['djvused', '-f', djvused_filename, '-s', djvu_filename]).wait()
        xml_filename = os.path.join(tmpdir, 'output.html')
        with open(xml_filename, 'w+b') as xml_file:
            # djvu2hocr writes to xmllint's stdin; xmllint writes the
            # reformatted document into xml_file.
            xmllint = ipc.Subprocess(['xmllint', '--format', '-'],
                                     stdin=ipc.PIPE, stdout=xml_file)
            try:
                with open(os.devnull, 'w') as null:
                    with interim(sys, stdout=xmllint.stdin, stderr=null):
                        with interim(djvu2hocr.logger, handlers=[]):
                            rc = try_run(djvu2hocr.main, args)
            finally:
                # Closing stdin signals end of input to xmllint.
                xmllint.stdin.close()
                try:
                    xmllint.wait()
                except ipc.CalledProcessError:
                    # Raising the exception here is likely to hide the real
                    # reason of the failure.
                    pass
            assert_equal(rc, 0)
            # Rewind to read back what xmllint wrote.
            xml_file.seek(0)
            output = xml_file.read()
    assert_multi_line_equal(expected_output, output)
def _test_from_file(base_filename, index):
    # Data-driven test for djvu2hocr.
    #
    # <base>.test<index> holds a command line on its first line (starting
    # with '#') followed by the expected, xmllint-formatted output;
    # <base>.djvused is a djvused script applied to a copy of data/empty.djvu
    # before djvu2hocr is run on that copy.
    base_filename = os.path.join(here, base_filename)
    test_filename = '{base}.test{i}'.format(base=base_filename, i=index)
    djvused_filename = base_filename + '.djvused'
    with open(test_filename, 'rb') as file:
        commandline = file.readline()
        expected_output = file.read()
    args = shlex.split(commandline)
    assert_equal(args[0], '#')
    with temporary.directory() as tmpdir:
        # Work on a private copy so that the shared fixture is never modified.
        djvu_filename = os.path.join(tmpdir, 'empty.djvu')
        args += [djvu_filename]
        shutil.copy(
            os.path.join(os.path.dirname(__file__), '..', 'data', 'empty.djvu'),
            djvu_filename)
        # Run the djvused script against the copy (-s saves the result).
        ipc.Subprocess(['djvused', '-f', djvused_filename, '-s',
                        djvu_filename]).wait()
        xml_filename = os.path.join(tmpdir, 'output.html')
        with open(xml_filename, 'w+b') as xml_file:
            # djvu2hocr writes to xmllint's stdin; xmllint writes the
            # reformatted document into xml_file.
            xmllint = ipc.Subprocess(['xmllint', '--format', '-'],
                                     stdin=ipc.PIPE, stdout=xml_file)
            try:
                with open(os.devnull, 'w') as null:
                    with interim(sys, stdout=xmllint.stdin, stderr=null):
                        with interim(djvu2hocr.logger, handlers=[]):
                            rc = try_run(djvu2hocr.main, args)
            finally:
                # Closing stdin signals end of input to xmllint.
                xmllint.stdin.close()
                try:
                    xmllint.wait()
                except ipc.CalledProcessError:
                    # Raising the exception here is likely to hide the real
                    # reason of the failure.
                    pass
            assert_equal(rc, 0)
            # Rewind to read back what xmllint wrote.
            xml_file.seek(0)
            output = xml_file.read()
    assert_multi_line_equal(expected_output, output)
def test_bad_page_id():
    # Process data/bad-page-id.djvu with the '_dummy' engine while
    # ocrodjvu.system_encoding is forced to ASCII; the run must be silent and
    # return 0.
    remove_logging_handlers('ocrodjvu.')
    here = os.path.abspath(os.path.dirname(__file__))
    path = os.path.join(here, '..', 'data', 'bad-page-id.djvu')
    out = io.BytesIO()
    err = io.BytesIO()
    with temporary.directory() as tmpdir:
        out_path = os.path.join(tmpdir, 'tmp.djvu')
        argv = [
            '',
            '--engine', '_dummy',
            '--save-bundled', out_path,
            path,
        ]
        with interim(sys, stdout=out, stderr=err):
            with interim(ocrodjvu, system_encoding='ASCII'):
                rc = try_run(ocrodjvu.main, argv)
    assert_equal(err.getvalue(), '')
    assert_equal(rc, 0)
    assert_equal(out.getvalue(), '')
def test_empty(self):
    # An empty string yields no break positions.
    breaks = list(simple_word_break_iterator(''))
    assert_equal(breaks, [])
def test_nolocale(self):
    # Break positions expected for the module-level sample ``text`` when no
    # locale is supplied.
    breaks = list(word_break_iterator(text))
    expected = [
        9, 10, 15, 16, 25, 26, 30, 31, 32, 33,
        37, 38, 44, 45, 47, 48, 54, 55, 57, 58,
        61, 62, 67,
    ]
    assert_equal(breaks, expected)
    # Sanity check of the fixture itself: the final break is the end of text.
    assert_equal(expected[-1], len(text))
def fake_subprocess(args, *_positional, **_keywords):
    # Check the start of the command line that would have been run
    # (executable, '-l', language), then abort immediately with EOFError.
    # ``self`` and ``lang1`` are taken from the enclosing scope.
    executable, flag, language = args[0], args[1], args[2]
    assert_equal(executable, self.engine.executable)
    assert_equal(flag, '-l')
    assert_equal(language, lang1)
    raise EOFError
def _test_locale(self):
    # Run ``locale`` in a child and inspect the KEY=VALUE lines it prints:
    # LC_ALL, LANG and LANGUAGE must be empty, LC_CTYPE must be en_US.UTF-8
    # and every other category must read "POSIX".  LC_ALL, LC_CTYPE and LANG
    # must all be present.
    child = ipc.Subprocess(['locale'], stdout=ipc.PIPE, stderr=ipc.PIPE)
    out, err = child.communicate()
    out_lines = out.splitlines()
    err_lines = err.splitlines()
    assert_equal(err_lines, [])
    settings = dict(line.split('=', 1) for line in out_lines)
    expected_values = {
        'LC_ALL': '',
        'LC_CTYPE': 'en_US.UTF-8',
        'LANG': '',
        'LANGUAGE': '',
    }
    # Flags for the variables that are required to show up in the output.
    seen = {'LC_ALL': 0, 'LC_CTYPE': 0, 'LANG': 0}
    for key, value in settings.iteritems():
        if key in seen:
            seen[key] = 1
        assert_equal(value, expected_values.get(key, '"POSIX"'))
    assert_true(seen['LC_ALL'])
    assert_true(seen['LC_CTYPE'])
    assert_true(seen['LANG'])
def _test_from_file(base_filename, format):
    # Render page 0 of <base>.djvu through ``format`` and compare it with the
    # reference image <base>_<bpp>bpp.<ext>: byte for byte first, then, if
    # the bytes differ, as decoded images.
    one_bit = format.bpp == 1
    layers = djvu.decode.RENDER_MASK_ONLY if one_bit else djvu.decode.RENDER_COLOR
    full_base = os.path.join(here, base_filename)
    djvu_filename = '{base}.djvu'.format(base=full_base)
    expected_filename = '{base}_{bpp}bpp.{ext}'.format(
        base=full_base,
        bpp=format.bpp,
        ext=format.extension,
    )
    with open(expected_filename, 'rb') as reference_file:
        expected_bytes = reference_file.read()
    context = djvu.decode.Context()
    document = context.new_document(djvu.decode.FileUri(djvu_filename))
    page_job = document.pages[0].decode(wait=True)
    buf = io.BytesIO()
    format.write_image(page_job, layers, buf)
    result_bytes = buf.getvalue()
    assert_equal(len(result_bytes), len(expected_bytes))
    if result_bytes == expected_bytes:
        # The easy part:
        return
    # The result might be still correct, even if the strings are different.
    # Think of BMP format and its padding bytes.
    expected_image = Image.open(expected_filename)
    result_image = Image.open(io.BytesIO(result_bytes))
    assert_equal(result_image.format, expected_image.format)
    assert_equal(result_image.size, expected_image.size)
    assert_equal(result_image.mode, expected_image.mode)
    if result_image.palette is None:
        assert_is_none(expected_image.palette)
    else:
        assert_equal(
            list(result_image.palette.getdata()),
            list(expected_image.palette.getdata()),
        )
    assert_equal(list(result_image.getdata()), list(expected_image.getdata()))
def test_en_empty(self):
    # With the English ICU locale, an empty string yields no break positions.
    icu = get_icu()
    assert_not_equal(icu, None)
    breaks = list(word_break_iterator('', icu.Locale('en')))
    assert_equal(breaks, [])
def _test_signal(self, name):
    # Kill a child with the signal called ``name`` and check that wait()
    # reports it under that symbolic name.
    # Any long-standing process would do.
    victim = ipc.Subprocess(['cat'], stdin=ipc.PIPE)
    signo = getattr(signal, name)
    os.kill(victim.pid, signo)
    with assert_raises(ipc.CalledProcessInterrupted) as ecm:
        victim.wait()
    expected = "Command 'cat' was interrupted by signal " + name
    assert_equal(str(ecm.exception), expected)
def test_default_filter(self):
    # Defaults are eggs=None and ham=42; values assigned on one instance are
    # readable back and must not leak into a newly created instance.
    first = self.Dummy()
    assert_equal(first.eggs, None)
    assert_equal(first.ham, 42)
    first.eggs = -4
    first.ham = -2
    assert_equal(first.eggs, -4)
    assert_equal(first.ham, -2)
    fresh = self.Dummy()
    assert_equal(fresh.eggs, None)
    assert_equal(fresh.ham, 42)
def test_sigint(self):
    # SIGINT is reported by name and flagged as caused by the user.
    error = ipc.CalledProcessInterrupted(signal.SIGINT, 'eggs')
    expected = "Command 'eggs' was interrupted by signal SIGINT"
    assert_equal(str(error), expected)
    assert_true(error.by_user)
def t(nitems, njobs, xlim):
    # The thread limit computed for (nitems, njobs) must equal xlim.
    assert_equal(lib.utils.get_thread_limit(nitems, njobs), xlim)
def test_sigsegv(self):
    # SIGSEGV is reported by name and is not flagged as caused by the user.
    error = ipc.CalledProcessInterrupted(signal.SIGSEGV, 'eggs')
    expected = "Command 'eggs' was interrupted by signal SIGSEGV"
    assert_equal(str(error), expected)
    assert_false(error.by_user)
def test_invalid_signo(self):
    # signal.NSIG is guaranteed not to be a valid signal number, so the
    # expected message carries the bare number.
    bogus_signo = signal.NSIG
    error = ipc.CalledProcessInterrupted(bogus_signo, 'eggs')
    expected = "Command 'eggs' was interrupted by signal {0}".format(bogus_signo)
    assert_equal(str(error), expected)
    assert_false(error.by_user)
def test_nolocale_empty(self):
    # Without a locale, an empty string yields no break positions.
    breaks = list(word_break_iterator(''))
    assert_equal(breaks, [])