def test_process_error_causes_coverter_to_raise_conversion_error(self):
    """A ProcessError raised by run_process surfaces as TikaConversionError.

    ``ftw.tika.converter.run_process`` is replaced by a mock that raises
    ``ProcessError``; ``TikaConverter.convert`` is then expected to raise
    ``TikaConversionError``.
    """

    def _fail_with_process_error(cmd):
        # Stand-in for run_process: fail regardless of the command given.
        raise ProcessError

    # Swap run_process for a mock that delegates to the failing stand-in.
    patched_run_process = self.mocker.replace(
        'ftw.tika.converter.run_process')
    self.expect(patched_run_process(ARGS)).call(_fail_with_process_error)
    self.replay()

    # The temporary file only provides an existing path to hand to the
    # converter (presumably so its jar path check passes -- confirm).
    with tempfile.NamedTemporaryFile() as jar_file:
        converter = TikaConverter(path=jar_file.name)
        with self.assertRaises(TikaConversionError):
            converter.convert('')
def convert(self, orig, data, filename='', mimetype=None, **kwargs):
    """Convert ``orig`` to plain text with Tika and store it on ``data``.

    Args:
        orig: The document to convert; passed straight through to
            ``TikaConverter.convert``.
        data: Result container; the extracted text is stored on it via
            ``data.setData`` and the same object is returned.
        filename: Optional file name forwarded to the converter.
        mimetype: Optional mimetype, only used when logging a failed
            conversion.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        ``data``, holding the extracted text, or an empty string if the
        conversion failed with ``TikaConversionError``.

    Raises:
        ConflictError, KeyboardInterrupt: Always re-raised untouched.
    """
    converter = TikaConverter()
    try:
        plain_text = converter.convert(orig, filename=filename)
    except (ConflictError, KeyboardInterrupt):
        # These must never be swallowed by the conversion error handling.
        raise
    except TikaConversionError as exc:
        # "as" form is valid on Python 2.6+ and Python 3; the old
        # "except X, exc" form is a SyntaxError on Python 3.
        self._log_conversion_error(exc, mimetype=mimetype)
        plain_text = ''

    # Hand the result back to the caller instead of discarding it.
    data.setData(plain_text)
    return data
def test_converter_accepts_file_like_stream_object(self):
    """``TikaConverter.convert`` accepts a file-like object as input.

    ``ftw.tika.converter.run_process`` is replaced by a mock returning a
    ``(stdout, stderr)`` style tuple; the first element is expected to
    come back from ``convert`` as the plain text.
    """
    sample_text = 'TEXT'

    # Patch run_process to just return sample output
    mock_run_proc = self.mocker.replace('ftw.tika.converter.run_process')
    self.expect(mock_run_proc(ARGS)).result((sample_text, 'stderr'))
    self.replay()

    # The temporary file only provides an existing path to hand to the
    # converter (presumably so its jar path check passes -- confirm).
    with tempfile.NamedTemporaryFile() as tmp_file:
        tika_converter = TikaConverter(path=tmp_file.name)
        plain_text = tika_converter.convert(StringIO(sample_text))
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(plain_text, sample_text)
def test_converter_builds_correct_command_line(self):
    """The converter invokes ``java -jar <jar_path> -t <document>``.

    ``ftw.tika.converter.run_process`` is replaced by a mock that echoes
    the command line it was given, so ``convert`` returns that command
    line and it can be inspected here.
    """

    def return_cmd_line(cmd):
        # Echo the command as "stdout", with empty "stderr".
        return (cmd, '')

    # Patch run_process to just return stderr and the command line given
    mock_run_proc = self.mocker.replace('ftw.tika.converter.run_process')
    self.expect(mock_run_proc(ARGS)).call(return_cmd_line)
    self.replay()

    # Any existing path will do here; it is never executed because
    # run_process is mocked.
    jar_path = '/bin/ls'
    tika_converter = TikaConverter(path=jar_path)
    cmd = tika_converter.convert('')

    # Drop the trailing token (the document filename) before comparing.
    cmd_without_doc_filename = cmd.split()[:-1]
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(cmd_without_doc_filename,
                     ['java', '-jar', jar_path, '-t'])
def convert(self, orig, data, filename='', mimetype=None, **kwargs):
    """Convert ``orig`` to plain text with Tika and store it on ``data``.

    Conversion is best-effort: any failure other than ``ConflictError``
    or ``KeyboardInterrupt`` is logged and results in empty text.

    Args:
        orig: The document to convert; passed straight through to
            ``TikaConverter.convert``.
        data: Result container; the extracted text is stored on it via
            ``data.setData`` and the same object is returned.
        filename: Optional file name forwarded to the converter.
        mimetype: Optional mimetype, only used when logging a failed
            conversion.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        ``data``, holding the extracted text, or an empty string if the
        conversion failed.

    Raises:
        ConflictError, KeyboardInterrupt: Always re-raised untouched.
    """
    converter = TikaConverter()
    try:
        plain_text = converter.convert(orig, filename=filename)
    except (ConflictError, KeyboardInterrupt):
        # Must stay ahead of the generic handler below so that these are
        # never swallowed.
        raise
    except TikaConversionError as exc:
        self._log_conversion_error(exc, mimetype=mimetype)
        plain_text = ''
    except Exception as exc:
        # Deliberate catch-all: log the failure and fall back to empty
        # text. ``warning`` replaces the deprecated ``warn`` alias.
        logger.warning(exc)
        plain_text = ''

    data.setData(plain_text)
    return data
def test_invalid_jar_path_causes_converter_to_raise(self):
    """Converting with a nonexistent jar path raises ``TikaJarNotFound``."""
    missing_jar = "/nonexistent"
    converter = TikaConverter(path=missing_jar)

    # Only the convert() call is expected to raise, not the constructor.
    with self.assertRaises(TikaJarNotFound):
        converter.convert('')