Beispiel #1
0
    def test_process_error_causes_coverter_to_raise_conversion_error(self):
        """A ProcessError from run_process must surface as TikaConversionError."""

        def failing_run_process(cmd):
            # Stand-in for run_process that always fails.
            raise ProcessError

        patched_run = self.mocker.replace('ftw.tika.converter.run_process')
        self.expect(patched_run(ARGS)).call(failing_run_process)
        self.replay()

        # The temporary file is only used as the `path` handed to the converter.
        with tempfile.NamedTemporaryFile() as jar_file:
            converter = TikaConverter(path=jar_file.name)
            self.assertRaises(TikaConversionError, converter.convert, '')
    def convert(self, orig, data, filename='', mimetype=None, **kwargs):
        """Convert ``orig`` to plain text with Tika and store it in ``data``.

        Args:
            orig: The document content handed to ``TikaConverter.convert``.
            data: Result container; the extracted text is stored on it via
                ``setData``.
            filename: Forwarded to the converter as ``filename``.
            mimetype: Only used when logging a conversion error.
            **kwargs: Accepted and ignored.

        Returns:
            ``data``, holding the extracted text, or an empty string if the
            conversion failed with ``TikaConversionError``.

        Raises:
            ConflictError, KeyboardInterrupt: Always propagated unchanged.
        """
        converter = TikaConverter()
        try:
            plain_text = converter.convert(orig, filename=filename)

        except (ConflictError, KeyboardInterrupt):
            raise

        # `except X as exc` is valid on Python 2.6+ and Python 3, unlike the
        # Python 2-only `except X, exc` form.
        except TikaConversionError as exc:
            self._log_conversion_error(exc, mimetype=mimetype)
            plain_text = ''

        # Hand the result back to the caller instead of discarding it.
        data.setData(plain_text)
        return data
    def convert(self, orig, data, filename='', mimetype=None, **kwargs):
        """Extract plain text from ``orig`` via Tika and put it into ``data``.

        Args:
            orig: The document content passed to ``TikaConverter.convert``.
            data: Result container; receives the text through ``setData``.
            filename: Passed through to the converter as ``filename``.
            mimetype: Used solely for logging when the conversion fails.
            **kwargs: Accepted and ignored.

        Returns:
            ``data`` with the extracted text set, or with an empty string when
            a ``TikaConversionError`` occurred.

        Raises:
            ConflictError, KeyboardInterrupt: Re-raised without handling.
        """
        converter = TikaConverter()
        try:
            plain_text = converter.convert(orig, filename=filename)

        except (ConflictError, KeyboardInterrupt):
            raise

        # Use the `as` form: the comma form is a SyntaxError on Python 3.
        except TikaConversionError as exc:
            self._log_conversion_error(exc, mimetype=mimetype)
            plain_text = ''

        # Store and return the result so the computed text is not lost.
        data.setData(plain_text)
        return data
Beispiel #4
0
    def test_converter_accepts_file_like_stream_object(self):
        """The converter accepts a file-like stream as the document."""
        sample_text = 'TEXT'

        # Patch run_process to just return sample output
        mock_run_proc = self.mocker.replace('ftw.tika.converter.run_process')
        self.expect(mock_run_proc(ARGS)).result((sample_text, 'stderr'))
        self.replay()

        # The temporary file is only used as the `path` given to the converter.
        with tempfile.NamedTemporaryFile() as tmp_file:
            tika_converter = TikaConverter(path=tmp_file.name)
            plain_text = tika_converter.convert(StringIO(sample_text))

        # assertEqual instead of the deprecated alias assertEquals, which was
        # removed in Python 3.12.
        self.assertEqual(plain_text, sample_text)
Beispiel #5
0
    def test_converter_builds_correct_command_line(self):
        """The converter invokes ``java -jar <path> -t`` plus one more token."""

        # Patch run_process to echo back the command line it was given,
        # paired with an empty second element.
        def return_cmd_line(cmd):
            return (cmd, '')

        mock_run_proc = self.mocker.replace('ftw.tika.converter.run_process')
        self.expect(mock_run_proc(ARGS)).call(return_cmd_line)
        self.replay()

        jar_path = '/bin/ls'
        tika_converter = TikaConverter(path=jar_path)
        cmd = tika_converter.convert('')
        # Drop the last token (presumably the document filename, which is not
        # known to the test) before comparing.
        cmd_without_doc_filename = cmd.split()[:-1]

        # assertEqual instead of the deprecated alias assertEquals, which was
        # removed in Python 3.12.
        self.assertEqual(cmd_without_doc_filename,
                         ['java', '-jar', jar_path, '-t'])
Beispiel #6
0
    def convert(self, orig, data, filename='', mimetype=None, **kwargs):
        """Convert ``orig`` to plain text with Tika and store it in ``data``.

        Args:
            orig: The document content handed to ``TikaConverter.convert``.
            data: Result container; the extracted text is stored on it via
                ``setData``.
            filename: Forwarded to the converter as ``filename``.
            mimetype: Only used when logging a ``TikaConversionError``.
            **kwargs: Accepted and ignored.

        Returns:
            ``data``, holding the extracted text, or an empty string if the
            conversion failed.

        Raises:
            ConflictError, KeyboardInterrupt: Always propagated unchanged.
        """
        converter = TikaConverter()
        try:
            plain_text = converter.convert(orig, filename=filename)

        except (ConflictError, KeyboardInterrupt):
            raise

        except TikaConversionError as exc:
            self._log_conversion_error(exc, mimetype=mimetype)
            plain_text = ''

        except Exception as exc:
            # Best effort: any other failure is logged and yields empty text.
            # `warning` is the documented method; `warn` is a deprecated alias.
            logger.warning(exc)
            plain_text = ''

        data.setData(plain_text)
        return data
Beispiel #7
0
 def test_invalid_jar_path_causes_converter_to_raise(self):
     """Converting with a nonexistent path must raise TikaJarNotFound."""
     converter = TikaConverter(path="/nonexistent")
     self.assertRaises(TikaJarNotFound, converter.convert, '')