Example #1
0
 def test_missing_filename_python(self):
     """Check that processing a deleted file raises ``MissingFileError``."""
     # Ask for a temp file and delete it straight away, so the path that
     # is handed to the library no longer exists on disk.
     vanished_path = self.get_temp_filename()
     os.remove(vanished_path)

     import convertextract
     from convertextract.exceptions import MissingFileError

     # Callable form of assertRaises: invokes process(vanished_path) and
     # fails the test unless MissingFileError is raised.
     self.assertRaises(MissingFileError, convertextract.process, vanished_path)
Example #2
0
 def test_unsupported_extension_python(self):
     """Make sure unsupported extension raises the correct error.

     A temp file with the extension ``"extension"`` is requested from
     ``get_temp_filename`` and handed to ``convertextract.process``, which
     is expected to raise ``ExtensionNotSupported``.
     """
     # Import before creating the temp file so a failing import cannot
     # leave a stray file behind.
     import convertextract
     from convertextract.exceptions import ExtensionNotSupported

     filename = self.get_temp_filename(extension="extension")
     try:
         with self.assertRaises(ExtensionNotSupported):
             convertextract.process(filename)
     finally:
         # Clean up even when the assertion fails or an unexpected
         # exception escapes; previously the file leaked in those cases.
         os.remove(filename)
Example #3
0
 def test_standardized_text_python(self):
     """Check the Python API output against the standardized text."""
     import convertextract

     extracted = convertextract.process(self.standardized_text_filename)
     # The result may come back as bytes; compare on str only.
     if isinstance(extracted, bytes):
         extracted = extracted.decode("utf8")

     # All whitespace is removed from the extracted text before comparing.
     squashed = ''.join(extracted.split())
     reference = self.get_standardized_text()
     failure_note = "standardized text fails for %s" % self.extension
     self.assertEqual(squashed, reference, failure_note)
Example #4
0
 def compare_python_output(self, filename, expected_filename=None, **kwargs):
     """Process ``filename`` and assert it matches the expected output file.

     When ``expected_filename`` is not given it is looked up through
     ``self.get_expected_filename(filename, **kwargs)``. The same
     ``kwargs`` are forwarded to ``convertextract.process``. Both texts go
     through ``self.clean_str`` before being compared.
     """
     expected_filename = (
         self.get_expected_filename(filename, **kwargs)
         if expected_filename is None
         else expected_filename
     )

     import convertextract
     actual = convertextract.process(filename, **kwargs)
     # Normalise a bytes result to str so it can be compared with file text.
     if isinstance(actual, bytes):
         actual = actual.decode("utf8")

     with open(expected_filename, encoding='utf8') as expected_file:
         actual = self.clean_str(actual)
         wanted = self.clean_str(expected_file.read())
     # The file is already closed here; only the cleaned strings are needed.
     self.assertEqual(actual, wanted)
Example #5
0
 def compare_converted_python_output(self,
                                     filename,
                                     expected_filename=None,
                                     **kwargs):
     """Run ``convertextract.process`` and compare with the expected file.

     ``kwargs`` are passed both to ``self.get_expected_filename`` (used
     only when ``expected_filename`` is omitted) and to
     ``convertextract.process``. The processed text and the file contents
     are each passed through ``self.clean_str`` and must then be equal.
     """
     if expected_filename is None:
         expected_filename = self.get_expected_filename(filename, **kwargs)

     import convertextract
     converted = convertextract.process(filename, **kwargs)
     converted = (
         converted.decode("utf8") if isinstance(converted, bytes) else converted
     )

     with open(expected_filename, encoding="utf8") as reference_file:
         cleaned_converted = self.clean_str(converted)
         cleaned_reference = self.clean_str(reference_file.read())
     self.assertEqual(cleaned_converted, cleaned_reference)
Example #6
0
def gooey_main():
    """Parse the GUI arguments, process the document and report the outcome.

    Arguments come from the parser returned by ``get_gui()`` and are
    forwarded to ``process`` as keyword arguments. Progress is reported
    through ``logging``.

    If ``process`` raises ``CommandLineError``, a hint is logged, the
    exception (passed through ``red``) is written to stderr and the program
    exits with status 1 instead of propagating the exception (traceback
    suppressed). Otherwise the path ``<name>_converted<ext>`` derived from
    the input filename is logged.
    """
    parser = get_gui()
    args = parser.parse_args()

    # Pure logging: kept outside the ``try`` so that only the call that can
    # actually raise CommandLineError is guarded.
    logging.info(f'Extracting text from "{args.filename}"')
    if args.mapping:
        logging.info(
            f'Preparing to convert from mapping file provided at "{args.mapping}"')
    else:
        logging.info(
            f'Preparing to convert from "{args.input_language}" to "{args.output_language}"')

    try:
        # The return value is not used here, so it is not bound to a name.
        process(**vars(args))
    except CommandLineError as ex:
        logging.error(
            "Whoops. Something went wrong. Please check your input file and your mapping file (if using a custom one)")
        sys.stderr.write(red(ex) + '\n')
        sys.exit(1)
    else:
        # NOTE(review): this path is only computed for the log message;
        # presumably it matches where ``process`` writes - confirm.
        fn, ext = os.path.splitext(args.filename)
        output_path = fn + '_converted' + ext
        logging.info(f'Converted file available at "{output_path}"')