def parse(self, document_page, descriptor=None):
    """Parse an office document page by converting its document to PDF.

    The page's document is saved to a temporary location, run through
    ``OfficeConverter`` and, when the converter reports that its output
    exists, the converted file is handed to ``parse_document_page`` with
    the ``application/pdf`` mimetype.

    Args:
        document_page: Page whose ``document`` is converted and parsed.
        descriptor: Accepted for interface compatibility; this
            implementation does not read it and always opens the
            converted file itself.

    Raises:
        ParserError: If the converter produced no output, or if an
            ``OfficeConversionError`` was raised inside the ``try`` body.
    """
    logger.debug('executing')
    try:
        office_converter = OfficeConverter()
        document = document_page.document
        document_file = document.document_save_to_temp_dir(
            document.checksum
        )
        logger.debug('document_file: %s', document_file)

        office_converter.convert(
            document_file, mimetype=document.file_mimetype
        )
        if not office_converter.exists:
            raise ParserError

        input_filepath = office_converter.output_filepath
        logger.debug('office_converter.output_filepath: %s', input_filepath)

        # Now that the office document has been converted to PDF,
        # call the corresponding PDF parser on this new file. The file
        # is opened in a ``with`` block so the descriptor is always
        # closed, even when the PDF parser raises.
        # NOTE(review): the file is opened with the default (text) mode,
        # as before -- confirm whether the PDF parser expects 'rb'.
        with open(input_filepath) as pdf_descriptor:
            parse_document_page(
                document_page, descriptor=pdf_descriptor,
                mimetype='application/pdf'
            )
    except OfficeConversionError as exception:
        logger.error(exception)
        raise ParserError
def office_parser(document_page):
    """Parse an office document page by converting its document to PDF.

    The page's document is saved to a temporary location with
    ``document_save_to_temp_dir``, run through ``OfficeConverter`` and,
    when the converter reports that its output exists, the converted
    file is handed to ``pdf_parser``.

    Args:
        document_page: Page whose ``document`` is converted and parsed.

    Raises:
        ParserError: If the converter produced no output, or if an
            ``OfficeConversionError`` was raised inside the ``try`` body.
    """
    logger.debug('executing')
    try:
        office_converter = OfficeConverter()
        document = document_page.document
        document_file = document_save_to_temp_dir(
            document, document.checksum
        )
        logger.debug('document_file: %s', document_file)

        office_converter.convert(
            document_file, mimetype=document.file_mimetype
        )
        if not office_converter.exists:
            raise ParserError

        input_filepath = office_converter.output_filepath
        logger.debug('office_converter.output_filepath: %s', input_filepath)

        # Open the converted file in a ``with`` block so the descriptor
        # is always closed, even when the PDF parser raises.
        # NOTE(review): the file is opened with the default (text) mode,
        # as before -- confirm whether the PDF parser expects 'rb'.
        with open(input_filepath) as pdf_descriptor:
            pdf_parser(document_page, descriptor=pdf_descriptor)
    except OfficeConversionError as exception:
        # ``except ... as`` is valid on Python 2.6+ and Python 3; the
        # failure is reported through the module logger rather than
        # printed to stdout.
        logger.error(exception)
        raise ParserError
def parse(self, document_page, descriptor=None):
    """Convert the page's office document to PDF and parse the result.

    Steps performed: save the document to a temporary location, convert
    it with ``OfficeConverter``, then pass the converted file to
    ``parse_document_page`` using the ``application/pdf`` mimetype.

    Args:
        document_page: Page whose ``document`` is converted and parsed.
        descriptor: Not read by this implementation; the converted file
            is always opened here. Kept so the signature is unchanged.

    Raises:
        ParserError: When ``office_converter.exists`` is falsy after the
            conversion, or when an ``OfficeConversionError`` is raised
            inside the ``try`` body.
    """
    logger.debug('executing')
    try:
        office_converter = OfficeConverter()
        source_document = document_page.document
        document_file = source_document.document_save_to_temp_dir(
            source_document.checksum
        )
        logger.debug('document_file: %s', document_file)

        office_converter.convert(
            document_file, mimetype=source_document.file_mimetype
        )
        if not office_converter.exists:
            raise ParserError

        input_filepath = office_converter.output_filepath
        logger.debug('office_converter.output_filepath: %s', input_filepath)

        # Now that the office document has been converted to PDF,
        # call the corresponding PDF parser on this new file. Using a
        # context manager guarantees the file handle is released instead
        # of being left for the garbage collector.
        # NOTE(review): default (text) mode is preserved from the
        # original -- confirm whether the PDF parser expects 'rb'.
        with open(input_filepath) as converted_file:
            parse_document_page(
                document_page, descriptor=converted_file,
                mimetype='application/pdf'
            )
    except OfficeConversionError as exception:
        logger.error(exception)
        raise ParserError
def office_parser(document_page):
    """Convert the page's office document to PDF and parse the result.

    Steps performed: save the document to a temporary location with
    ``document_save_to_temp_dir``, convert it with ``OfficeConverter``,
    then pass the converted file to ``pdf_parser``.

    Args:
        document_page: Page whose ``document`` is converted and parsed.

    Raises:
        ParserError: When ``office_converter.exists`` is falsy after the
            conversion, or when an ``OfficeConversionError`` is raised
            inside the ``try`` body.
    """
    logger.debug('executing')
    try:
        office_converter = OfficeConverter()
        source_document = document_page.document
        document_file = document_save_to_temp_dir(
            source_document, source_document.checksum
        )
        logger.debug('document_file: %s', document_file)

        office_converter.convert(
            document_file, mimetype=source_document.file_mimetype
        )
        if not office_converter.exists:
            raise ParserError

        input_filepath = office_converter.output_filepath
        logger.debug('office_converter.output_filepath: %s', input_filepath)

        # Using a context manager guarantees the converted file's handle
        # is released instead of being left for the garbage collector.
        # NOTE(review): default (text) mode is preserved from the
        # original -- confirm whether the PDF parser expects 'rb'.
        with open(input_filepath) as converted_file:
            pdf_parser(document_page, descriptor=converted_file)
    except OfficeConversionError as exception:
        # The Python 2-only ``except X, name`` / ``print name`` forms are
        # replaced with syntax valid on Python 2.6+ and 3, and the error
        # is sent to the module logger instead of stdout.
        logger.error(exception)
        raise ParserError