Python extract_data Examples, invoice2data.main.extract_data Python Examples

Example #1

0

Show file

File: account_invoice_import.py Project: maurolguin1/odoo

 def invoice2data_parse_invoice(self, file_data):
     """Run invoice2data's ``extract_data`` on the given invoice content.

     ``file_data`` is written to a temporary file created with
     ``mkstemp()`` and that file's path is handed to ``extract_data``
     together with the collected templates.

     Templates come from the directory named by the
     ``invoice2data_templates_dir`` config entry (when it is set and is an
     existing directory) plus the templates shipped inside the
     ``invoice2data`` package, unless the
     ``invoice2data_exclude_built_in_templates`` config entry is truthy.

     Raises:
         UserError: if ``extract_data`` raises any exception.

     NOTE(review): in this excerpt the extraction result is neither checked
     nor returned -- the remainder of the method may be missing; confirm
     against the full file.
     """
     logger.info('Trying to analyze PDF invoice with invoice2data lib')
     fd, file_name = mkstemp()
     try:
         os.write(fd, file_data)
     finally:
         # Always release the descriptor, even if the write fails.
         os.close(fd)
     # Transfer log level of Odoo to invoice2data
     loggeri2data.setLevel(logger.getEffectiveLevel())
     local_templates_dir = tools.config.get('invoice2data_templates_dir',
                                            False)
     logger.debug('invoice2data local_templates_dir=%s',
                  local_templates_dir)
     templates = []
     if local_templates_dir and os.path.isdir(local_templates_dir):
         templates += read_templates(local_templates_dir)
     exclude_built_in_templates = tools.config.get(
         'invoice2data_exclude_built_in_templates', False)
     if not exclude_built_in_templates:
         templates += read_templates(
             pkg_resources.resource_filename('invoice2data', 'templates'))
     logger.debug('Calling invoice2data.extract_data with templates=%s',
                  templates)
     # "except ... as e" works on Python 2.6+ and Python 3; the former
     # "except Exception, e" spelling is a SyntaxError on Python 3.
     try:
         invoice2data_res = extract_data(file_name, templates=templates)
     except Exception as e:
         raise UserError(
             _("PDF Invoice parsing failed. Error message: %s") % e)

Example #2

0

Show file

File: account_invoice_import.py Project: vertelab/odoo-oca-edi

 def invoice2data_parse_invoice(self, file_data):
     """Extract invoice data from ``file_data`` using the invoice2data lib.

     ``file_data`` is written to a temporary file created with
     ``mkstemp()``; that file's path is passed to ``extract_data`` together
     with the collected templates, and the temporary file is removed again
     before this method returns or raises.

     Templates come from the directory named by the
     ``invoice2data_templates_dir`` config entry (when it is set and is an
     existing directory) plus whatever ``read_templates()`` returns when
     called without arguments, unless the
     ``invoice2data_exclude_built_in_templates`` config entry is truthy.

     Returns:
         The result of ``self.invoice2data_to_parsed_inv()`` applied to the
         ``extract_data`` result.

     Raises:
         UserError: if ``extract_data`` raises any exception, or if it
             returns a falsy result.
     """
     logger.info('Trying to analyze PDF invoice with invoice2data lib')
     fd, file_name = mkstemp()
     try:
         try:
             os.write(fd, file_data)
         finally:
             # Always release the descriptor, even if the write fails.
             os.close(fd)
         # Transfer log level of Odoo to invoice2data
         loggeri2data.setLevel(logger.getEffectiveLevel())
         local_templates_dir = tools.config.get(
             'invoice2data_templates_dir', False)
         logger.debug('invoice2data local_templates_dir=%s',
                      local_templates_dir)
         templates = []
         if local_templates_dir and os.path.isdir(local_templates_dir):
             templates += read_templates(local_templates_dir)
         exclude_built_in_templates = tools.config.get(
             'invoice2data_exclude_built_in_templates', False)
         if not exclude_built_in_templates:
             templates += read_templates()
         logger.debug('Calling invoice2data.extract_data with templates=%s',
                      templates)
         try:
             invoice2data_res = extract_data(file_name, templates=templates)
         except Exception as e:
             raise UserError(
                 _("PDF Invoice parsing failed. Error message: %s") % e)
     finally:
         # mkstemp() leaves deletion to the caller; without this, every
         # call would leave a file behind in the temp directory.
         try:
             os.remove(file_name)
         except OSError:
             # Best-effort cleanup: do not mask the real outcome.
             logger.warning('Could not remove temporary file %s', file_name)
     if not invoice2data_res:
         raise UserError(
             _("This PDF invoice doesn't match a known template of "
               "the invoice2data lib."))
     logger.info('Result of invoice2data PDF extraction: %s',
                 invoice2data_res)
     return self.invoice2data_to_parsed_inv(invoice2data_res)

Example #3

0

Show file

 def test_extract_data(self):
     """Check ``extract_data`` returns a dict for each sample file.

     Iterates over the paths returned by ``get_sample_files('.pdf')``.
     """
     for sample_path in get_sample_files('.pdf'):
         outcome = extract_data(sample_path, None)
         # logger.info output was not showing up (still to be investigated),
         # so the result is printed for the time being.
         print(outcome)
         got_plain_dict = type(outcome) is dict
         self.assertTrue(got_plain_dict, "return is not a dict")

Example #4

0

Show file

File: test_lib.py Project: m3nu/invoice2data

 def test_extract_data_pdftotext(self):
     """Check ``extract_data`` returns a dict when given ``pdftotext``.

     Iterates over the paths returned by ``get_sample_files('.pdf')``. An
     ``ImportError`` raised during extraction is turned into a test failure.
     """
     for sample_path in get_sample_files('.pdf'):
         try:
             outcome = extract_data(sample_path, None, pdftotext)
             # logger.info output was not showing up (still to be
             # investigated), so the result is printed for the time being.
             print(outcome)
         except ImportError:
             self.assertTrue(False, "pdftotext is not installed")
         got_plain_dict = type(outcome) is dict
         self.assertTrue(got_plain_dict, "return is not a dict")

Example #5

0

Show file

 def test_extract_data_pdftotext(self):
     """Run ``extract_data`` with ``pdftotext`` on each sample file.

     The samples are the paths returned by ``get_sample_files('.pdf')``;
     each result must be a plain dict. An ``ImportError`` raised while
     extracting is reported as a failed assertion.
     """
     samples = get_sample_files('.pdf')
     for pdf_path in samples:
         try:
             extracted = extract_data(pdf_path, None, pdftotext)
             # Printing because logger.info was not producing output; the
             # cause still needs to be checked.
             print(extracted)
         except ImportError:
             self.assertTrue(False, "pdftotext is not installed")
         self.assertTrue(type(extracted) is dict, "return is not a dict")

Example #6

0

Show file

File: test_lib.py Project: m3nu/invoice2data

 def test_extract_data_pdfminer(self):
     """Call ``extract_data`` with ``pdfminer_wrapper`` on each sample file.

     Nothing is asserted about the result; the test only fails if a call
     raises.
     """
     for sample_path in get_sample_files('.pdf'):
         extract_data(sample_path, None, pdfminer_wrapper)

Example #7

0

Show file

File: test_lib.py Project: m3nu/invoice2data

 def test_extract_data(self):
     """Run ``extract_data`` on each sample file and expect a plain dict.

     The samples are the paths returned by ``get_sample_files('.pdf')``.
     """
     samples = get_sample_files('.pdf')
     for pdf_path in samples:
         extracted = extract_data(pdf_path, None)
         # Printing because logger.info was not producing output; the cause
         # still needs to be checked.
         print(extracted)
         self.assertTrue(type(extracted) is dict, "return is not a dict")

Example #8

0

Show file

File: test_lib.py Project: m3nu/invoice2data

def _extract_data_for_export():
    """Return a one-element list with the extraction of the "oyo.pdf" sample.

    Scans the paths returned by ``get_sample_files('.pdf')`` and extracts
    the first one whose name ends with "oyo.pdf". Returns ``None``
    implicitly when no path matches.
    """
    for sample_path in get_sample_files('.pdf'):
        if not sample_path.endswith("oyo.pdf"):
            continue
        return [extract_data(sample_path, None)]

Example #9

0

Show file

 def _run_test_on_folder(self, folder):
     """Call ``extract_data`` on every file found under ``folder``.

     The tree is traversed with ``os.walk``; each file is extracted using
     ``self.templates``. Results are discarded.
     """
     for dir_path, _dir_names, file_names in os.walk(folder):
         for file_name in file_names:
             full_path = os.path.join(dir_path, file_name)
             extract_data(full_path, self.templates)

Example #10

0

Show file

File: test_extraction.py Project: HamishBlank/invoice2data

 def _run_test_on_folder(self, folder):
     """Extract every file found under ``folder`` and print each result.

     The tree is traversed with ``os.walk``; each file is extracted using
     ``self.templates`` and printed next to its bare file name.
     """
     for dir_path, _dir_names, file_names in os.walk(folder):
         for file_name in file_names:
             full_path = os.path.join(dir_path, file_name)
             outcome = extract_data(full_path, self.templates)
             print(file_name, outcome)

Example #11

0

Show file

 def test_extract_data_pdfminer(self):
     """Run ``extract_data`` with ``pdfminer_wrapper`` over the sample files.

     The samples are the paths returned by ``get_sample_files('.pdf')``.
     No assertion is made on the results; a raised exception is the only
     failure mode.
     """
     samples = get_sample_files('.pdf')
     for pdf_path in samples:
         extract_data(pdf_path, None, pdfminer_wrapper)

Example #12

0

Show file

def _extract_data_for_export():
    """Extract the sample whose path ends with "oyo.pdf".

    Looks through the paths returned by ``get_sample_files('.pdf')`` and
    returns ``[extract_data(path, None)]`` for the first match. Falls off
    the end (returning ``None``) when there is no match.
    """
    samples = get_sample_files('.pdf')
    for pdf_path in samples:
        is_target = pdf_path.endswith("oyo.pdf")
        if is_target:
            extracted = extract_data(pdf_path, None)
            return [extracted]