def run(self):
        """Find the document date.

        First asks ``search_by_positions`` for the ``'date'`` field on the
        header image; a hit that ``self.formatDate`` accepts is stored in
        ``self.date`` and returned. Otherwise every line of ``self.text`` is
        passed to ``self.process``, first with whitespace between digits
        removed and then unchanged.

        Returns:
            ``[date, position, page]``. For a mask-position hit the page is
            the third element of the search result, or ``''`` when the
            result does not have exactly three elements. For a text hit the
            page is ``self.nbPages``. Falls through (returns ``None``) when
            nothing is found.
        """
        header_image = (self.Files.tiffName_header
                        if self.Files.isTiff == 'True'
                        else self.Files.jpgName_header)
        masked = search_by_positions(self.supplier, 'date', self.Config, self.Locale,
                                     self.Ocr, self.Files, header_image, self.typo)
        if masked and masked[0]:
            formatted = self.formatDate(masked[0], masked[1])
            if formatted:
                self.date = formatted[0]
                self.Log.info('Date found using mask position : ' + str(formatted[0]))
                page = masked[2] if len(masked) == 3 else ''
                return [formatted[0], formatted[1], page]

        for line in self.text:
            # OCR may split a number with spaces ("2 0 2 0"); try the glued
            # version first, then the raw line.
            glued = re.sub(r'(\d)\s+(\d)', r'\1\2', line.content)
            hit = (self.process(glued, line.position)
                   or self.process(line.content, line.position))
            if hit:
                return [hit[0], hit[1], self.nbPages]
    def run(self):
        """Find the invoice number, trying three strategies in order.

        1. ``search_by_positions`` for the ``'invoice'`` field on the header
           image (TIFF or JPG depending on ``self.Files.isTiff``).
        2. ``self.Locale.invoiceRegex`` applied to each upper-cased line of
           ``self.text``.
        3. The position stored for the supplier in the ``suppliers`` table,
           read back through ``search_custom_positions``. Skipped when there
           is no supplier or ``self.customPage`` is truthy.

        Returns:
            The truthy ``search_by_positions`` result unchanged, or
            ``[invoice_number, position, page]``, or ``False`` when nothing
            was found.
        """
        if self.Files.isTiff == 'True':
            target = self.Files.tiffName_header
        else:
            target = self.Files.jpgName_header
        invoiceNumber = search_by_positions(self.supplier, 'invoice',
                                            self.Config, self.Locale, self.Ocr,
                                            self.Files, target, self.typo)
        if invoiceNumber and invoiceNumber[0]:
            return invoiceNumber

        for line in self.text:
            for _invoice in re.finditer(self.Locale.invoiceRegex,
                                        line.content.upper()):
                # Delete the invoice keyword.
                # NOTE(review): the [:-2] slice presumably drops the tail of
                # the pattern that matches the number itself - confirm
                # against the Locale definition.
                tmpInvoiceNumber = re.sub(self.Locale.invoiceRegex[:-2], '',
                                          _invoice.group())
                invoiceNumber = tmpInvoiceNumber.lstrip().split(' ')[0]
                if len(invoiceNumber) > int(self.Locale.invoiceSizeMin):
                    self.Log.info('Invoice number found : ' + invoiceNumber)
                    return [invoiceNumber, line.position, self.nbPages]

        if not self.supplier or self.customPage:
            return False

        self.Log.info(
            'Invoice number not found. Searching invoice number using position in database'
        )
        rows = self.Database.select({
            'select': ['invoice_number_position', 'invoice_number_page'],
            'table': ['suppliers'],
            'where': ['vat_number = ?'],
            'data': [self.supplier[0]]
        })
        # An empty or falsy result means there is no stored position; treat
        # it as "not found" instead of failing on the [0] lookup.
        position = rows[0] if rows else None
        if not position or not position['invoice_number_position']:
            return False

        data = {
            'position': position['invoice_number_position'],
            'regex': None,
            'target': 'full',
            'page': position['invoice_number_page']
        }
        text, position = search_custom_positions(
            data, self.Ocr, self.Files, self.Locale, self.file, self.Config)
        if text == '':
            return False

        self.Log.info('Invoice number found with position : ' + str(text))
        return [text, position, data['page']]
Example #3
0
    def run(self):
        """Find the invoice number, trying four strategies in order.

        1. ``search_by_positions`` for the ``'invoice'`` field on the header
           image.
        2. The position stored for the supplier in the ``suppliers`` table,
           read through ``search_custom_positions`` (only when a supplier is
           set and ``self.customPage`` is falsy).
        3. ``self.Locale.invoiceRegex`` on each upper-cased line of
           ``self.text``.
        4. The same regex scan on ``self.footer_text``; the position is then
           converted with ``self.Files.return_position_with_ratio``.

        Returns:
            The truthy ``search_by_positions`` result unchanged, or
            ``[invoice_number, position, page]``. Falls through (returns
            ``None``) when nothing is found.
        """
        if self.Files.isTiff == 'True':
            target = self.Files.tiffName_header
        else:
            target = self.Files.jpgName_header
        invoice_number = search_by_positions(self.supplier, 'invoice', self.Config, self.Locale,
                                             self.Ocr, self.Files, target, self.typo)
        if invoice_number and invoice_number[0]:
            return invoice_number

        if self.supplier and not self.customPage:
            rows = self.Database.select({
                'select': ['invoice_number_position', 'invoice_number_page'],
                'table': ['suppliers'],
                'where': ['vat_number = ?'],
                'data': [self.supplier[0]]
            })
            # An empty or falsy result means no stored position; skip this
            # strategy instead of failing on the [0] lookup.
            position = rows[0] if rows else None

            if position and position['invoice_number_position'] not in [False, 'NULL', '', None]:
                data = {
                    'position': position['invoice_number_position'],
                    'regex': None,
                    'target': 'full',
                    'page': position['invoice_number_page']
                }
                text, position = search_custom_positions(data, self.Ocr, self.Files, self.Locale,
                                                         self.file, self.Config)

                if text != '':
                    self.Log.info('Invoice number found with position : ' + str(text))
                    return [text, position, data['page']]

        def find_in(lines):
            """Return ``(invoice_number, line)`` for the first usable match, else ``None``."""
            for line in lines:
                for _invoice in re.finditer(self.Locale.invoiceRegex, line.content.upper()):
                    invoice_res = _invoice.group()
                    # If the regex also captured dates, strip every one of
                    # them. Accumulate on invoice_res so an earlier removal
                    # is not undone by the next iteration.
                    for _date in re.finditer(self.Locale.dateRegex, _invoice.group()):
                        if _date.group():
                            invoice_res = invoice_res.replace(_date.group(), '')

                    # Delete the invoice keyword.
                    # NOTE(review): the [:-2] slice presumably drops the tail
                    # of the pattern that matches the number itself - confirm
                    # against the Locale definition.
                    tmp_invoice_number = re.sub(self.Locale.invoiceRegex[:-2], '', invoice_res)
                    candidate = tmp_invoice_number.lstrip().split(' ')[0]

                    if len(candidate) >= int(self.Locale.invoiceSizeMin):
                        return candidate, line
            return None

        hit = find_in(self.text)
        if hit is not None:
            invoice_number, line = hit
            self.Log.info('Invoice number found : ' + invoice_number)
            return [invoice_number, line.position, self.nbPages]

        hit = find_in(self.footer_text)
        if hit is not None:
            invoice_number, line = hit
            self.Log.info('Invoice number found : ' + invoice_number)
            position = self.Files.return_position_with_ratio(line, 'footer')
            return [invoice_number, position, self.nbPages]
    def run(self):
        """Find the footer amounts (pre-tax, total, VAT rate) and cross-check them.

        The three values are first looked up with ``search_by_positions``
        (fields ``'total_amount'``, ``'ht_amount'``, ``'rate_percentage'``).
        If ``self.test_amount`` rejects them, they are searched again with
        ``self.process`` and the locale regexes. When ``self.test_amount``
        then does not return ``False``, the final values are taken from
        ``self.return_max`` and one missing value is derived from the other
        two.

        Returns:
            ``[noRateAmount, allRateAmount, ratePercentage, 1]`` when
            pre-tax + VAT, rounded to two decimals, equals the total;
            ``False`` otherwise.
        """
        if self.Files.isTiff == 'True':
            target = self.Files.tiffName
        else:
            target = self.Files.jpgName

        allRate = search_by_positions(self.supplier, 'total_amount', self.Config, self.Locale,
                                      self.Ocr, self.Files, target, self.typo)
        allRateAmount = {}
        if allRate and allRate[0]:
            allRateAmount = {
                0: re.sub(r"[^0-9\.]|\.(?!\d)", "", allRate[0].replace(',', '.')),
                1: allRate[1]
            }

        noRate = search_by_positions(self.supplier, 'ht_amount', self.Config, self.Locale,
                                     self.Ocr, self.Files, target, self.typo)
        noRateAmount = {}
        if noRate and noRate[0]:
            noRateAmount = {
                0: re.sub(r"[^0-9\.]|\.(?!\d)", "", noRate[0].replace(',', '.')),
                # Each amount carries its own position, not the total's.
                1: noRate[1]
            }

        percentage = search_by_positions(self.supplier, 'rate_percentage', self.Config, self.Locale,
                                         self.Ocr, self.Files, target, self.typo)
        ratePercentage = {}
        if percentage and percentage[0]:
            ratePercentage = {
                0: re.sub(r"[^0-9\.]|\.(?!\d)", "", percentage[0].replace(',', '.')),
                1: percentage[1]
            }

        if not self.test_amount(noRateAmount, allRateAmount, ratePercentage):
            noRateAmount = self.process(self.Locale.noRatesRegex)
            ratePercentage = self.process(self.Locale.vatRateRegex)
            allRateAmount = self.process(self.Locale.allRatesRegex)

        # Test all amounts. If the check fails outright, give up.
        if self.test_amount(noRateAmount, allRateAmount, ratePercentage) is False:
            return False

        # First item is the amount, second is the position.
        # NOTE(review): self.noRateAmount / allRateAmount / ratePercentage are
        # presumably filled in by process() or test_amount() - confirm.
        noRateAmount = self.return_max(self.noRateAmount)
        allRateAmount = self.return_max(self.allRateAmount)
        ratePercentage = self.return_max(self.ratePercentage)

        empty_position = (('', ''), ('', ''))
        # Derive the one missing value from the two others. Values are
        # converted with float() before dividing so string amounts work too.
        if noRateAmount is False and allRateAmount and ratePercentage:
            noRateAmount = [
                float("%.2f" % (float(allRateAmount[0]) / (1 + float(ratePercentage[0]) / 100))),
                empty_position
            ]
        elif allRateAmount is False and noRateAmount and ratePercentage:
            allRateAmount = [
                float("%.2f" % (float(noRateAmount[0])
                                + (float(noRateAmount[0]) * (float(ratePercentage[0]) / 100)))),
                empty_position
            ]
        elif ratePercentage is False and noRateAmount and allRateAmount:
            vatAmount = float("%.2f" % (float(allRateAmount[0]) - float(noRateAmount[0])))
            ratePercentage = [
                float("%.2f" % (float(vatAmount) / float(noRateAmount[0]) * 100)),
                empty_position
            ]

        # Check the three values against each other with simple arithmetic,
        # rounded to two decimals. A value that is still False raises
        # TypeError on indexing and means the footer is incomplete.
        try:
            total = "%.2f" % (float(noRateAmount[0])
                              + (float(noRateAmount[0]) * float(ratePercentage[0]) / 100))
        except TypeError:
            return False

        if float(total) == float(allRateAmount[0]):
            self.Log.info('Footer informations found : [TOTAL : ' + str(total) + '] - [HT : ' + str(noRateAmount[0]) + '] - [VATRATE : ' + str(ratePercentage[0]) + ']')
            return [noRateAmount, allRateAmount, ratePercentage, 1]
        return False
    def run(self):
        """Find the invoice date and, along the way, the due date.

        Steps, in order:

        1. ``search_by_positions`` for ``'date'`` on the header image. A hit
           accepted by ``self.format_date`` is stored in ``self.date`` and
           returned at once as a three-item list (no due date).
        2. With a supplier set, read its stored positions from the
           ``suppliers`` table and try the due-date position through
           ``search_custom_positions``.
        3. If no due date was found, run ``self.process_due_date`` over the
           upper-cased, digit-glued lines of ``self.text``.
        4. With a supplier set, try the stored invoice-date position.
        5. Run ``self.process`` over the upper-cased lines of ``self.text``.
        6. Run ``self.process`` over the digit-glued lines, then the raw
           lines.

        Returns:
            ``[date, position, page]`` from step 1, or
            ``[date, position, page, due_date]`` from steps 4-6. Falls
            through (returns ``None``) when no date is found.
        """
        header_image = (self.Files.tiffName_header
                        if self.Files.isTiff == 'True'
                        else self.Files.jpgName_header)
        masked = search_by_positions(self.supplier, 'date', self.Config,
                                     self.Locale, self.Ocr, self.Files,
                                     header_image, self.typo)
        due_date = False
        if masked and masked[0]:
            formatted = self.format_date(masked[0], masked[1])
            if formatted:
                self.date = formatted[0]
                self.Log.info('Date found using mask position : ' + str(formatted[0]))
                page = masked[2] if len(masked) == 3 else ''
                return [formatted[0], formatted[1], page]

        # Values the database may hold for "no position stored".
        unset = (False, 'NULL', '', None)

        def read_stored(stored_position, stored_page):
            """OCR the stored zone; return ``(text, position, page)``."""
            query = {
                'position': stored_position,
                'regex': None,
                'target': 'full',
                'page': stored_page,
            }
            zone_text, zone_position = search_custom_positions(
                query, self.Ocr, self.Files, self.Locale, self.file, self.Config)
            return zone_text, zone_position, query['page']

        if self.supplier:
            row = self.db.select({
                'select': [
                    'invoice_date_position', 'invoice_date_page',
                    'due_date_position', 'due_date_page'
                ],
                'table': ['suppliers'],
                'where': ['vat_number = ?'],
                'data': [self.supplier[0]]
            })[0]
            if row and row['due_date_position'] not in unset:
                due_text, due_position, _ = read_stored(row['due_date_position'],
                                                        row['due_date_page'])
                if due_text != '':
                    formatted = self.format_date(due_text, due_position, True)
                    if formatted:
                        due_date = [formatted[0], formatted[1]]
                        self.Log.info('Due date found using position : ' + str(formatted[0]))

        if not due_date:
            for line in self.text:
                glued_upper = re.sub(r'(\d)\s+(\d)', r'\1\2', line.content.upper())
                due_date = self.process_due_date(glued_upper, line.position)
                if due_date:
                    break

        # `row` is only bound when a supplier is set; the short-circuit keeps
        # it from being read otherwise.
        if self.supplier and row and row['invoice_date_position'] not in unset:
            date_text, date_position, date_page = read_stored(row['invoice_date_position'],
                                                              row['invoice_date_page'])
            if date_text != '':
                formatted = self.format_date(date_text, date_position, True)
                if formatted:
                    self.date = formatted[0]
                    self.Log.info('Invoice date found using position : ' + str(formatted[0]))
                    return [self.date, date_position, date_page, due_date]

        for line in self.text:
            hit = self.process(line.content.upper(), line.position)
            if hit:
                self.Log.info('Invoice date found : ' + hit[0])
                return [hit[0], hit[1], self.nbPages, due_date]

        for line in self.text:
            glued = re.sub(r'(\d)\s+(\d)', r'\1\2', line.content)
            hit = (self.process(glued, line.position)
                   or self.process(line.content, line.position))
            if hit:
                return [hit[0], hit[1], self.nbPages, due_date]
    def run(self, text_as_string=False):
        """Find the footer amounts (pre-tax, total, VAT rate) and cross-check them.

        The three values are first looked up with ``search_by_positions``
        (fields ``'ttc'``, ``'no_taxes'``, ``'rate_percentage'``). If
        ``self.test_amount`` rejects them, they are searched again with
        ``self.process`` and the locale regexes. A missing pre-tax amount is
        derived from the total and the rate. When ``self.test_amount`` then
        does not return ``False``, the final values come from
        ``self.return_max`` and are checked against each other.

        When the amounts are rejected, the search is retried once on the
        footer image returned by ``self.Files.improve_image_detection``
        (re-read with ``self.Ocr.line_box_builder``), then once more on the
        output of ``self.Ocr.text_builder`` with ``text_as_string=True``.

        Args:
            text_as_string: forwarded to ``self.process``.

        Returns:
            ``[no_rate_amount, all_rate_amount, rate_percentage, self.nbPage,
            [vat_amount_as_str]]`` when the values are consistent, ``False``
            otherwise.
        """
        if self.Files.isTiff == 'True':
            target = self.Files.tiffName
        else:
            target = self.Files.jpgName

        all_rate = search_by_positions(self.supplier, 'ttc', self.Config,
                                       self.Locale, self.Ocr, self.Files,
                                       target, self.typo)
        all_rate_amount = {}
        if all_rate and all_rate[0]:
            all_rate_amount = {
                0: re.sub(r"[^0-9\.]|\.(?!\d)", "",
                          all_rate[0].replace(',', '.')),
                1: all_rate[1]
            }

        no_rate = search_by_positions(self.supplier, 'no_taxes', self.Config,
                                      self.Locale, self.Ocr, self.Files,
                                      target, self.typo)
        no_rate_amount = {}
        if no_rate and no_rate[0]:
            no_rate_amount = {
                0: re.sub(r"[^0-9\.]|\.(?!\d)", "",
                          no_rate[0].replace(',', '.')),
                # Each amount carries its own position, not the total's.
                1: no_rate[1]
            }

        percentage = search_by_positions(self.supplier, 'rate_percentage',
                                         self.Config, self.Locale, self.Ocr,
                                         self.Files, target, self.typo)
        rate_percentage = {}
        if percentage and percentage[0]:
            rate_percentage = {
                0: re.sub(r"[^0-9\.]|\.(?!\d)", "",
                          percentage[0].replace(',', '.')),
                1: percentage[1]
            }

        vat_amount = False

        if not self.test_amount(no_rate_amount, all_rate_amount,
                                rate_percentage):
            no_rate_amount = self.process(self.Locale.noRatesRegex,
                                          text_as_string)
            rate_percentage = self.process(self.Locale.vatRateRegex,
                                           text_as_string)
            all_rate_amount = self.process(self.Locale.allRatesRegex,
                                           text_as_string)

        if all_rate_amount and no_rate_amount:
            vat_amount = float(
                "%.2f" % (float(self.return_max(all_rate_amount)[0]) -
                          float(self.return_max(no_rate_amount)[0])))

        # Derive the pre-tax amount from total and rate when it is missing.
        # Values are converted with float() before dividing so string
        # amounts work too.
        if all_rate_amount and rate_percentage and not no_rate_amount:
            no_rate_amount = [
                float("%.2f" %
                      (float(self.return_max(all_rate_amount)[0]) /
                       (1 + float(self.return_max(rate_percentage)[0]) / 100))),
                (('', ''), ('', ''))
            ]

        if self.test_amount(no_rate_amount, all_rate_amount,
                            rate_percentage) is not False:
            # First item is the amount, second is the position.
            # NOTE(review): self.noRateAmount / allRateAmount / ratePercentage
            # are presumably filled in by process() or test_amount() -
            # confirm.
            no_rate_amount = self.return_max(self.noRateAmount)
            all_rate_amount = self.return_max(self.allRateAmount)
            rate_percentage = self.return_max(self.ratePercentage)

            # Derive the one missing value from the two others; the trailing
            # True is part of the returned entry for a derived value.
            if no_rate_amount is False and all_rate_amount and rate_percentage:
                no_rate_amount = [
                    float("%.2f" % (float(all_rate_amount[0]) /
                                    (1 + float(rate_percentage[0]) / 100))),
                    (('', ''), ('', '')), True
                ]
            elif all_rate_amount is False and no_rate_amount and rate_percentage:
                all_rate_amount = [
                    float("%.2f" % (float(no_rate_amount[0]) +
                                    (float(no_rate_amount[0]) *
                                     (float(rate_percentage[0]) / 100)))),
                    (('', ''), ('', '')), True
                ]
            elif rate_percentage is False and no_rate_amount and all_rate_amount:
                vat_amount = float(
                    "%.2f" %
                    (float(all_rate_amount[0]) - float(no_rate_amount[0])))
                rate_percentage = [
                    float(
                        "%.2f" %
                        (float(vat_amount) / float(no_rate_amount[0]) * 100)),
                    (('', ''), ('', '')), True
                ]

            # Check the three values against each other with simple
            # arithmetic, rounded to two decimals. A value that is still
            # False raises TypeError on indexing and means the footer is
            # incomplete.
            try:
                total = "%.2f" % (float(no_rate_amount[0]) +
                                  (float(no_rate_amount[0]) *
                                   float(rate_percentage[0]) / 100))
            except TypeError:
                return False

            # Accept either "pre-tax + rate == total" or
            # "pre-tax + VAT amount == total".
            if (float(total) == float(all_rate_amount[0])
                    or float(all_rate_amount[0]) ==
                    float(vat_amount) + float(no_rate_amount[0])):
                self.Log.info('Footer informations found : [TOTAL : ' +
                              str(total) + '] - [HT : ' +
                              str(no_rate_amount[0]) + '] - [VATRATE : ' +
                              str(rate_percentage[0]) + ']')
                return [
                    no_rate_amount, all_rate_amount, rate_percentage,
                    self.nbPage,
                    [
                        "%.2f" % float(
                            float(no_rate_amount[0]) *
                            (float(rate_percentage[0]) / 100))
                    ]
                ]
            return False

        # First retry: OCR an enhanced version of the footer image.
        if not self.rerun:
            self.rerun = True
            if self.Files.isTiff == 'True':
                improved_image = self.Files.improve_image_detection(
                    self.Files.tiffName_footer)
            else:
                improved_image = self.Files.improve_image_detection(
                    self.Files.jpgName_footer)
            self.Files.open_img(improved_image)
            self.text = self.Ocr.line_box_builder(self.Files.img)
            return self.run()

        # Second retry: same image, read as one block of text.
        if self.rerun and not self.rerun_as_text:
            self.rerun_as_text = True
            self.text = self.Ocr.text_builder(self.Files.img)
            return self.run(text_as_string=True)
        return False
    def run(self, text_as_string=False):
        """Find the raw footer amounts: pre-tax, total, VAT rate and VAT amount.

        The first three are looked up with ``search_by_positions`` (fields
        ``'ttc'``, ``'no_taxes'``, ``'rate_percentage'``). If
        ``self.test_amount`` rejects them, all four values are searched with
        ``self.process`` and the locale regexes. When ``self.test_amount``
        then does not return ``False``, the values from ``self.return_max``
        are logged and returned.

        Otherwise the search is retried once on the footer image returned by
        ``self.Files.improve_image_detection`` (re-read with
        ``self.Ocr.line_box_builder``), then once more on the output of
        ``self.Ocr.text_builder`` with ``text_as_string=True``.

        Args:
            text_as_string: forwarded to ``self.process``.

        Returns:
            ``[no_rate_amount, all_rate_amount, rate_percentage, self.nbPage,
            vat_amount]``, or ``False`` when every attempt failed.
        """
        target = (self.Files.tiffName
                  if self.Files.isTiff == 'True'
                  else self.Files.jpgName)

        def from_position(field):
            """Return ``{0: cleaned amount, 1: position}`` for *field*, or ``{}``."""
            hit = search_by_positions(self.supplier, field, self.Config,
                                      self.Locale, self.Ocr, self.Files,
                                      target, self.typo)
            if not (hit and hit[0]):
                return {}
            # Use a dot as decimal separator and keep only digits and dots
            # that are followed by a digit.
            cleaned = re.sub(r"[^0-9\.]|\.(?!\d)", "", hit[0].replace(',', '.'))
            return {0: cleaned, 1: hit[1]}

        all_rate_amount = from_position('ttc')
        no_rate_amount = from_position('no_taxes')
        rate_percentage = from_position('rate_percentage')
        vat_amount = {}

        position_values_ok = self.test_amount(no_rate_amount, all_rate_amount,
                                              rate_percentage, vat_amount)
        if not position_values_ok:
            no_rate_amount = self.process(self.Locale.noRatesRegex, text_as_string)
            rate_percentage = self.process(self.Locale.vatRateRegex, text_as_string)
            all_rate_amount = self.process(self.Locale.allRatesRegex, text_as_string)
            vat_amount = self.process(self.Locale.vatAmountRegex, text_as_string)

        verdict = self.test_amount(no_rate_amount, all_rate_amount,
                                   rate_percentage, vat_amount)
        if verdict is False:
            # First retry: OCR an enhanced version of the footer image.
            if not self.rerun:
                self.rerun = True
                footer_image = (self.Files.tiffName_footer
                                if self.Files.isTiff == 'True'
                                else self.Files.jpgName_footer)
                improved_image = self.Files.improve_image_detection(footer_image)
                self.Files.open_img(improved_image)
                self.text = self.Ocr.line_box_builder(self.Files.img)
                return self.run()

            # Second retry: same image, read as one block of text.
            if self.rerun and not self.rerun_as_text:
                self.rerun_as_text = True
                self.text = self.Ocr.text_builder(self.Files.img)
                return self.run(text_as_string=True)
            return False

        no_rate_amount = self.return_max(self.noRateAmount)
        all_rate_amount = self.return_max(self.allRateAmount)
        rate_percentage = self.return_max(self.ratePercentage)
        vat_amount = self.return_max(self.vatAmount)

        summary = ('Raw footer informations found : [TOTAL : '
                   + str(all_rate_amount[0]) + '] - [HT : '
                   + str(no_rate_amount[0]) + '] - [VATRATE : '
                   + str(rate_percentage[0]) + '] - [VAT AMOUNT : '
                   + str(vat_amount[0]) + ']')
        self.Log.info(summary)
        return [no_rate_amount, all_rate_amount, rate_percentage, self.nbPage, vat_amount]