コード例 #1
0
    def process_with_position(self, select):
        """Read a numeric value from the zone stored for the current supplier.

        The supplier row is looked up by ``self.supplier[0]`` (used as the
        ``vat_number``), the stored zone is read through
        ``search_custom_positions`` and the digits found there are converted
        to a float.

        :param select: column names of the ``suppliers`` table; ``select[0]``
            holds the position and ``select[1]`` the page
        :return: ``[value, position, page]`` when a number could be read,
            ``False`` otherwise
        """
        rows = self.Database.select({
            'select': select,
            'table' : ['suppliers'],
            'where' : ['vat_number = ?'],
            'data'  : [self.supplier[0]]
        })
        # No matching supplier row means "no stored position", not a crash
        # on rows[0].
        row = rows[0] if rows else None
        if not row or not row[select[0]]:
            return False

        data = {'position' : row[select[0]], 'regex': None, 'target' : 'full', 'page' : row[select[1]]}
        text, position = search_custom_positions(data, self.Ocr, self.Files, self.Locale, self.file, self.Config)
        if not text:
            return False

        # Filter the result to get only the digits, dropping the whitespace
        # that may sit inside a numeric fragment.
        fragments = re.finditer(r'[-+]?\d*[.,\s]+\d+|\d+', text)
        result = ''.join(re.sub(r'\s+', '', fragment.group()) for fragment in fragments)
        if result == '':
            return False

        try:
            value = float(result.replace(',', '.'))
        except ValueError:
            # Several separators glued together (e.g. "1,234.56") cannot be
            # read as a single float; report "not found" instead of raising.
            return False
        return [value, position, data['page']]
コード例 #2
0
    def run(self):
        """Collect the custom fields configured for the current typology.

        The typology is taken from ``self.typo`` or, failing that, from
        ``self.supplier[2]['typology']``. Each configured field is read with
        ``search_custom_positions``; when that yields nothing and the field's
        ``regex`` entry is not ``False``, ``self.process`` is used instead.
        For ``date`` fields the value is reduced to the last match of
        ``self.Locale.dateRegex``; for ``number`` fields read by position,
        every non-digit character is removed.

        :return: dict mapping each field index to
            ``[value, position, column]``; empty when no field is configured
        """
        fields = {}
        if self.typo:
            fields = retrieve_custom_positions(self.typo, self.Config)
        elif self.supplier and self.supplier[2]['typology']:
            fields = retrieve_custom_positions(self.supplier[2]['typology'], self.Config)

        results = {}
        if not fields:
            return results

        for index in fields:
            field = fields[index]
            data, position = search_custom_positions(field, self.Ocr, self.Files, self.Locale, self.file, self.Config)

            if not data and field['regex'] is not False:
                # Nothing found at the stored position: fall back to
                # self.process for this field.
                value = self.process(field)
                if value and field['type'] == 'date':
                    # Keep the last date found in the processed value, if any
                    for date in re.finditer(self.Locale.dateRegex, value):
                        value = date.group()
                # NOTE(review): the former 'number' branch on this path was
                # nested under the 'date' check and could never execute, so
                # numeric values coming from self.process are stored as-is.
                results[index] = [value, position, field['column']]
                continue

            # data can still be falsy here (field whose regex is False); only
            # clean it up when there is something to clean, so a non-text
            # placeholder never reaches the re module.
            if data:
                if field['type'] == 'date':
                    for date in re.finditer(self.Locale.dateRegex, data):
                        data = date.group()
                elif field['type'] == 'number':
                    data = re.sub('[^0-9]', '', data)
            results[index] = [data, position, field['column']]
        return results
コード例 #3
0
    def run(self):
        """Find the invoice number of the current document.

        Three strategies are tried in order:

        1. ``search_by_positions`` with the header image of the document;
        2. ``self.Locale.invoiceRegex`` applied to every line of
           ``self.text``;
        3. when a supplier is known and ``self.customPage`` is falsy, the
           position stored in the ``suppliers`` table.

        :return: the ``search_by_positions`` result when it carries a value,
            otherwise ``[invoice_number, position, page]``, or ``False`` when
            nothing was found
        """
        if self.Files.isTiff == 'True':
            target = self.Files.tiffName_header
        else:
            target = self.Files.jpgName_header
        invoiceNumber = search_by_positions(self.supplier, 'invoice',
                                            self.Config, self.Locale, self.Ocr,
                                            self.Files, target, self.typo)
        if invoiceNumber and invoiceNumber[0]:
            return invoiceNumber

        for line in self.text:
            for match in re.finditer(self.Locale.invoiceRegex, line.content.upper()):
                # Delete the invoice keyword: the pattern minus its last two
                # characters is substituted away (presumably that prefix is
                # the keyword part of the regex — confirm against the locale
                # file).
                candidate = re.sub(self.Locale.invoiceRegex[:-2], '', match.group())
                candidate = candidate.lstrip().split(' ')[0]
                # NOTE(review): the strict '>' rejects a number whose length
                # equals invoiceSizeMin — confirm this is intended.
                if len(candidate) > int(self.Locale.invoiceSizeMin):
                    self.Log.info('Invoice number found : ' + candidate)
                    return [candidate, line.position, self.nbPages]

        if not self.supplier or self.customPage:
            return False

        self.Log.info(
            'Invoice number not found. Searching invoice number using position in database'
        )
        rows = self.Database.select({
            'select': ['invoice_number_position', 'invoice_number_page'],
            'table': ['suppliers'],
            'where': ['vat_number = ?'],
            'data': [self.supplier[0]]
        })
        # No matching supplier row: nothing to search with
        row = rows[0] if rows else None
        if not row or not row['invoice_number_position']:
            return False

        data = {
            'position': row['invoice_number_position'],
            'regex': None,
            'target': 'full',
            'page': row['invoice_number_page']
        }
        text, position = search_custom_positions(
            data, self.Ocr, self.Files, self.Locale, self.file,
            self.Config)
        # A falsy result of any type means "not found"; str() keeps the log
        # call from failing on a non-string value.
        if not text:
            return False

        self.Log.info('Invoice number found with position : ' + str(text))
        return [text, position, data['page']]
コード例 #4
0
    def run(self):
        """Find the invoice number of the current document.

        Strategies, in order:

        1. ``search_by_positions`` with the header image of the document;
        2. the position stored in the ``suppliers`` table (only when a
           supplier is known and ``self.customPage`` is falsy);
        3. ``self.Locale.invoiceRegex`` on every line of ``self.text``;
        4. the same regex on every line of ``self.footer_text``, with the
           position converted by ``self.Files.return_position_with_ratio``.

        :return: the ``search_by_positions`` result when it carries a value,
            otherwise ``[invoice_number, position, page]``, or ``None`` when
            nothing was found
        """
        if self.Files.isTiff == 'True':
            target = self.Files.tiffName_header
        else:
            target = self.Files.jpgName_header
        invoice_number = search_by_positions(self.supplier, 'invoice', self.Config, self.Locale, self.Ocr, self.Files, target, self.typo)
        if invoice_number and invoice_number[0]:
            return invoice_number

        if self.supplier and not self.customPage:
            rows = self.Database.select({
                'select': ['invoice_number_position', 'invoice_number_page'],
                'table': ['suppliers'],
                'where': ['vat_number = ?'],
                'data': [self.supplier[0]]
            })
            # No matching supplier row: skip the stored-position strategy
            row = rows[0] if rows else None

            if row and row['invoice_number_position'] not in [False, 'NULL', '', None]:
                data = {'position': row['invoice_number_position'], 'regex': None, 'target': 'full', 'page': row['invoice_number_page']}
                text, position = search_custom_positions(data, self.Ocr, self.Files, self.Locale, self.file, self.Config)

                # Only a real value ends the search; any falsy result falls
                # through to the regex strategies below.
                if text:
                    self.Log.info('Invoice number found with position : ' + str(text))
                    return [text, position, data['page']]

        def extract_number(content):
            """Return the first long-enough invoice number in *content*, else None."""
            for match in re.finditer(self.Locale.invoiceRegex, content.upper()):
                invoice_res = match.group()
                # If the regex returned dates, remove every one of them
                # (replacements accumulate on invoice_res).
                for _date in re.finditer(self.Locale.dateRegex, match.group()):
                    if _date.group():
                        invoice_res = invoice_res.replace(_date.group(), '')

                # Delete the invoice keyword
                tmp_invoice_number = re.sub(self.Locale.invoiceRegex[:-2], '', invoice_res)
                candidate = tmp_invoice_number.lstrip().split(' ')[0]

                if len(candidate) >= int(self.Locale.invoiceSizeMin):
                    return candidate
            return None

        for line in self.text:
            number = extract_number(line.content)
            if number is not None:
                self.Log.info('Invoice number found : ' + number)
                return [number, line.position, self.nbPages]

        for line in self.footer_text:
            number = extract_number(line.content)
            if number is not None:
                self.Log.info('Invoice number found : ' + number)
                position = self.Files.return_position_with_ratio(line, 'footer')
                return [number, position, self.nbPages]

        return None
コード例 #5
0
    def run(self):
        """Find the delivery number of the current document.

        When a supplier is known and ``self.customPage`` is falsy, the zone
        stored in ``suppliers.delivery_number_1_position`` is read first
        (page ``'1'``). Otherwise, or when that gives nothing, every line of
        ``self.text`` is matched against ``self.Locale.deliveryNumberRegex``.

        :return: ``[delivery_number, position, page]`` or ``False`` when no
            delivery number was found
        """
        def clean_match(match):
            """Return the first token left once dates and the keyword are removed."""
            delivery_res = match.group()
            # If the regex returned dates, remove every one of them
            # (replacements accumulate on delivery_res).
            for _date in re.finditer(self.Locale.dateRegex, match.group()):
                if _date.group():
                    delivery_res = delivery_res.replace(_date.group(), '')

            # Delete the delivery number keyword
            tmp_delivery_number = re.sub(self.Locale.deliveryNumberRegex[:-2], '', delivery_res)
            return tmp_delivery_number.lstrip().split(' ')[0]

        if self.supplier and not self.customPage:
            rows = self.Database.select({
                'select': ['delivery_number_1_position'],
                'table': ['suppliers'],
                'where': ['vat_number = ?'],
                'data': [self.supplier[0]]
            })
            # No matching supplier row: skip the stored-position strategy
            row = rows[0] if rows else None

            if row and row['delivery_number_1_position'] not in [False, 'NULL', '', None]:
                data = {'position': row['delivery_number_1_position'], 'regex': None, 'target': 'full', 'page': '1'}
                text, position = search_custom_positions(data, self.Ocr, self.Files, self.Locale, self.file, self.Config)
                if text is not False:
                    for match in re.finditer(self.Locale.deliveryNumberRegex, str(text.upper())):
                        delivery_number = clean_match(match)
                        if delivery_number != '':
                            self.Log.info('Delivery number found with position : ' + str(delivery_number))
                            return [delivery_number, position, data['page']]
                    # The regex gave nothing usable: return the raw zone text
                    if text != "":
                        self.Log.info('Delivery number found with position : ' + str(text))
                        return [text, position, data['page']]

        for line in self.text:
            for match in re.finditer(self.Locale.deliveryNumberRegex, line.content.upper()):
                delivery_number = clean_match(match)

                # NOTE(review): the minimum length is read from
                # Locale.invoiceSizeMin, not a delivery-specific setting.
                if len(delivery_number) >= int(self.Locale.invoiceSizeMin):
                    self.Log.info('Delivery number found : ' + delivery_number)
                    position = line.position
                    if self.target != 'header':
                        position = self.Files.return_position_with_ratio(line, self.target)
                    return [delivery_number, position, self.nbPages]
        return False
コード例 #6
0
    def run(self):
        """Find the invoice date (and, when possible, the due date).

        Strategies, in order:

        1. ``search_by_positions`` with the header image; on success the
           method returns immediately with a three-item list.
        2. Due date: the zone stored in ``suppliers.due_date_position``,
           then ``self.process_due_date`` on every line of ``self.text``.
        3. Invoice date: the zone stored in
           ``suppliers.invoice_date_position``, then ``self.process`` on
           every line of ``self.text`` (upper-cased first, then with digits
           separated by whitespace glued together, then unchanged).

        ``self.date`` is updated when the date comes from a stored position
        or from ``search_by_positions``.

        :return: ``[date, position, page]`` for strategy 1,
            ``[date, position, page, due_date]`` for the others, or ``None``
            when no invoice date was found
        """
        if self.Files.isTiff == 'True':
            target = self.Files.tiffName_header
        else:
            target = self.Files.jpgName_header
        date = search_by_positions(self.supplier, 'date', self.Config,
                                   self.Locale, self.Ocr, self.Files, target,
                                   self.typo)
        due_date = False
        if date and date[0]:
            res = self.format_date(date[0], date[1])
            if res:
                self.date = res[0]
                self.Log.info('Date found using mask position : ' +
                              str(res[0]))
                # The page is only present on three-item results
                page = date[2] if len(date) == 3 else ''
                return [res[0], res[1], page]

        row = None
        if self.supplier:
            rows = self.db.select({
                'select': [
                    'invoice_date_position', 'invoice_date_page',
                    'due_date_position', 'due_date_page'
                ],
                'table': ['suppliers'],
                'where': ['vat_number = ?'],
                'data': [self.supplier[0]]
            })
            # No matching supplier row: skip both stored-position strategies
            row = rows[0] if rows else None
            if row and row['due_date_position'] not in [
                    False, 'NULL', '', None
            ]:
                data = {
                    'position': row['due_date_position'],
                    'regex': None,
                    'target': 'full',
                    'page': row['due_date_page']
                }
                _text, _position = search_custom_positions(
                    data, self.Ocr, self.Files, self.Locale, self.file,
                    self.Config)
                # Only hand real text to format_date
                if _text:
                    res = self.format_date(_text, _position, True)
                    if res:
                        due_date = [res[0], res[1]]
                        self.Log.info('Due date found using position : ' +
                                      str(res[0]))

        if not due_date:
            for line in self.text:
                # Glue digits separated by whitespace before matching
                due_date = self.process_due_date(
                    re.sub(r'(\d)\s+(\d)', r'\1\2', line.content.upper()),
                    line.position)
                if due_date:
                    break

        if self.supplier and row and row['invoice_date_position'] not in [
                False, 'NULL', '', None
        ]:
            data = {
                'position': row['invoice_date_position'],
                'regex': None,
                'target': 'full',
                'page': row['invoice_date_page']
            }
            text, position = search_custom_positions(
                data, self.Ocr, self.Files, self.Locale, self.file,
                self.Config)
            if text:
                res = self.format_date(text, position, True)
                if res:
                    self.date = res[0]
                    self.Log.info('Invoice date found using position : ' +
                                  str(res[0]))
                    return [self.date, position, data['page'], due_date]

        for line in self.text:
            res = self.process(line.content.upper(), line.position)
            if res:
                # str() matches the other log calls and avoids a TypeError
                # when the date is not a plain string.
                self.Log.info('Invoice date found : ' + str(res[0]))
                return [res[0], res[1], self.nbPages, due_date]

        for line in self.text:
            # Last resort: original casing, first with whitespace between
            # digits removed, then the untouched line.
            res = self.process(re.sub(r'(\d)\s+(\d)', r'\1\2', line.content),
                               line.position)
            if not res:
                res = self.process(line.content, line.position)
            if res:
                return [res[0], res[1], self.nbPages, due_date]

        return None
コード例 #7
0
    def process_footer_with_position(self, select):
        """Read a footer amount or VAT rate from the supplier's stored zone.

        The supplier row is looked up by ``self.supplier[0]`` (used as the
        ``vat_number``). The stored zone is read with
        ``search_custom_positions`` and the text is normalised to a numeric
        string. On success ``self.nbPage`` is set to the page that was read.

        :param select: column names of the ``suppliers`` table; ``select[0]``
            holds the position and ``select[1]`` the page. The column
            ``'vat_1_position'`` triggers the VAT-rate specific handling.
        :return: ``[value, position, page]`` with ``value`` a string, or
            ``False`` when nothing usable was found
        """
        rows = self.Database.select({
            'select': select,
            'table': ['suppliers'],
            'where': ['vat_number = ?'],
            'data': [self.supplier[0]]
        })
        # No matching supplier row means "no stored position", not a crash
        # on rows[0].
        row = rows[0] if rows else None
        if not row or row[select[0]] in [
                '((,),(,))', 'NULL', None, '', False
        ]:
            return False

        page = row[select[1]]
        if self.target == 'full':
            page = self.nbPage

        data = {
            'position': row[select[0]],
            'regex': None,
            'target': 'full',
            'page': page
        }
        text, position = search_custom_positions(data, self.Ocr,
                                                 self.Files, self.Locale,
                                                 self.file, self.Config)
        if not text:
            return False

        try:
            # Check whether the returned string can be converted to float
            float(text)
            result = text
            if select[0] == 'vat_1_position':
                # Fix readings such as 2000.0 or 200.0 instead of 20.0 by
                # snapping to a known VAT rate.
                # NOTE(security): eval() runs whatever expression the locale
                # setting contains; if that value can come from an untrusted
                # source, parse it with ast.literal_eval instead.
                tva_amounts = eval(self.Locale.vatRateList)
                _split = result.split('.')
                # A value without a decimal part ("20") has nothing to strip
                if len(_split) > 1 and _split[1] == '0':
                    result = _split[0]

                for tva in tva_amounts:
                    if str(tva) in str(result.replace(',', '.')):
                        result = str(tva)
                        break
        except (ValueError, SyntaxError, TypeError):
            # The text is not a plain float: keep only its numeric fragments
            fragments = re.finditer(r'[-+]?\d*[.,]+\d+([.,]+\d+)?|\d+',
                                    text)
            result = ''.join(fragment.group() for fragment in fragments)

            if select[0] != 'vat_1_position':
                try:
                    cleaned = result.replace(' ', '.')
                    cleaned = cleaned.replace('\x0c', '')
                    cleaned = cleaned.replace('\n', '')
                    cleaned = cleaned.replace(',', '.')
                    parts = cleaned.split('.')
                    if len(parts) > 1:
                        last_part = parts[-1]
                        if len(last_part) > 2:
                            # More than two trailing digits: every separator
                            # is dropped and the digits are joined.
                            result = cleaned.replace('.', '')
                        else:
                            # One or two trailing digits: they become the
                            # decimal part and the other separators go.
                            result = ''.join(parts[:-1]) + '.' + last_part
                            result = str(float(result))
                except (ValueError, SyntaxError, TypeError):
                    # Best effort: keep the digits as extracted when they
                    # cannot be normalised.
                    pass

        if result == '':
            return False

        result = re.sub(r'\s+', '', result).replace(',', '.')
        self.nbPage = data['page']
        return [result, position, data['page']]