コード例 #1
0
ファイル: maxibps.py プロジェクト: bubbas/ofxstatement
    def parse_record(self, rec):
        MultilineRecRE = re.compile(r"\n\s+")
        SplitItemRE = re.compile(r":\s+")
        # join together multiline items
        rec = MultilineRecRE.sub(" ", rec)
        res_iter = (SplitItemRE.split(line) for line in rec.split("\n"))
        res_dict = dict((item[0].strip(), item[1].strip())
            for item in res_iter)
        res_key_set = set(res_dict.keys())
        # Insurance against unknown keys in the statement
        assert res_key_set <= self._keys, \
            "Unknown keys in the transaction: %s" % (res_key_set - self._keys)

        # no idea, how to make this unique and monotonically increasing
        trans_ID = res_dict['datum zaúčtování'] + res_dict['částka'] + \
            res_dict['variabilní symbol'] + res_dict['protiúčet']
        date_str = res_dict['datum zaúčtování'].split('.')
        stat_line = StatementLine(id=trans_ID,
            date=datetime.date(int(date_str[2]), int(date_str[1]),
                int(date_str[0])),
            memo="%s\nozn. operace: %s" % (res_dict['poznámka'],
                res_dict['označení operace']),
            amount=float(res_dict['částka']))
        stat_line.payee = res_dict['název protiúčtu']
        # According to OFX spec this could be "check (or other reference) no."
        # I don't see any requirement on monotonicity or uniqueness, but
        # I wonder how GnuCash understands that
        stat_line.check_no = res_dict['variabilní symbol']
        return stat_line
コード例 #2
0
    def parse_record(self, line):
        if self.cur_record <= 2:
            # Skip header line
            return None

        if not self.statement.account_id:
            self.statement.account_id = line[16]

        # Get all fields
        type_code = line[12]
        date = line[1]
        date_user = line[11]
        c_or_d = line[14]
        amount = self.parse_float(line[3])
        id = line[10]
        refnum = line[10]
        note = line[9]
        payee_name = line[4]
        currency = line[17]

        # Convert LVL to EUR
        if currency == 'LVL':
            currency = 'EUR'
            amount = round(float(amount) / 0.702804, 2)

        # Create a statement line
        stmt_line = StatementLine(id, self.parse_datetime(date), note, self.parse_float(amount))
        stmt_line.payee = payee_name
        stmt_line.refnum = refnum
        stmt_line.date_user = self.parse_datetime(date_user)

        # Credit & Debit stuff
        stmt_line.trntype = "DEP"
        if c_or_d == 'D':
            stmt_line.amount = -stmt_line.amount
            stmt_line.trntype = "DEBIT"

        # Various types
        if 'PMNTCCRDCWDL' in type_code:
            stmt_line.trntype = "ATM"
        elif 'ACMTMDOPFEES' in type_code:
            stmt_line.trntype = "SRVCHG"
        elif 'LDASCSLNINTR' in type_code:
            stmt_line.trntype = "INT"
        elif 'PMNTCCRDOTHR' in type_code:
            stmt_line.trntype = "PAYMENT"
            m = self.card_purchase_re.match(stmt_line.memo)
            if m:
                # This is an electronic purchase. Extract check number from the memo field
                stmt_line.check_no = m.group(1)

        elif 'PMNTRCDTESCT' in type_code or 'PMNTICDTESCT' in type_code:
            stmt_line.trntype = "XFER"

        # DEBUG
        if self.debug:
            print(stmt_line, stmt_line.trntype)

        return stmt_line
コード例 #3
0
    def parse(self):
        statement = Statement(bank_id='321081669', currency='USD')

        with open(self.filename) as f:
            for row in csv.DictReader(f):
                line = StatementLine(id=row['Transaction Number'],
                                     date=self.parse_datetime(row['Date']),
                                     memo=row['Statement Description'],
                                     amount=Decimal(row['Debit']
                                                    or row['Credit']))

                line.payee = row['Description']
                line.check_no = row['Check Number']
                line.trntype = self.guess_type(line.payee, line.amount)
                statement.lines.append(line)

        return statement
コード例 #4
0
    def split_records(self) -> Iterator[Any]:
        """Return iterable object consisting of a line per transaction.

        It starts by determining in order):
        A) the account id
        B) the BIC of the bank
        C) the transactions that have 6 columns

        The 6 columns (spread over 3 rows) of the transactions:
        1) DATE COMPTA
        2) LIBELLE/REFERENCE
        3) DATE OPERATION
        4) DATE VALEUR
        5) DEBIT EUROS
        6) CREDIT EUROS

        Notes about parsing
        ===================
        I) The difficulty with the transactions is that the amount has not a
        sign so you have to know the position on the line whether it is debit
        or credit, see also function get_debit_credit.

        II) The description column on the first line contains on the left the
        name (payee) and on the right a check number. Lines 2 and so on
        determine the memo field.

        III) The DATE COMPTA is used as the date field.

        IV) Amounts follow the french format: a space as the thousands
        separator and a comma as the decimal separator.

        V) A transaction may be spread over several lines like this (columns
        left trimmed and separated by a bar):

DATE  |                                          |DATE     |DATE  |DEBIT|CREDIT
COMPTA|                                          |
      |LIBELLE/REFERENCE                         |OPERATION|VALEUR|EUROS|EUROS
======|==========================================|=========|======|=====|======
 20/06|PRLV SEPA AUTOROUTES DU            YYYYYYY|20/06    |20/06 |43,70|
      |XXXXXXXXXXXXXXXXXXXX XXXXXX
      |YYYYYYYYYYYYYYYYYYY

        VI) Or what do you think of these two transactions?

       Example 1:

 13/06|PRLV SEPA AVANSSUR                 ZZZZZZZ|13/06    |13/06 |     |30,99
      |Direct Assurance 999999999

      |F FRAIS/VIREMENT
      |AAAAAAAAAAA
 13/06|                                   BBBBBBB|13/06    |13/06 |     |4,10
      |00001 OPERATION

        Which are actually:

 13/06|PRLV SEPA AVANSSUR                 ZZZZZZZ|13/06    |13/06 |     |30,99
      |Direct Assurance 999999999
      |AAAAAAAAAAA
 13/06|FRAIS/VIREMENT                     BBBBBBB|13/06    |13/06 |     |4,10
      |00001 OPERATION

        But due to an image in the PDF the lines are spread out wrongly by
        pdftotext. The image is converted into an empty line and an F followed
        by whitespace above (!) the rest of the current memo.

        Example 2:

 26/09|F COTIS AFFINEA
      |XCCNV999 2019092500010929000001
      |                                   0010929|25/09    |25/09|7,18  |

      |F COTIS AFFINEA
      |CONTRAT CNV0004207796
 26/09|                                   0010930|25/09    |25/09|12,18 |
      |XCCNV999 2019092500010930000001
      |CONTRAT CNV0004207797

        which should be actually:

 26/09|COTIS AFFINEA                      0010929|25/09    |25/09|7,18  |
      |XCCNV999 2019092500010929000001
      |CONTRAT CNV0004207796

 26/09|COTIS AFFINEA                      0010930|25/09    |25/09|12,18 |
      |XCCNV999 2019092500010930000001
      |CONTRAT CNV0004207797

        This should be solved by matching a transaction over this line
        and the second line after that, a lookahead.

        VII) In this case the second part (DEBIT DIFFERE) off the description
        line is not a check number but just part of the name. There is a
        bandwith for the check number. Some heuristics show that the start
        of the reference number + 19 is at least the position of the operation
        date column. Let's make 20 the threshold.

 28/06|CARTE     DEBIT DIFFERE                    |28/06    |30/06 |6,70 |


        """
        def convert_str_to_list(str: str,
                                max_items: Optional[int] = None,
                                sep: str = r'\s\s+|\t|\n') -> List[str]:
            return [x for x in re.split(sep, str)[0:max_items]]

        def get_debit_credit(line: str, amount: str, credit_pos: int) -> str:
            return 'C' if line.rfind(amount) >= credit_pos else 'D'

        def get_amount(amount_in: Decimal,
                       transaction_type_in: str) -> Decimal:
            sign_out: int = 1
            amount_out: Optional[Decimal] = None

            # determine sign_out
            assert isinstance(transaction_type_in, str)
            assert transaction_type_in in ['D', 'C']

            if transaction_type_in == 'D':
                sign_out = -1

            # determine amount_out
            assert isinstance(amount_in, str)
            # Amount may be something like 1 827,97
            m = re.search(r'^([ ,0-9]+)$', amount_in)
            assert m is not None
            amount_out = m.group(1)
            if amount_out[-3] == ',':
                amount_out = amount_out.replace(' ', '').replace(',', '.')

            # convert to str to keep just the last two decimals
            amount_out = Decimal(str(amount_out))

            return sign_out * amount_out

        F_pattern: Pattern[str] = re.compile(r'(F\s+)')
        account_id_pattern: Pattern[str] = re.compile(r'VOTRE .* N° (\d+)')
        bank_id_pattern: Pattern[str]
        bank_id_pattern = re.compile(r'IBAN\s+(\S.+\S)\s+BIC\s+(\S+)')
        # The first header row should appear like that but the second
        # is spread out over two lines.
        header_rows: List[List[str]] = [
            ['DATE', 'DATE', 'DATE', 'DEBIT', 'CREDIT'],
            [
                'COMPTA', 'LIBELLE/REFERENCE', 'OPERATION', 'VALEUR', 'EUROS',
                'EUROS'
            ]
        ]
        second_header_row: List[str] = []
        accounting_date_pos: Optional[int] = None  # DATE COMPTA
        description_pos: Optional[int] = None  # LIBELLE/REFERENCE
        operation_date_pos: Optional[int] = None  # DATE OPERATION
        value_date_pos: Optional[int] = None  # DATE VALEUR
        debit_pos: Optional[int] = None
        credit_pos: Optional[int] = None
        # 20 before DATE OPERATION (guessed, see note VII)
        check_no_pos: Optional[int] = None

        balance_pattern: Pattern[str] = \
            re.compile(r'SOLDE (CRED|DEB)ITEUR AU (../../....).\s+([ ,0-9]+)$')
        transaction_pattern: Pattern[str] = \
            re.compile(r'\d\d/\d\d\s+\S.*\s+\d\d/\d\d\s+\d\d/\d\d\s+[ ,0-9]+$')

        read_end_balance_line: bool = False

        stmt_line: Optional[StatementLine] = None
        stmt_lines: List[StatementLine] = []
        payee: Optional[str] = None  # to handle note VI

        # Need to be able to loook ahead for complicated cases
        lines: List[str] = [line for line in self.fin]

        pos: int
        m: Optional[Match[str]]
        transaction_type: str
        line_stripped: str
        accounting_date: date
        balance: Union[str, Decimal]
        row: List[str]
        combined_line: str

        # breakpoint()
        for idx, line in enumerate(lines, start=1):
            line_stripped = line.strip()
            if line_stripped != '':
                logger.debug('line %04d: %s', idx, line)

                pos = line_stripped.find('TOTAL DES MOUVEMENTS')

                if pos == 0:  # found
                    read_end_balance_line = True
                    continue

            if not self.statement.account_id:
                m = account_id_pattern.match(line_stripped)
                if m:
                    self.statement.account_id = m.group(1)
                    self.unique_id_sets[self.statement.account_id] = set()
                    logger.debug('account_id: %s', self.statement.account_id)
                continue

            if not self.statement.bank_id:
                m = bank_id_pattern.match(line_stripped)
                if m:
                    self.statement.bank_id = m.group(2)
                    logger.debug('bank_id: %s', self.statement.bank_id)
                elif self.bank_id:
                    self.statement.bank_id = self.bank_id
                    logger.debug('bank_id: %s', self.statement.bank_id)
                continue

            assert self.statement.account_id and self.statement.bank_id

            m = balance_pattern.match(line_stripped)
            if m:
                accounting_date = datetime.strptime(m.group(2),
                                                    '%d/%m/%Y').date()
                balance = m.group(3)
                logger.debug('accounting_date: %s; balance: %s',
                             accounting_date, balance)
                assert credit_pos is not None
                transaction_type = get_debit_credit(line, balance, credit_pos)
                balance = cast(Decimal, balance)
                if read_end_balance_line:
                    self.statement.end_balance = get_amount(
                        balance, transaction_type)
                    self.statement.end_date = accounting_date
                    break
                elif self.statement.start_balance is None:
                    self.statement.start_balance = get_amount(
                        balance, transaction_type)
                    self.statement.start_date = accounting_date
                continue

            row = convert_str_to_list(line_stripped)

            if row == header_rows[0]:
                logger.debug('header row 1: %s', str(row))
                debit_pos = line.find('DEBIT')
                assert debit_pos >= 0
                credit_pos = line.find('CREDIT')
                assert credit_pos >= 0
                # Create a copy
                second_header_row = header_rows[1][:]
                logger.debug('second header row: %s', str(second_header_row))
                continue
            elif second_header_row:
                row = convert_str_to_list(line_stripped, sep=r'\s+|\t|\n')
                logger.debug('header row 2/3: %s', str(row))
                # Are the columns of this row a subset of header_rows[1]?
                if set(row) < set(header_rows[1]):
                    for col in row:
                        if col == 'COMPTA':
                            accounting_date_pos = line.find(col)
                            assert accounting_date_pos >= 0
                        elif col == 'LIBELLE/REFERENCE':
                            description_pos = line.find(col)
                            assert description_pos >= 0
                        elif col == 'OPERATION':
                            operation_date_pos = line.find(col)
                            assert operation_date_pos >= 0
                            check_no_pos = operation_date_pos - 20
                            assert check_no_pos >= 0
                        elif col == 'VALEUR':
                            value_date_pos = line.find(col)
                            assert value_date_pos >= 0
                        elif col == 'EUROS':
                            pass
                        second_header_row.remove(col)
                logger.debug('second header row: %s', str(second_header_row))
                continue
            elif len(row[0]) > 0:
                logger.debug('row: %s', str(row))

            # Empty line
            if len(row) == 1 and row[0] == '':
                if payee is None:
                    # Note VI: first, empty line
                    payee = ''
                elif payee != '':  # pragma: no cover
                    # Obviously an empty line after an F\s+ line
                    payee = None
                else:
                    pass  # several empty lines before an F line possible
                logger.debug('payee: %s', payee)
                continue
            # Handle note VI
            elif payee == '' and len(row) == 1 and F_pattern.match(row[0]):
                # Note VI: second line left trimmed starting with F
                payee = row[0][2:]
                logger.debug('payee: %s', payee)
                continue
            else:
                logger.debug('payee: %s', payee)

            m = transaction_pattern.match(line_stripped)

            # See note VI, example 2
            if not m and\
               idx + 2 <= len(lines) and\
               len(row) >= 2 and\
               (F_pattern.match(row[1]) or (len(row) >= 3 and row[1] == 'F')):
                assert line == lines[idx - 1]
                # The first line right stripped and the 'F\s+' replaced by ''
                combined_line = lines[idx - 1].rstrip()
                m = F_pattern.search(combined_line)
                assert m
                combined_line = combined_line.replace(m.group(1), '')
                # Add the second line (two rows further) from the point
                # where the first right trimmed line ends, but only if
                # the part before that point contains just whitespace.
                if lines[idx - 1 + 2][0:len(combined_line)].strip() == '':
                    combined_line += lines[idx - 1 + 2][len(combined_line):]
                    logger.debug('combined line stripped: %s',
                                 combined_line.strip())
                    m = transaction_pattern.match(combined_line.strip())
                    if m:
                        del lines[idx - 1 + 2]  # not necessary anymore
                        # recalculate some helper variables
                        line = combined_line
                        line_stripped = line.strip()
                        row = convert_str_to_list(line_stripped)

            if m:
                logger.debug('found a transaction line')

                assert debit_pos is not None and debit_pos >= 0
                assert credit_pos is not None and credit_pos >= 0
                assert accounting_date_pos is not None \
                    and accounting_date_pos >= 0
                assert description_pos is not None and description_pos >= 0
                assert value_date_pos is not None and value_date_pos >= 0

                # emit previous transaction if any
                if stmt_line is not None:
                    stmt_lines.append(stmt_line)
                    stmt_line = None

                # Note VI
                if payee is not None and payee != '':
                    row.insert(1, payee)
                    payee = None
                    logger.debug('After adding payee to the row: %s', str(row))

                stmt_line = StatementLine()
                if len(row) >= 6:
                    pos = line.find(row[-4])
                    assert check_no_pos is not None
                    if pos >= check_no_pos:
                        stmt_line.check_no = row[-4]
                        logger.debug('Setting check_no: %s', row[-4])
                    else:
                        logger.debug('Skip setting check_no')

                stmt_line.accounting_date = row[0]
                stmt_line.operation_date = row[-2]
                stmt_line.value_date = row[-3]
                stmt_line.amount = cast(Decimal, row[-1])
                assert credit_pos is not None
                transaction_type = \
                    get_debit_credit(line, row[-1], credit_pos)

                # Should have 6 columns. If not: reduce.
                if len(row) > 6:  # pragma: no cover
                    while len(row) > 6:
                        row[2] += ' ' + row[3]
                        del row[3]
                    logger.debug('row after reducing columns: %s', str(row))

                stmt_line.payee = row[1]
                stmt_line.memo = ''
                stmt_line.amount = get_amount(stmt_line.amount,
                                              transaction_type)
                logger.debug('Statement line: %r', stmt_line)
            elif stmt_line is not None:
                assert accounting_date_pos is not None
                assert operation_date_pos is not None
                # Continuation of a transaction?
                # Or stated otherwise does the memo text completely fit
                # in the second column?
                pos = line.find(line_stripped)
                if pos > accounting_date_pos and\
                   pos + len(line_stripped) < operation_date_pos:
                    if stmt_line.memo == '':
                        stmt_line.memo = line_stripped
                    else:
                        stmt_line.memo += " " + line_stripped

        # end of while loop
        # assert self.statement.account_id, "No account id found."
        # assert self.statement.bank_id, "No bank id found."
        # assert stmt_lines, "No statement lines found."
        if stmt_line is not None:
            stmt_lines.append(stmt_line)
        # We can only yield the statement lines when end_date is there,
        # see function get_date() below
        return (sl for sl in stmt_lines)