def parse_record(self, line):
    """Parse a transaction line and return a StatementLine object.

    The base-class parser builds the statement line; this override then
    fills in amount and transaction type from the two amount columns and
    copies the memo into the payee.

    :param line: sequence of column values for one transaction. Column 2
        is read as the credit amount, column 3 as the debit amount.
    :return: the statement line produced by the base class, updated in
        place.
    """
    sl = super(PostFinanceBankParser, self).parse_record(line)

    if not line[3]:
        sl.amount = self.parse_float(line[2])
        sl.trntype = 'CREDIT'
    else:
        # The debit column is negated by prefixing a minus sign before
        # parsing.
        sl.amount = self.parse_float("-" + line[3])
        sl.trntype = 'DEBIT'

    sl.payee = sl.memo
    if payeesplit:  # TODO: enable via settings
        # Patterns are applied in order, each against the current payee
        # value; a matching pattern replaces the payee with its group 1.
        for regex in payeefinder:
            m = regex.match(sl.payee)
            if m:
                sl.payee = m.group(1)
    return sl
def to_statement_line(self, raw_records):
    """Convert raw record mappings into a list of StatementLine objects.

    :param raw_records: iterable of mappings with the keys 'Data',
        'Detalii tranzactie' and either 'Credit' or 'Debit'.
    :return: list of statement lines, one per record.
    :raises ArgumentError: when a record has neither a 'Credit' nor a
        'Debit' key.
    """
    # Compiled once up front instead of once per record.
    payer_re = re.compile(r'ordonator:?(?P<payee>.*)$',
                          re.MULTILINE | re.IGNORECASE)
    refnum_re = re.compile(r'referinta:?(?P<refnum>.*)$',
                           re.MULTILINE | re.IGNORECASE)

    ret = []
    for record in raw_records:
        date = dateparser.parse(record['Data'])
        memo = record['Detalii tranzactie']
        line = StatementLine(date=date, memo=memo)

        # Amounts use '.' as thousands separator and ',' as decimal mark.
        if 'Credit' in record:
            line.amount = D(record['Credit'].replace('.', '').replace(',', '.'))
            line.trntype = 'CREDIT'
        elif 'Debit' in record:
            line.amount = D(record['Debit'].replace('.', '').replace(',', '.'))
            line.trntype = 'DEBIT'
        else:
            raise ArgumentError

        if line.trntype == 'CREDIT':
            # A credit that names an "ordonator" is reclassified as a
            # transfer, with the rest of that memo line as payee.
            m = payer_re.search(memo)
            if m:
                line.payee = m.group('payee')
                line.trntype = 'XFER'

        m = refnum_re.search(memo)
        if m:
            line.refnum = line.check_no = m.group('refnum')

        line.id = generate_transaction_id(line)
        ret.append(line)
    return ret
def parse_record(self, record):
    """Build a StatementLine from one entry element.

    Date, amount, debit/credit indicator and creditor name are taken
    from the first element matching each path; the memo joins all
    unstructured remittance texts with spaces. The id is a SHA-256 hex
    digest over date, amount, type, payee and memo.

    :param record: element exposing ``findall``.
    :return: the populated statement line.
    """
    P = self.P

    def first_text(path):
        # Text of the first element found under *path*.
        return record.findall(path)[0].text

    sl = StatementLine()
    sl.date = self.parse_datetime(first_text(P('ValDt', 'Dt')))
    sl.amount = float(first_text(P('Amt')))

    if first_text(P('CdtDbtInd')) == "DBIT":
        sl.trntype = 'DEBIT'
    else:
        sl.trntype = 'CREDIT'

    # Debits are stored as negative amounts.
    if sl.trntype == 'DEBIT' and sl.amount > 0:
        sl.amount = -sl.amount

    sl.payee = first_text(P('NtryDtls', 'TxDtls', 'RltdPties', 'Cdtr', 'Nm'))
    ustrd_nodes = record.findall(P('NtryDtls', 'TxDtls', 'RmtInf', 'Ustrd'))
    sl.memo = ' '.join(node.text for node in ustrd_nodes)

    # generate unique id
    digest = hashlib.sha256()
    for field in (sl.date, sl.amount, sl.trntype, sl.payee, sl.memo):
        digest.update(str(field).encode('utf-8'))
    sl.id = digest.hexdigest()
    return sl
def parse_record(self, row):
    """Turn the first five cells of a row into a StatementLine.

    Cells 0, 2, 3 and 4 become date, reference number, memo and amount.
    Cell 1 is parsed as a date but the result is discarded.

    A memo ending in ``/YY-MM-DD`` (e.g. 'WIRSTRÖMS PU/14-12-31') carries
    the date of a card purchase: that date is stored as ``date_user``
    and, when ``self.brief`` is set, the memo is cut down to the text in
    front of the slash.

    :param row: iterable of cell values.
    :return: the populated statement line.
    """
    cells = take(5, row)

    entry = StatementLine()
    entry.date = self.parse_datetime(cells[0])
    self.parse_datetime(cells[1])  # TODO: ???
    entry.refnum = cells[2]
    entry.memo = cells[3]
    entry.amount = cells[4]

    card = re.match('(.*)/([0-9]{2}-[0-9]{2}-[0-9]{2})$', entry.memo)
    if card:
        text_part, date_part = card.groups()
        if self.brief:
            entry.memo = text_part
        entry.date_user = datetime.strptime(date_part, '%y-%m-%d')

    entry.id = generate_transaction_id(entry)
    return entry
def parse_record(self, row):
    """Map one row onto a StatementLine using ``self.valid_header``.

    Column positions are looked up by header name. When ``self.analyze``
    is set and the recipient address equals the configured literal, the
    item title is appended to the memo, separated by ' / '.

    :param row: sequence of cell strings aligned with the header.
    :return: the populated statement line.
    :raises ValueError: if a required column name is not in the header.
    """
    wanted = (
        "Transaction ID", "Date", "Name", "Reference Txn ID",
        "Gross", "To Email Address", "Item Title",
    )
    # Resolve every column first so a missing header fails before any
    # value is read.
    col = {name: self.valid_header.index(name) for name in wanted}

    entry = StatementLine()
    entry.id = row[col["Transaction ID"]]
    entry.date = datetime.strptime(row[col["Date"]], self.date_format)
    entry.memo = row[col["Name"]]

    if self.analyze:
        parts = [row[col["Name"]]]
        recipient = row[col["To Email Address"]]
        if recipient and (recipient.lower() == '*****@*****.**'):
            parts.append(row[col["Item Title"]])
        entry.memo = ' / '.join(part for part in parts if part)

    entry.refnum = row[col["Reference Txn ID"]]
    entry.amount = atof(row[col["Gross"]].replace(" ", ""), self.locale)
    return entry
def parse_record(self, line):
    """Convert one row into a StatementLine, or None for skipped rows.

    Skipped are: the first records (``self.cur_record < 2``), rows with
    fewer than three columns (e.g.
    ['# 1 vorgemerkte Umsätze nicht angezeigt']), rows with an empty
    column 2, and rows where columns 8 and 9 are both empty.

    :param line: sequence of column strings.
    :return: the statement line, or None.
    """
    # FIXME: add header validation
    if self.cur_record < 2 or len(line) < 3 or not line[2]:
        return None

    sl = StatementLine()
    sl.id = line[1]
    sl.date = self.parse_datetime(line[2])
    sl.amount = self.parse_float(line[4])
    sl.trntype = 'CREDIT' if not sl.amount < 0 else 'DEBIT'
    sl.payee = line[7]

    # check for special transactions
    kind = line[6]
    if kind == "Entgeltabschluss":
        sl.memo = "%s: %s %s" % (kind, line[13], line[14])
        return sl
    if kind == "Wertpapiere" or line[7] == "KREDITKARTENABRECHNUNG":
        tail = " ".join(line[15:]).strip()
        sl.memo = "(%s/%s): %s" % (line[8], line[9], tail)
        return sl
    if not (line[8] or line[9]):
        # empty transaction
        print("empty", line)
        return None

    tail = " ".join(part for part in line[13:] if part).strip()
    sl.memo = "(%s/%s): %s" % (line[8], line[9], tail)
    return sl
def parse_record(self, df_row):
    """Build a StatementLine from the current DataFrame row.

    Values are read from ``self.df`` at ``self.df_row_idx``; the
    ``df_row`` argument itself is not used.

    :param df_row: unused.
    :return: the populated statement line.
    """
    frame = self.df
    idx = self.df_row_idx

    entry = StatementLine()
    entry.date = self.xls_date(frame['Data Contabile'][idx])
    entry.amount = frame['Importo'][idx]
    entry.trntype = "DEBIT" if entry.amount < 0 else "CREDIT"

    # A missing description becomes an empty memo.
    entry.memo = frame['Causale / Descrizione'][idx]
    if pd.isnull(entry.memo):
        entry.memo = ''

    entry.id = generate_transaction_id(entry)
    return entry
def _parse_line(self, ntry):
    """Convert one entry element into a StatementLine.

    For debit entries the amount is negated and the creditor name is
    used as payee; otherwise the debtor name is used. Payee, service
    reference and unstructured remittance text are optional: when the
    lookup yields no element with a ``text`` attribute, the field is set
    to None instead of raising AttributeError.

    :param ntry: the entry element to convert.
    :return: the populated statement line.
    """
    sline = StatementLine()

    crdeb = _find(ntry, 'CdtDbtInd').text
    amtnode = _find(ntry, 'Amt')
    amt = self._parse_amount(amtnode, self.statement.currency)
    if crdeb == CD_DEBIT:
        amt = -amt
        payee = _find(ntry, 'NtryDtls/TxDtls/RltdPties/Cdtr/Nm')
    else:
        payee = _find(ntry, 'NtryDtls/TxDtls/RltdPties/Dbtr/Nm')

    sline.payee = getattr(payee, 'text', None)
    sline.amount = amt

    dt = _find(ntry, 'ValDt')
    sline.date = self._parse_date(dt)

    bookdt = _find(ntry, 'BookgDt')
    sline.date_user = self._parse_date(bookdt)

    svcref = _find(ntry, 'NtryDtls/TxDtls/Refs/AcctSvcrRef')
    sline.refnum = getattr(svcref, 'text', None)

    rmtinf = _find(ntry, 'NtryDtls/TxDtls/RmtInf/Ustrd')
    sline.memo = getattr(rmtinf, 'text', None)

    return sline
def parse_record(self, line):
    """Convert one row into a StatementLine, or None for skipped rows.

    Skipped are rows with fewer than three columns (e.g.
    ['# 1 vorgemerkte Umsätze nicht angezeigt'], and empty rows), the
    table header, and rows with an empty column 2. The first data row
    also supplies the statement's account id when none is set yet.

    :param line: sequence of column strings.
    :return: the statement line, or None.
    """
    # Check the length before indexing so an empty row is skipped
    # instead of raising IndexError.
    if len(line) < 3:
        return None
    if line[0] == "Kontonummer":
        # it's the table header
        return None
    if not line[2]:
        return None

    if self.statement.account_id is None:
        self.statement.account_id = line[0]

    sl = StatementLine()
    sl.id = line[1]
    sl.date = self.parse_datetime(line[2])
    sl.date_avail = self.parse_datetime(line[3])
    sl.amount = self.parse_float(line[4])
    # Column 5 selects the type via TMAPPINGS; unknown values fall back
    # to the sign of the amount.
    sl.trntype = TMAPPINGS.get(line[5],
                               'DEBIT' if sl.amount < 0 else 'CREDIT')
    sl.payee = line[7][:32]
    sl.memo = "%s: %s" % (line[6],
                          " ".join(x for x in line[13:31] if len(x) > 0))

    if len(line[8]) > 0 and len(line[9]) > 0:
        # additional bank information is present
        splitted = self.parse_iban(line[8])
        if splitted:
            sl.bank_account_to = BankAccount(**splitted)
        else:
            sl.bank_account_to = BankAccount(line[9], line[8])

    return sl
def testAllFilled(self):
    """The stable id matches ``self.all_hash`` when date, memo and
    amount are all set."""
    sline = StatementLine()
    sline.date, sline.memo, sline.amount = (
        date(2015, 10, 2),
        "Test statement line",
        123.45,
    )
    generated = generate_stable_transaction_id(sline)
    self.assertEqual(self.all_hash, generated)
def parse_record(self, line):
    """Convert one row into a StatementLine, or None for header rows.

    Rows are skipped while ``self.cur_record <= 2``. Amounts in LVL are
    divided by 0.702804 and rounded to two decimals. The transaction
    type starts as DEP (or DEBIT with a negated amount when column 14 is
    'D') and is then refined from substrings of the type code in
    column 12.

    :param line: sequence of column strings.
    :return: the statement line, or None.
    """
    if self.cur_record <= 2:
        # Skip header line
        return None

    if not self.statement.account_id:
        self.statement.account_id = line[16]

    # Get all fields
    type_code = line[12]
    book_date = line[1]
    user_date = line[11]
    direction = line[14]
    amount = self.parse_float(line[3])
    bank_ref = line[10]
    note = line[9]
    payee_name = line[4]
    currency = line[17]

    # Convert LVL to EUR
    if currency == 'LVL':
        amount = round(float(amount) / 0.702804, 2)

    # Create a statement line
    stmt_line = StatementLine(bank_ref, self.parse_datetime(book_date),
                              note, self.parse_float(amount))
    stmt_line.payee = payee_name
    stmt_line.refnum = bank_ref
    stmt_line.date_user = self.parse_datetime(user_date)

    # Credit & Debit stuff
    if direction == 'D':
        stmt_line.amount = -stmt_line.amount
        stmt_line.trntype = "DEBIT"
    else:
        stmt_line.trntype = "DEP"

    # Various types
    if 'PMNTCCRDCWDL' in type_code:
        stmt_line.trntype = "ATM"
    elif 'ACMTMDOPFEES' in type_code:
        stmt_line.trntype = "SRVCHG"
    elif 'LDASCSLNINTR' in type_code:
        stmt_line.trntype = "INT"
    elif 'PMNTCCRDOTHR' in type_code:
        stmt_line.trntype = "PAYMENT"
        purchase = self.card_purchase_re.match(stmt_line.memo)
        if purchase:
            # This is an electronic purchase. Extract check number from
            # the memo field
            stmt_line.check_no = purchase.group(1)
    elif 'PMNTRCDTESCT' in type_code or 'PMNTICDTESCT' in type_code:
        stmt_line.trntype = "XFER"

    # DEBUG
    if self.debug:
        print(stmt_line, stmt_line.trntype)

    return stmt_line
def parse_record(self, row):
    """Build a StatementLine from a four-column row.

    Columns 0-3 become date, user date, memo and amount; the id is
    derived from the finished line.

    :param row: sequence of column values.
    :return: the populated statement line.
    """
    booked, initiated, text, value = row[0], row[1], row[2], row[3]

    entry = StatementLine()
    entry.date = self.parse_datetime(booked)
    entry.date_user = self.parse_datetime(initiated)
    entry.memo = text
    entry.amount = self.parse_float(value)
    entry.id = generate_transaction_id(entry)
    return entry
def parse_record(self, line):
    """Parse a transaction line and return a StatementLine object.

    The base-class parser builds the statement line; this override then
    fills in amount and transaction type and copies the memo into the
    payee.

    :param line: sequence of column values for one transaction. Column 3
        is read as the debit amount; when it is empty, column 4 is read
        as the credit amount.
    :return: the statement line produced by the base class, updated in
        place.
    """
    sl = super(PostFinanceCreditParser, self).parse_record(line)

    if not line[3]:
        sl.amount = self.parse_float(line[4])
        sl.trntype = 'CREDIT'
    else:
        # The debit column is negated by prefixing a minus sign before
        # parsing.
        sl.amount = self.parse_float("-" + line[3])
        sl.trntype = 'DEBIT'

    sl.payee = sl.memo
    return sl
def parse_record(self, line):
    """Parse one CSV row (a mapping) and return a StatementLine.

    Field usage:

    * amount: 'Betrag' with the decimal comma replaced by a dot;
      negative amounts are DEBIT, all others CREDIT.
    * date: 'Valutadatum' (using 'Buchungstag' instead would be equally
      defensible). There is no user date in the CSV.
    * payee: "<Verwendungszweck>; <recipient>", both truncated; an
      empty recipient becomes 'UNBEKANNT'. The description goes into
      payee because ofxstatement writes payee as OFX <NAME>, which
      GnuCash imports as "Description"; memo (OFX <MEMO>) ends up in
      GnuCash "Notes".
    * memo: booking text, IBAN and BIC.
    * id: first 16 decimal digits of a SHA-256 over date, payee, memo
      and amount.

    :param line: mapping from CSV column name to string value.
    :return: the populated statement line.
    """
    sl = StatementLine()
    sl.amount = Decimal(line['Betrag'].replace(',', '.'))

    # TODO trntype could be improved using 'Buchungstext'
    sl.trntype = 'DEBIT' if sl.amount.is_signed() else 'CREDIT'
    sl.date = self.parse_datetime(line['Valutadatum'])

    recipient = (_truncate_str(line['Beguenstigter/Zahlungspflichtiger'])
                 or 'UNBEKANNT')
    purpose = _truncate_str(line['Verwendungszweck'])
    sl.payee = "{}; {}".format(purpose, recipient)

    sl.memo = "{}; IBAN: {}; BIC: {}".format(
        *(line[key].strip() for key in (
            'Buchungstext',
            'Kontonummer/IBAN',
            'BIC (SWIFT-Code)',
        ))
    )

    digest = sha256()
    for part in (str(sl.date), sl.payee, sl.memo, str(sl.amount)):
        digest.update(part.encode('utf-8'))
    # Shorten the hash to the first 16 digits just to make it more
    # manageable. It should still be enough.
    sl.id = str(abs(int(digest.hexdigest(), 16)))[:16]
    return sl
def parse_record(self, line):
    """Convert one row into a StatementLine, or None for header rows.

    The row width selects the column mapping: 11 columns use the MT940
    mapping, 17 columns the CAMT mapping.

    :param line: list of column strings. The "toaccid" column is
        rewritten in place with its leading zeros removed.
    :return: the statement line, or None while ``self.cur_record < 2``.
    :raises ValueError: if the row has neither 11 nor 17 columns.
    """
    if self.cur_record < 2:
        return None

    if len(line) == 11:
        m = self.mt940_mappings
        parse_info = self.parse_transaction_info_mt940
    elif len(line) == 17:
        m = self.camt_mappings
        parse_info = self.parse_transaction_info_camt
    else:
        raise ValueError("invalid input line: '%s'" % line)

    if self.statement.account_id is None:
        self.statement.account_id = line[m["accid"]]

    sl = StatementLine()
    sl.date_avail = self.parse_datetime(line[m["valdate"]])
    # Fall back to the value date when the date column is empty.
    if len(line[m["date"]]) > 0:
        sl.date = self.parse_datetime(line[m["date"]])
    else:
        sl.date = sl.date_avail
    sl.amount = self.parse_float(line[m["amount"]])
    sl.trntype = self.parse_transaction_type(sl.amount, line[m["btext"]])

    # remove leading (or all) zeros
    line[m["toaccid"]] = line[m["toaccid"]].lstrip('0')

    if len(line[m["toaccid"]]) > 0 and len(line[m["tobankid"]]) > 0:
        # additional bank information is present
        splitted = self.parse_iban(line[m["toaccid"]])
        if splitted:
            sl.bank_account_to = BankAccount(**splitted)
        else:
            sl.bank_account_to = BankAccount(line[m["tobankid"]],
                                             line[m["toaccid"]])

    if line[m["currency"]] != self.statement.currency:
        # different currency is used
        sl.currency = line[m["currency"]]

    # remove additional spaces in the payee
    sl.payee = re.sub(' +', ' ', line[m["payee"]])[:32]

    info = parse_info(line)
    # remove additional spaces in the memo
    sl.memo = "%s: %s" % (line[m["btext"]],
                          re.sub(' +', ' ', info["memo"].strip()))

    # we need to generate an ID because nothing is given
    sl.id = generate_stable_transaction_id(sl)
    return sl
def parse_record(self, line):
    """Convert one row into a StatementLine, or None for the header.

    Columns are addressed through ``self.columns`` (name -> index). The
    transaction type comes from the first TRANSACTION_TYPES prefix the
    reference starts with and defaults to 'POS'. Payee and memo are
    parsed out of the reference text because that is the only data
    available. The id is an MD5 over date, payee, amount and balance.

    :param line: sequence of column strings.
    :return: the statement line, or None while ``self.cur_record <= 1``.
    """
    # Free Headerline
    if self.cur_record <= 1:
        return None

    c = self.columns
    entry = StatementLine()
    entry.date = self.parse_datetime(line[c["Completed Date"]].strip())

    # Amount
    paid_out = -self.parse_amount(line[c["Paid Out (...)"]])
    paid_in = self.parse_amount(line[c["Paid In (...)"]])
    entry.amount = paid_out or paid_in

    text_column = "Reference" if "Reference" in c else "Description"
    reference = line[c[text_column]].strip()

    # Default: Debit card payment
    trntype = next(
        (kind for prefix, kind in TRANSACTION_TYPES.items()
         if reference.startswith(prefix)),
        False,
    ) or 'POS'
    entry.trntype = trntype

    if trntype == 'POS':
        entry.payee, entry.memo = self.parse_payee_memo(reference)
    elif reference.startswith('Contanti'):
        entry.payee, entry.memo = self.parse_payee_memo(reference[8:])
    elif reference.startswith(('To ', 'From ')):
        entry.payee = self.parse_value(
            reference[reference.find(' '):], 'payee'
        )
    else:
        entry.memo = self.parse_value(reference, 'memo')

    # Notes
    if "Notes" in c:
        if not entry.memo:
            entry.memo = u''
        elif len(entry.memo.strip()) > 0:
            entry.memo += u' '
        entry.memo += u'({})'.format(line[c["Notes"]].strip())

    # Generate a unique ID
    balance = self.parse_amount(line[c["Balance (...)"]])
    fingerprint = f"{entry.date}-{entry.payee}-{entry.amount}-{balance}"
    entry.id = md5(fingerprint.encode()).hexdigest()

    return entry
def parse_record(self, row):
    """Build a StatementLine from a row of spreadsheet cells.

    Increments ``self.row_num`` and uses it as the reference number.
    Cells 0-3 supply date, user date, memo and amount through their
    ``value`` attribute.

    :param row: sequence of cell objects exposing ``.value``.
    :return: the populated statement line.
    """
    self.row_num += 1
    booked, initiated, text, value = (cell.value for cell in row[:4])

    entry = StatementLine()
    entry.date = self.parse_datetime(booked)
    entry.date_user = self.parse_datetime(initiated)
    entry.refnum = str(self.row_num)
    entry.memo = text
    entry.amount = value
    entry.trntype = self.get_type(entry)
    entry.id = generate_transaction_id(entry)
    return entry
def parse_record(self, line):
    """Convert one row into a StatementLine, or None for non-data rows.

    Rows with fewer than 12 columns may carry the bank id ("BLZ") or
    account id ("Konto"). "Anfangssaldo" / "Endsaldo" rows set the
    statement's start / end date and balance. None of these, nor the
    table header, produce a statement line.

    :param line: sequence of column strings.
    :return: the statement line, or None.
    """
    width = len(line)
    if width < 5:
        return None
    if width < 12:
        # possibly meta information about the account
        label = line[0]
        if "BLZ" in label:
            self.statement.bank_id = line[1]
        elif "Konto" in label:
            self.statement.account_id = line[1]
        return None

    marker = line[9]
    if marker == "Anfangssaldo":
        self.statement.start_date = self.parse_datetime(line[0])
        self.statement.start_balance = self.parse_float(line[11])
        return None
    if marker == "Endsaldo":
        self.statement.end_date = self.parse_datetime(line[0])
        self.statement.end_balance = self.parse_float(line[11])
        return None
    if line[0] == "Buchungstag":
        # it's the table header
        return None

    sl = StatementLine()
    sl.date = self.parse_datetime(line[0])
    sl.date_avail = self.parse_datetime(line[1])

    # Note: amount has no sign; the sign comes from the parsed
    # transaction info.
    unsigned = self.parse_float(line[11])
    info = self.parse_transaction_info(line)
    sl.amount = unsigned * info["sign"]
    sl.trntype = info["ttype"]

    if "iban" in info:
        # additional bank information if present
        sl.bank_account_to = BankAccount(**self.parse_iban(info["iban"]))

    if line[10] != self.statement.currency:
        # different currency is used
        sl.currency = line[10]

    # remove additional spaces in the payee
    payee_text = line[3].replace("\n", " ").strip()
    sl.payee = re.sub(" +", " ", payee_text)[:32]

    # remove additional spaces in the memo
    sl.memo = re.sub(" +", " ", info["memo"].strip())

    # we need to generate an ID because nothing is given
    sl.id = generate_stable_transaction_id(sl)
    return sl
def parse_record(self, line):
    """Convert one XML transaction element into a StatementLine.

    The namespace is taken from the element's own tag. The payee name is
    read from CPartySet/AccHolder/Name when all three elements exist;
    otherwise it stays None. The transaction type starts as DEP (or
    DEBIT with a negated amount when CorD is 'D') and is then refined
    from the type code.

    :param line: transaction element (xml.etree-style API).
    :return: the populated statement line.
    """
    # Namespace stuff
    namespaces = {'ns': line.tag[1:].partition("}")[0]}

    # Get all fields
    type_code = line.find('ns:TypeCode', namespaces=namespaces).text
    date = line.find('ns:BookDate', namespaces=namespaces).text
    c_or_d = line.find('ns:CorD', namespaces=namespaces).text
    amount = line.find('ns:AccAmt', namespaces=namespaces).text
    bank_ref = line.find('ns:BankRef', namespaces=namespaces).text
    note = line.find('ns:PmtInfo', namespaces=namespaces).text

    # Payee name. Compare against None explicitly: an Element without
    # children is falsy, and truth-testing Elements is deprecated.
    payee_name = None
    payee = line.find('ns:CPartySet', namespaces=namespaces)
    if payee is not None:
        payee_account = payee.find('ns:AccHolder', namespaces=namespaces)
        if payee_account is not None:
            name_node = payee_account.find('ns:Name', namespaces=namespaces)
            if name_node is not None:
                payee_name = name_node.text

    # Create statement line
    stmt_line = StatementLine(bank_ref, self.parse_datetime(date), note,
                              self.parse_float(amount))
    stmt_line.payee = payee_name

    # Credit & Debit stuff
    stmt_line.trntype = "DEP"
    if c_or_d == 'D':
        stmt_line.amount = -stmt_line.amount
        stmt_line.trntype = "DEBIT"

    # Various types
    if type_code == 'CHOU':
        stmt_line.trntype = "ATM"
    elif type_code == 'MEMD':
        stmt_line.trntype = "SRVCHG"
    elif type_code == 'OUTP':
        stmt_line.trntype = "PAYMENT"
    elif type_code == 'INP':
        stmt_line.trntype = "XFER"

    # NOTE: extracting the card purchase date from the memo is disabled
    # in this parser.

    print(stmt_line, stmt_line.trntype)
    return stmt_line
def parse_record(self, line):
    """Convert one XML transaction element into a StatementLine.

    The namespace is taken from the element's own tag. The payee name is
    read from CPartySet/AccHolder/Name when all three elements exist;
    otherwise it stays None. When the memo matches CARD_PURCHASE_RE, its
    first group (a d/m/y-style date) becomes ``date_user``.

    :param line: transaction element (xml.etree-style API).
    :return: the populated statement line.
    """
    # Namespace stuff
    namespaces = {'ns': line.tag[1:].partition("}")[0]}

    # Get all fields
    type_code = line.find('ns:TypeCode', namespaces=namespaces).text
    date = line.find('ns:BookDate', namespaces=namespaces).text
    c_or_d = line.find('ns:CorD', namespaces=namespaces).text
    amount = line.find('ns:AccAmt', namespaces=namespaces).text
    bank_ref = line.find('ns:BankRef', namespaces=namespaces).text
    note = line.find('ns:PmtInfo', namespaces=namespaces).text

    # Payee name. Compare against None explicitly: an Element without
    # children is falsy, and truth-testing Elements is deprecated.
    payee_name = None
    payee = line.find('ns:CPartySet', namespaces=namespaces)
    if payee is not None:
        payee_account = payee.find('ns:AccHolder', namespaces=namespaces)
        if payee_account is not None:
            name_node = payee_account.find('ns:Name', namespaces=namespaces)
            if name_node is not None:
                payee_name = name_node.text

    # Create statement line
    stmt_line = StatementLine(bank_ref, self.parse_datetime(date), note,
                              self.parse_float(amount))
    stmt_line.payee = payee_name

    # Credit & Debit stuff
    stmt_line.trntype = "DEP"
    if c_or_d == 'D':
        stmt_line.amount = -stmt_line.amount
        stmt_line.trntype = "DEBIT"

    # Various types
    if type_code == 'MEMD':
        stmt_line.trntype = "SRVCHG"
    elif type_code == 'OUTP':
        stmt_line.trntype = "PAYMENT"

    # Check if paid by card
    m = CARD_PURCHASE_RE.match(stmt_line.memo)
    if m:
        # this is an electronic purchase. extract some useful
        # information from memo field
        date = m.group(1).split('/')
        # Reorder the three parts so the last comes first.
        date = '%s-%s-%s' % (date[2], date[1], date[0])
        stmt_line.date_user = self.parse_datetime(date)

    # DEBUG
    if self.debug:
        print(stmt_line, stmt_line.trntype)

    return stmt_line
def _parse_line(self, ntry: ET.Element) -> Optional[StatementLine]:
    """Convert one entry element into a StatementLine.

    Entries whose amount currency differs from the statement currency
    are dropped. Debits get a negated amount and the creditor name as
    payee; other entries use the debtor name. The memo is taken from the
    first available of: structured creditor reference, unstructured
    remittance info, additional entry info.

    :param ntry: the entry element to convert.
    :return: the statement line, or None for a foreign-currency entry.
    """
    crdeb = self._findstrict(ntry, "CdtDbtInd").text
    amtnode = self._findstrict(ntry, "Amt")

    if amtnode.get("Ccy") != self.statement.currency:
        # We can't include amounts with incompatible currencies into the
        # statement.
        return None

    sline = StatementLine()
    amt = self._parse_amount(amtnode)
    is_debit = crdeb == CD_DEBIT
    if is_debit:
        amt = -amt
    party = "Cdtr" if is_debit else "Dbtr"
    payee = self._find(ntry, "NtryDtls/TxDtls/RltdPties/%s/Nm" % party)

    sline.payee = None if payee is None else payee.text
    sline.amount = amt

    sline.date = self._parse_date(self._find(ntry, "ValDt"))
    sline.date_user = self._parse_date(self._find(ntry, "BookgDt"))

    svcref = self._find(ntry, "NtryDtls/TxDtls/Refs/AcctSvcrRef")
    if svcref is None:
        svcref = self._find(ntry, "AcctSvcrRef")
    if svcref is not None:
        sline.refnum = svcref.text

    # Try to find memo from different possible locations
    memo_nodes = [
        self._find(ntry, "NtryDtls/TxDtls/RmtInf/Strd/CdtrRefInf/Ref"),
        self._find(ntry, "NtryDtls/TxDtls/RmtInf/Ustrd"),
        self._find(ntry, "AddtlNtryInf"),
    ]
    for node in memo_nodes:
        if node is not None:
            sline.memo = node.text
            break

    return sline
def parse_record(self, line):
    """Convert one row into a StatementLine, or None for the header.

    Column 0 is the date, column 1 the reference text, columns 2 and 3
    the paid-out and paid-in amounts. The transaction type comes from
    the first TRANSACTION_TYPES prefix the reference starts with and
    defaults to 'POS'. Payee and memo are parsed out of the reference
    text because that is the only data available. A non-blank column 8,
    when present, is appended to the memo in parentheses.

    :param line: sequence of column strings.
    :return: the statement line, or None while ``self.cur_record <= 1``.
    """
    # Free Headerline
    if self.cur_record <= 1:
        return None

    entry = StatementLine()
    entry.date = self.parse_datetime(line[0].strip())

    # Amount
    paid_out = -self.parse_amount(line[2])
    paid_in = self.parse_amount(line[3])
    entry.amount = paid_out or paid_in

    reference = line[1].strip()

    # Default: Debit card payment
    trntype = next(
        (kind for prefix, kind in TRANSACTION_TYPES.items()
         if reference.startswith(prefix)),
        False,
    ) or 'POS'
    entry.trntype = trntype

    if trntype == 'POS':
        entry.payee, entry.memo = self.parse_payee_memo(reference)
    elif reference.startswith('Cash at '):
        entry.payee, entry.memo = self.parse_payee_memo(reference[8:])
    elif reference.startswith(('To ', 'From ')):
        entry.payee = self.parse_value(
            reference[reference.find(' '):], 'payee'
        )
    else:
        entry.memo = self.parse_value(reference, 'memo')

    # Notes (from Apr-2018)
    if len(line) > 8 and line[8].strip():
        if not entry.memo:
            entry.memo = u''
        elif len(entry.memo.strip()) > 0:
            entry.memo += u' '
        entry.memo += u'({})'.format(line[8].strip())

    return entry
def parse_record(self, line):
    """Convert one XML transaction element into a StatementLine.

    The namespace is taken from the element's own tag. The payee name is
    read from CPartySet/AccHolder/Name when all three elements exist;
    otherwise it stays None. When the memo matches CARD_PURCHASE_RE, its
    first group (a d/m/y-style date) becomes ``date_user``.

    :param line: transaction element (xml.etree-style API).
    :return: the populated statement line.
    """
    # Namespace stuff
    namespaces = {"ns": line.tag[1:].partition("}")[0]}

    # Get all fields
    type_code = line.find("ns:TypeCode", namespaces=namespaces).text
    date = line.find("ns:BookDate", namespaces=namespaces).text
    c_or_d = line.find("ns:CorD", namespaces=namespaces).text
    amount = line.find("ns:AccAmt", namespaces=namespaces).text
    bank_ref = line.find("ns:BankRef", namespaces=namespaces).text
    note = line.find("ns:PmtInfo", namespaces=namespaces).text

    # Payee name. Compare against None explicitly: an Element without
    # children is falsy, and truth-testing Elements is deprecated.
    payee_name = None
    payee = line.find("ns:CPartySet", namespaces=namespaces)
    if payee is not None:
        payee_account = payee.find("ns:AccHolder", namespaces=namespaces)
        if payee_account is not None:
            name_node = payee_account.find("ns:Name", namespaces=namespaces)
            if name_node is not None:
                payee_name = name_node.text

    # Create statement line
    stmt_line = StatementLine(bank_ref, self.parse_datetime(date), note,
                              self.parse_float(amount))
    stmt_line.payee = payee_name

    # Credit & Debit stuff
    stmt_line.trntype = "DEP"
    if c_or_d == "D":
        stmt_line.amount = -stmt_line.amount
        stmt_line.trntype = "DEBIT"

    # Various types
    if type_code == "MEMD":
        stmt_line.trntype = "SRVCHG"
    elif type_code == "OUTP":
        stmt_line.trntype = "PAYMENT"

    # Check if paid by card
    m = CARD_PURCHASE_RE.match(stmt_line.memo)
    if m:
        # this is an electronic purchase. extract some useful
        # information from memo field
        date = m.group(1).split("/")
        # Reorder the three parts so the last comes first.
        date = "%s-%s-%s" % (date[2], date[1], date[0])
        stmt_line.date_user = self.parse_datetime(date)

    return stmt_line
def _parse_line(self, ntry):
    """Convert one entry element into a StatementLine.

    Entries whose amount currency differs from the statement currency
    are dropped. Debits get a negated amount and the creditor name as
    payee; other entries use the debtor name. The reference number is
    the first found of: transaction-level AcctSvcrRef, entry-level
    AcctSvcrRef, MsgId. The memo prefers unstructured remittance info
    over additional entry info.

    :param ntry: the entry element to convert.
    :return: the statement line, or None for a foreign-currency entry.
    """
    crdeb = self._find(ntry, 'CdtDbtInd').text
    amtnode = self._find(ntry, 'Amt')

    if amtnode.get('Ccy') != self.statement.currency:
        # We can't include amounts with incompatible currencies into the
        # statement.
        return None

    sline = StatementLine()
    amt = self._parse_amount(amtnode)
    is_debit = crdeb == CD_DEBIT
    if is_debit:
        amt = -amt
    party = 'Cdtr' if is_debit else 'Dbtr'
    payee = self._find(ntry, 'NtryDtls/TxDtls/RltdPties/%s/Nm' % party)

    sline.payee = None if payee is None else payee.text
    sline.amount = amt

    sline.date = self._parse_date(self._find(ntry, 'ValDt'))
    sline.date_user = self._parse_date(self._find(ntry, 'BookgDt'))

    # Stop at the first location that yields a reference.
    svcref = None
    for path in ('NtryDtls/TxDtls/Refs/AcctSvcrRef',
                 'AcctSvcrRef',
                 'NtryDtls/TxDtls/Refs/MsgId'):
        svcref = self._find(ntry, path)
        if svcref is not None:
            break
    if svcref is not None:
        sline.refnum = svcref.text

    # Try to find memo from different possible locations
    memo_nodes = [
        self._find(ntry, 'NtryDtls/TxDtls/RmtInf/Ustrd'),
        self._find(ntry, 'AddtlNtryInf'),
    ]
    for node in memo_nodes:
        if node is not None:
            sline.memo = node.text
            break

    return sline
def parse_record(self, line):
    """Convert one XML transaction element into a StatementLine.

    The namespace is taken from the element's own tag. The payee name is
    read from CPartySet/AccHolder/Name when all three elements exist;
    otherwise it stays None. The transaction type starts as DEP (or
    DEBIT with a negated amount when CorD is 'D') and is then refined
    from the type code.

    :param line: transaction element (xml.etree-style API).
    :return: the populated statement line.
    """
    # Namespace stuff
    namespaces = {'ns': line.tag[1:].partition("}")[0]}

    # Get all fields
    type_code = line.find('ns:TypeCode', namespaces=namespaces).text
    date = line.find('ns:BookDate', namespaces=namespaces).text
    c_or_d = line.find('ns:CorD', namespaces=namespaces).text
    amount = line.find('ns:AccAmt', namespaces=namespaces).text
    bank_ref = line.find('ns:BankRef', namespaces=namespaces).text
    note = line.find('ns:PmtInfo', namespaces=namespaces).text

    # Payee name. Compare against None explicitly: an Element without
    # children is falsy, and truth-testing Elements is deprecated.
    payee_name = None
    payee = line.find('ns:CPartySet', namespaces=namespaces)
    if payee is not None:
        payee_account = payee.find('ns:AccHolder', namespaces=namespaces)
        if payee_account is not None:
            name_node = payee_account.find('ns:Name', namespaces=namespaces)
            if name_node is not None:
                payee_name = name_node.text

    # Create statement line
    stmt_line = StatementLine(bank_ref, self.parse_datetime(date), note,
                              self.parse_float(amount))
    stmt_line.payee = payee_name

    # Credit & Debit stuff
    stmt_line.trntype = "DEP"
    if c_or_d == 'D':
        stmt_line.amount = -stmt_line.amount
        stmt_line.trntype = "DEBIT"

    # Various types
    if type_code == 'CHOU':
        stmt_line.trntype = "ATM"
    elif type_code == 'MEMD':
        stmt_line.trntype = "SRVCHG"
    elif type_code == 'OUTP':
        stmt_line.trntype = "PAYMENT"
    elif type_code == 'INP':
        stmt_line.trntype = "XFER"

    # DEBUG
    if self.debug:
        print(stmt_line, stmt_line.trntype)

    return stmt_line
def parse_record(self, line):
    """Parse given transaction line and return StatementLine object.

    Column 0 is a spreadsheet date serial, column 2 the income amount,
    column 3 the outcome amount, column 4 the 'Descrizione' text and
    column 5 the memo. A row with neither amount gets an amount of 0.

    :param line: sequence of cell values.
    :return: the populated statement line.
    """
    stmt_line = StatementLine()

    # date field
    stmt_line.date = self.xls_date(int(line[0]))

    # amount field: default both sides to 0 so a row with no amount at
    # all does not leave either name unbound.
    income = outcome = 0
    if line[2]:
        income = line[2]
    elif line[3]:
        outcome = line[3]
    stmt_line.amount = income - outcome

    # transaction type field
    if stmt_line.amount < 0:
        stmt_line.trntype = "DEBIT"
    else:
        stmt_line.trntype = "CREDIT"

    # name field
    # set <NAME> field with content of column 'Descrizione'
    # only if proper option is active
    if self.info2name:
        stmt_line.payee = line[4]

    # memo field
    stmt_line.memo = line[5]

    # concat "Descrizione" column at the end of <MEMO> field
    # if proper option is present
    if self.info2memo:
        # NOTE(review): the description is column 4 (the same column
        # used for <NAME> above); column 2 is the income amount.
        # Confirm against the spreadsheet layout.
        description = line[4]
        if stmt_line.memo != '' and description != '':
            stmt_line.memo += ' - '
        stmt_line.memo += description

    # id field
    stmt_line.id = generate_transaction_id(stmt_line)
    return stmt_line
def parse_record(self, row):
    """Build a StatementLine from a row of spreadsheet cells.

    Increments ``self.row_num`` and uses it as the reference number.
    Cells 0-3 supply date, user date, memo and amount; cell 4 is the
    balance after the transaction. The first row derives the statement's
    start balance (balance minus amount) and start date; every row
    updates the end balance. A line whose generated id was already seen
    is logged as a warning and still returned.

    :param row: sequence of cell objects exposing ``.value``.
    :return: the populated statement line.
    """
    self.row_num += 1

    line = StatementLine()
    line.date = self.parse_datetime(row[0].value)
    line.date_user = self.parse_datetime(row[1].value)
    line.refnum = str(self.row_num)
    line.memo = row[2].value
    line.amount = row[3].value
    line.trntype = self.get_type(line)

    if self.statement.start_balance is None and self.row_num == 1:
        self.statement.start_balance = row[4].value - line.amount
        self.statement.start_date = line.date_user
    self.statement.end_balance = row[4].value

    line.id = self.generate_transaction_id(line)
    if line.id in self.seen:
        # warning() replaces the deprecated warn() alias.
        log.warning(
            "Transaction with duplicate FITID generated:\n%s\n%s\n\n",
            line, self.seen[line.id])
    else:
        self.seen[line.id] = line

    return line
def _parse_line(self, ntry):
    """Convert one entry element into a StatementLine.

    Debits get a negated amount and the creditor name as payee; other
    entries use the debtor name. A missing payee or service reference
    yields None, and a missing or empty remittance text yields an empty
    memo. The parsed additional transaction info is appended to the
    memo; for card purchases without a payee, the payee is derived from
    the memo first.

    :param ntry: the entry element to convert.
    :return: the populated statement line.
    """
    sline = StatementLine()

    crdeb = _find(ntry, 'CdtDbtInd').text
    amtnode = _find(ntry, 'Amt')
    amt = self._parse_amount(amtnode)
    if crdeb == CD_DEBIT:
        amt = -amt
        payee = _find(ntry, 'NtryDtls/TxDtls/RltdPties/Cdtr/Nm')
    else:
        payee = _find(ntry, 'NtryDtls/TxDtls/RltdPties/Dbtr/Nm')
    if payee is not None:
        payee = payee.text

    sline.payee = payee
    sline.amount = amt

    dt = _find(ntry, 'ValDt')
    sline.date = self._parse_date(dt)

    bookdt = _find(ntry, 'BookgDt')
    sline.date_user = self._parse_date(bookdt)

    svcref = _find(ntry, 'NtryDtls/TxDtls/Refs/AcctSvcrRef')
    sline.refnum = getattr(svcref, 'text', None)

    # Guard the lookup like the ones above: an entry without
    # remittance info gets an empty memo instead of an AttributeError.
    rmtinf = _find(ntry, 'NtryDtls/TxDtls/RmtInf/Ustrd')
    sline.memo = getattr(rmtinf, 'text', None) or ''

    addtlinf_node = _find(ntry, 'NtryDtls/TxDtls/AddtlTxInf')
    addtlinf = self._parse_addtlinf(addtlinf_node)

    if 'VÁSÁRLÁS KÁRTYÁVAL' == addtlinf and not sline.payee:
        sline.payee = _trim_payee(sline.memo)

    sline.memo += ' ' + addtlinf

    return sline
def _parse_line(self, item):
    """Convert one CSV row into a StatementLine and append it to the
    statement for the row's account and currency.

    For the transaction types "VÁSÁRLÁS" and "JÓVÁÍRÁS" the payee is the
    details text; for every other type it is the partner name column.

    :param item: mapping from CSV column name to string value.
    :return: None; the line is appended to the matching statement.
    """
    sline = StatementLine()

    accountid = item[u"Számla neve"]
    currency = item[u"Tranzakció devizaneme"]
    stmt = self._find_or_create_statement(accountid, currency)

    sline.date = datetime.strptime(item[u"Értéknap"], self.date_format)
    sline.date_user = datetime.strptime(item[u"Tranzakció dátuma"],
                                        self.date_format)
    sline.amount = atof(item[u"Tranzakció összege"])

    trtype = item[u"Tranzakció típusa"]
    # Membership test: `trtype == A or B` was always true because the
    # non-empty string B is truthy on its own.
    if trtype in ("VÁSÁRLÁS", "JÓVÁÍRÁS"):
        sline.payee = item[u"Részletek"].strip()
    else:
        sline.payee = item[
            u"Partner neve/Másodlagos azonosító típusa"].strip()
    sline.memo = item[u"Részletek"].strip()

    sline.id = generate_transaction_id(sline)
    sline.assert_valid()
    stmt.lines.append(sline)
def split_records(self) -> Iterator[Any]:
    """Return iterable object consisting of a line per transaction.

    Reads every line of ``self.fin`` and determines (in order):
    A) the account id
    B) the BIC of the bank
    C) the transactions that have 6 columns

    While doing so it also fills in ``self.statement.account_id``,
    ``self.statement.bank_id`` (falling back to ``self.bank_id``), the start
    and end balance/date of ``self.statement`` and creates an empty set in
    ``self.unique_id_sets`` for the account. The return value is a generator
    over the collected ``StatementLine`` objects.

    The 6 columns (spread over 3 rows) of the transactions:
    1) DATE COMPTA
    2) LIBELLE/REFERENCE
    3) DATE OPERATION
    4) DATE VALEUR
    5) DEBIT EUROS
    6) CREDIT EUROS

    Notes about parsing
    ===================
    I) The difficulty with the transactions is that the amount has no
       sign, so you have to know the position on the line to decide whether
       it is debit or credit, see also function get_debit_credit.

    II) The description column on the first line contains on the left the
        name (payee) and on the right a check number. Lines 2 and so on
        determine the memo field.

    III) The DATE COMPTA is used as the date field.

    IV) Amounts follow the French format: a space as the thousands
        separator and a comma as the decimal separator.

    V) A transaction may be spread over several lines like this (columns
       left trimmed and separated by a bar):

    DATE  |                                          |DATE     |DATE  |DEBIT|CREDIT
    COMPTA|                                          |
          |LIBELLE/REFERENCE                         |OPERATION|VALEUR|EUROS|EUROS
    ======|==========================================|=========|======|=====|======
    20/06 |PRLV SEPA AUTOROUTES DU            YYYYYYY|20/06    |20/06 |43,70|
          |XXXXXXXXXXXXXXXXXXXX XXXXXX
          |YYYYYYYYYYYYYYYYYYY

    VI) Or what do you think of these two transactions?

    Example 1:

    13/06|PRLV SEPA AVANSSUR                 ZZZZZZZ|13/06 |13/06 |     |30,99
         |Direct Assurance 999999999

         |F     FRAIS/VIREMENT
         |AAAAAAAAAAA
    13/06|                                   BBBBBBB|13/06 |13/06 |     |4,10
         |00001 OPERATION

    Which are actually:

    13/06|PRLV SEPA AVANSSUR                 ZZZZZZZ|13/06 |13/06 |     |30,99
         |Direct Assurance 999999999
         |AAAAAAAAAAA
    13/06|FRAIS/VIREMENT                     BBBBBBB|13/06 |13/06 |     |4,10
         |00001 OPERATION

    But due to an image in the PDF the lines are spread out wrongly by
    pdftotext. The image is converted into an empty line and an F followed
    by whitespace above (!) the rest of the current memo.

    Example 2:

    26/09|F     COTIS AFFINEA
         |XCCNV999 2019092500010929000001
         |                                   0010929|25/09 |25/09|7,18 |
         |F     COTIS AFFINEA
         |CONTRAT CNV0004207796
    26/09|                                   0010930|25/09 |25/09|12,18 |
         |XCCNV999 2019092500010930000001
         |CONTRAT CNV0004207797

    which should be actually:

    26/09|COTIS AFFINEA                      0010929|25/09 |25/09|7,18 |
         |XCCNV999 2019092500010929000001
         |CONTRAT CNV0004207796
    26/09|COTIS AFFINEA                      0010930|25/09 |25/09|12,18 |
         |XCCNV999 2019092500010930000001
         |CONTRAT CNV0004207797

    This should be solved by matching a transaction over this line and
    the second line after that, a lookahead.

    VII) In this case the second part (DEBIT DIFFERE) of the description
         line is not a check number but just part of the name. There is a
         bandwidth for the check number. Some heuristics show that the start
         of the reference number + 19 is at least the position of the
         operation date column. Let's make 20 the threshold.

    28/06|CARTE                        DEBIT DIFFERE |28/06 |30/06 |6,70 |
    """
    # Split a line into columns; by default columns are separated by two or
    # more whitespace characters, a tab or a newline.
    def convert_str_to_list(str: str,
                            max_items: Optional[int] = None,
                            sep: str = r'\s\s+|\t|\n') -> List[str]:
        return [x for x in re.split(sep, str)[0:max_items]]

    # 'C' (credit) when the last occurrence of the amount text starts at or
    # beyond the CREDIT column position, otherwise 'D' (debit); see note I.
    def get_debit_credit(line: str, amount: str, credit_pos: int) -> str:
        return 'C' if line.rfind(amount) >= credit_pos else 'D'

    # Convert the textual amount into a signed Decimal (negative for 'D').
    # Despite the Decimal annotation, the assert below requires amount_in to
    # be a str at runtime.
    def get_amount(amount_in: Decimal, transaction_type_in: str) -> Decimal:
        sign_out: int = 1
        amount_out: Optional[Decimal] = None

        # determine sign_out
        assert isinstance(transaction_type_in, str)
        assert transaction_type_in in ['D', 'C']
        if transaction_type_in == 'D':
            sign_out = -1

        # determine amount_out
        assert isinstance(amount_in, str)
        # Amount may be something like 1 827,97
        m = re.search(r'^([ ,0-9]+)$', amount_in)
        assert m is not None
        amount_out = m.group(1)
        # NOTE(review): indexing [-3] raises IndexError for amounts shorter
        # than three characters, and amounts without a decimal comma keep
        # their spaces -- confirm such inputs cannot occur.
        if amount_out[-3] == ',':
            amount_out = amount_out.replace(' ', '').replace(',', '.')

        # convert to str to keep just the last two decimals
        amount_out = Decimal(str(amount_out))

        return sign_out * amount_out

    # Marker produced by pdftotext for an image, see note VI.
    F_pattern: Pattern[str] = re.compile(r'(F\s+)')
    account_id_pattern: Pattern[str] = re.compile(r'VOTRE .* N° (\d+)')
    bank_id_pattern: Pattern[str]
    bank_id_pattern = re.compile(r'IBAN\s+(\S.+\S)\s+BIC\s+(\S+)')

    # The first header row should appear like that but the second
    # is spread out over two lines.
    header_rows: List[List[str]] = [
        ['DATE', 'DATE', 'DATE', 'DEBIT', 'CREDIT'],
        [
            'COMPTA', 'LIBELLE/REFERENCE', 'OPERATION', 'VALEUR', 'EUROS',
            'EUROS'
        ]
    ]
    # Columns of the second header row that still have to be seen; empty
    # means that no second header row is being parsed.
    second_header_row: List[str] = []
    accounting_date_pos: Optional[int] = None  # DATE COMPTA
    description_pos: Optional[int] = None  # LIBELLE/REFERENCE
    operation_date_pos: Optional[int] = None  # DATE OPERATION
    value_date_pos: Optional[int] = None  # DATE VALEUR
    debit_pos: Optional[int] = None
    credit_pos: Optional[int] = None
    # 20 before DATE OPERATION (guessed, see note VII)
    check_no_pos: Optional[int] = None

    balance_pattern: Pattern[str] = \
        re.compile(r'SOLDE (CRED|DEB)ITEUR AU (../../....).\s+([ ,0-9]+)$')
    transaction_pattern: Pattern[str] = \
        re.compile(r'\d\d/\d\d\s+\S.*\s+\d\d/\d\d\s+\d\d/\d\d\s+[ ,0-9]+$')

    # Set once the 'TOTAL DES MOUVEMENTS' line has been seen: the next
    # balance line is then the end balance.
    read_end_balance_line: bool = False

    stmt_line: Optional[StatementLine] = None
    stmt_lines: List[StatementLine] = []
    payee: Optional[str] = None  # to handle note VI

    # Need to be able to look ahead for complicated cases
    lines: List[str] = [line for line in self.fin]

    pos: int
    m: Optional[Match[str]]
    transaction_type: str
    line_stripped: str
    accounting_date: date
    balance: Union[str, Decimal]
    row: List[str]
    combined_line: str

    # breakpoint()
    # idx is 1-based, so lines[idx - 1] is the current line.
    for idx, line in enumerate(lines, start=1):
        line_stripped = line.strip()
        if line_stripped != '':
            logger.debug('line %04d: %s', idx, line)

        pos = line_stripped.find('TOTAL DES MOUVEMENTS')
        if pos == 0:  # found
            read_end_balance_line = True
            continue

        # Everything before the account id line is skipped.
        if not self.statement.account_id:
            m = account_id_pattern.match(line_stripped)
            if m:
                self.statement.account_id = m.group(1)
                self.unique_id_sets[self.statement.account_id] = set()
                logger.debug('account_id: %s', self.statement.account_id)
            continue

        # The line following the account id determines the bank id: either
        # the BIC found on that line or the configured self.bank_id.
        if not self.statement.bank_id:
            m = bank_id_pattern.match(line_stripped)
            if m:
                self.statement.bank_id = m.group(2)
                logger.debug('bank_id: %s', self.statement.bank_id)
            elif self.bank_id:
                self.statement.bank_id = self.bank_id
                logger.debug('bank_id: %s', self.statement.bank_id)
            continue

        assert self.statement.account_id and self.statement.bank_id

        m = balance_pattern.match(line_stripped)
        if m:
            accounting_date = datetime.strptime(m.group(2),
                                                '%d/%m/%Y').date()
            balance = m.group(3)
            logger.debug('accounting_date: %s; balance: %s',
                         accounting_date,
                         balance)
            assert credit_pos is not None
            transaction_type = get_debit_credit(line, balance, credit_pos)
            # NOTE(review): presumably typing.cast, i.e. only for the type
            # checker; balance would then still be a str here -- confirm.
            balance = cast(Decimal, balance)
            if read_end_balance_line:
                self.statement.end_balance = get_amount(balance,
                                                        transaction_type)
                self.statement.end_date = accounting_date
                # The end balance closes the statement: stop reading.
                break
            elif self.statement.start_balance is None:
                self.statement.start_balance = get_amount(balance,
                                                          transaction_type)
                self.statement.start_date = accounting_date
            continue

        row = convert_str_to_list(line_stripped)

        if row == header_rows[0]:
            logger.debug('header row 1: %s', str(row))
            debit_pos = line.find('DEBIT')
            assert debit_pos >= 0
            credit_pos = line.find('CREDIT')
            assert credit_pos >= 0
            # Create a copy
            second_header_row = header_rows[1][:]
            logger.debug('second header row: %s', str(second_header_row))
            continue
        elif second_header_row:
            # Header words are separated by single spaces as well.
            row = convert_str_to_list(line_stripped, sep=r'\s+|\t|\n')
            logger.debug('header row 2/3: %s', str(row))
            # Are the columns of this row a subset of header_rows[1]?
            if set(row) < set(header_rows[1]):
                for col in row:
                    if col == 'COMPTA':
                        accounting_date_pos = line.find(col)
                        assert accounting_date_pos >= 0
                    elif col == 'LIBELLE/REFERENCE':
                        description_pos = line.find(col)
                        assert description_pos >= 0
                    elif col == 'OPERATION':
                        operation_date_pos = line.find(col)
                        assert operation_date_pos >= 0
                        check_no_pos = operation_date_pos - 20
                        assert check_no_pos >= 0
                    elif col == 'VALEUR':
                        value_date_pos = line.find(col)
                        assert value_date_pos >= 0
                    elif col == 'EUROS':
                        pass
                    second_header_row.remove(col)
                logger.debug('second header row: %s',
                             str(second_header_row))
                continue
        elif len(row[0]) > 0:
            logger.debug('row: %s', str(row))

        # payee acts as a small state for note VI: None = nothing seen,
        # '' = an empty line was seen, other text = the payee that followed
        # the F marker.
        # Empty line
        if len(row) == 1 and row[0] == '':
            if payee is None:
                # Note VI: first, empty line
                payee = ''
            elif payee != '':  # pragma: no cover
                # Obviously an empty line after an F\s+ line
                payee = None
            else:
                pass  # several empty lines before an F line possible
            logger.debug('payee: %s', payee)
            continue
        # Handle note VI
        elif payee == '' and len(row) == 1 and F_pattern.match(row[0]):
            # Note VI: second line left trimmed starting with F
            payee = row[0][2:]
            logger.debug('payee: %s', payee)
            continue
        else:
            logger.debug('payee: %s', payee)

        m = transaction_pattern.match(line_stripped)

        # See note VI, example 2
        if not m and\
           idx + 2 <= len(lines) and\
           len(row) >= 2 and\
           (F_pattern.match(row[1]) or (len(row) >= 3 and row[1] == 'F')):
            assert line == lines[idx - 1]
            # The first line right stripped and the 'F\s+' replaced by ''
            combined_line = lines[idx - 1].rstrip()
            # NOTE(review): m now holds the F_pattern match; if the
            # look-ahead below does not combine the lines it stays truthy
            # and the line is handled as a transaction -- confirm intended.
            m = F_pattern.search(combined_line)
            assert m
            combined_line = combined_line.replace(m.group(1), '')
            # Add the second line (two rows further) from the point
            # where the first right trimmed line ends, but only if
            # the part before that point contains just whitespace.
            if lines[idx - 1 + 2][0:len(combined_line)].strip() == '':
                combined_line += lines[idx - 1 + 2][len(combined_line):]
                logger.debug('combined line stripped: %s',
                             combined_line.strip())
                m = transaction_pattern.match(combined_line.strip())
                if m:
                    # Removes an element from the list being iterated; the
                    # lines after it shift one position to the front.
                    del lines[idx - 1 + 2]  # not necessary anymore
                    # recalculate some helper variables
                    line = combined_line
                    line_stripped = line.strip()
                    row = convert_str_to_list(line_stripped)

        if m:
            logger.debug('found a transaction line')
            assert debit_pos is not None and debit_pos >= 0
            assert credit_pos is not None and credit_pos >= 0
            assert accounting_date_pos is not None \
                and accounting_date_pos >= 0
            assert description_pos is not None and description_pos >= 0
            assert value_date_pos is not None and value_date_pos >= 0

            # emit previous transaction if any
            if stmt_line is not None:
                stmt_lines.append(stmt_line)
                stmt_line = None

            # Note VI
            if payee is not None and payee != '':
                row.insert(1, payee)
                payee = None
                logger.debug('After adding payee to the row: %s', str(row))

            stmt_line = StatementLine()

            if len(row) >= 6:
                # The fourth column from the right is only a check number
                # when it starts far enough to the right, see note VII.
                pos = line.find(row[-4])
                assert check_no_pos is not None
                if pos >= check_no_pos:
                    stmt_line.check_no = row[-4]
                    logger.debug('Setting check_no: %s', row[-4])
                else:
                    logger.debug('Skip setting check_no')

            # The dates are stored as the raw column text here.
            stmt_line.accounting_date = row[0]
            stmt_line.operation_date = row[-2]
            stmt_line.value_date = row[-3]
            stmt_line.amount = cast(Decimal, row[-1])
            assert credit_pos is not None
            transaction_type = \
                get_debit_credit(line, row[-1], credit_pos)

            # Should have 6 columns. If not: reduce.
            if len(row) > 6:  # pragma: no cover
                while len(row) > 6:
                    row[2] += ' ' + row[3]
                    del row[3]
                logger.debug('row after reducing columns: %s', str(row))

            stmt_line.payee = row[1]
            stmt_line.memo = ''
            stmt_line.amount = get_amount(stmt_line.amount,
                                          transaction_type)

            logger.debug('Statement line: %r', stmt_line)
        elif stmt_line is not None:
            assert accounting_date_pos is not None
            assert operation_date_pos is not None
            # Continuation of a transaction?
            # Or stated otherwise does the memo text completely fit
            # in the second column?
            pos = line.find(line_stripped)
            if pos > accounting_date_pos and\
               pos + len(line_stripped) < operation_date_pos:
                if stmt_line.memo == '':
                    stmt_line.memo = line_stripped
                else:
                    stmt_line.memo += " " + line_stripped
    # end of for loop

    # assert self.statement.account_id, "No account id found."
    # assert self.statement.bank_id, "No bank id found."
    # assert stmt_lines, "No statement lines found."

    # The last transaction has not been emitted yet.
    if stmt_line is not None:
        stmt_lines.append(stmt_line)

    # We can only yield the statement lines when end_date is there,
    # see function get_date() below
    return (sl for sl in stmt_lines)
def testAmount(self):
    """A line carrying nothing but an amount hashes to ``self.amt_hash``."""
    amount_only = StatementLine()
    amount_only.amount = 123.45

    generated_id = generate_stable_transaction_id(amount_only)

    self.assertEqual(self.amt_hash, generated_id)