class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents.

    The grammar is built once in the constructor and stored on
    ``self.parser``; ``parse`` runs it over a string.
    """

    def __init__(self, debug=False):
        """Build the recursive tag grammar.

        When ``debug`` is true, the ofxtools debug actions are attached
        to the finished parser.
        """
        open_tag, close_tag = self._tag()

        # A content element is an opening tag followed by text running up
        # to the next '<' or line break.
        leaf = Group(self._tag(closed=False) + CharsNotIn("<\r\n"))

        # An aggregate is an open/close tag pair wrapping one or more
        # nested aggregates or content elements, hence the Forward.
        aggregate = Forward().setResultsName("OFC")
        body = Dict(OneOrMore(aggregate | leaf))
        aggregate << Group(open_tag + body + close_tag)

        self.parser = Group(aggregate).setResultsName("document")

        if debug:
            self.parser.setDebugActions(
                ofxtools._ofxtoolsStartDebugAction,
                ofxtools._ofxtoolsSuccessDebugAction,
                ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns only the opening-tag expression when ``closed`` is false,
        otherwise an ``(opening, closing)`` pair of expressions.
        """
        opening = (Literal("<").suppress()
                   + Word(alphanums + ".")
                   + Literal(">").suppress())
        if not closed:
            return opening

        # The closing tag (plus any trailing whitespace) is matched but
        # kept out of the results.
        closing = Group("</" + Word(alphanums + ".") + ">"
                        + ZeroOrMore(White())).suppress()
        return opening, closing

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document."""
        parsed = self.parser.parseString(ofc)
        return parsed.asDict()
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents.

    The grammar is built once in the constructor and stored on
    ``self.parser``. ``parse`` first applies a handful of regex-based
    clean-ups to the input and, if parsing still fails, retries once on a
    more aggressively repaired copy (see ``fix_ofc``).
    """

    def __init__(self, debug=False):
        """Build the recursive tag grammar.

        When ``debug`` is true, the ofxtools debug actions are attached
        to the finished parser.
        """
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # Content is an opening tag followed by text running up to the
        # next '<' or line break.
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        # An aggregate wraps one or more nested aggregates or content
        # elements between an opening and a closing tag.
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if debug:
            self.parser.setDebugActions(
                ofxtools._ofxtoolsStartDebugAction,
                ofxtools._ofxtoolsSuccessDebugAction,
                ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns an ``(open_tag, close_tag)`` pair when ``closed`` is true,
        otherwise just the opening-tag expression.
        """
        openTag = (Literal("<").suppress() + Word(alphanums + ".")
                   + Literal(">").suppress())
        if closed:
            closeTag = Group("</" + Word(alphanums + ".") + ">"
                             + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document.

        The input is normalised first (empty LEDGER filled, inline closing
        tags removed, empty tags stripped, CHKNUM renamed). If the grammar
        raises ``ParseException``, the text is passed through ``fix_ofc``
        and parsed once more; a failure of that second attempt propagates.
        """
        ofc = self.add_zero_to_empty_ledger_tag(ofc)
        ofc = self.remove_inline_closing_tags(ofc)
        ofc = ofxtools.util.strip_empty_tags(ofc)
        ofc = self._translate_chknum_to_checknum(ofc)

        # XXX: needs better solution. The limit is only ever raised, so a
        # higher limit configured elsewhere in the process is left alone.
        import sys
        if sys.getrecursionlimit() < 5000:
            sys.setrecursionlimit(5000)

        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def add_zero_to_empty_ledger_tag(self, ofc):
        """Fix an OFC by inserting a zero into a blank LEDGER tag.

        A ``<LEDGER>`` tag followed only by non-digit characters up to a
        newline gets ``0`` inserted right after the tag.
        """
        pattern = re.compile(r'<LEDGER>(\D*\n)', re.UNICODE)
        return pattern.sub(r'<LEDGER>0\1', ofc)

    def remove_inline_closing_tags(self, ofc):
        """Fix an OFC by removing inline closing 'tags'.

        A closing tag that follows other text on the same line is dropped;
        the text before it is kept.
        """
        pattern = re.compile(r'(\w+.*)<\/\w+>', re.UNICODE)
        return pattern.sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """Do some magic to fix a bad OFC.

        Applies, in order: ``_remove_bad_tags``, ``_fill_dummy_tags`` and
        ``_inject_tags``. Always returns a string.
        """
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        """Drop TRNRS open/close tags and CLTID tags with their value."""
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        """Give valueless FITID and CHECKNUM tags a dummy value of 0."""
        # The pattern consumes the single character after the tag when it
        # is neither a word character nor '+', and the replacement writes
        # the tag followed by "0" and a newline in its place.
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        for tag in ('FITID', 'CHECKNUM'):
            ofc = re.sub(expression % tag, replacement % tag, ofc)
        return ofc

    def _translate_chknum_to_checknum(self, ofc):
        """Translate CHKNUM to CHECKNUM.

        Some banks emit CHKNUM instead of CHECKNUM; renaming it lets the
        value be handled like a regular CHECKNUM.
        """
        return re.sub('CHKNUM', 'CHECKNUM', ofc)

    def _inject_tags(self, ofc):
        """Wrap the document in an ACCTSTMT with a dummy ACCTFROM header.

        If ``<OFC>`` is already followed by ``<ACCTSTMT>`` the text is
        returned unchanged. (It used to fall through and return ``None``,
        which made the retry in ``parse`` fail on a non-string.)
        """
        if re.search(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc
        tags = "<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n<ACCTTYPE>0\n</ACCTFROM>\n"
        return ofc.replace('<OFC>', tags).replace('</OFC>', '</ACCTSTMT>\n</OFC>')
class QifParser:
    """pyparsing-based parser for QIF statements.

    The grammar is built once in the constructor and stored on
    ``self.parser``; ``parse`` runs it over a string.
    """

    def __init__(self, debug=False):
        """Build the QIF grammar.

        Each ``*_items`` dict maps a one-character line code to the
        results name given to the rest of that line. When ``debug`` is
        true, debug actions are attached to the finished parser.
        """
        account_items = {'N': "Name",
                         'T': "AccountType",
                         'D': "Description",
                         'L': "CreditLimit",
                         'X': "UnknownField",
                         'B': "Balance",
                         '/': "BalanceDate",
                         '$': "Balance"}

        noninvestment_items = {'D': "Date",
                               'T': "Amount",
                               'U': "Amount2",
                               'C': "Cleared",
                               'N': "Number",
                               'P': "Payee",
                               'M': "Memo",
                               'L': "Category",
                               'A': "Address",
                               'S': "SplitCategory",
                               'E': "SplitMemo",
                               '$': "SplitAmount",
                               '-': "NegativeSplitAmount"}

        investment_items = {'D': "Date",
                            'N': "Action",
                            'Y': "Security",
                            'I': "Price",
                            'Q': "Quantity",
                            'T': "Amount",
                            'C': "Cleared",
                            'P': "Text",
                            'M': "Memo",
                            'O': "Commission",
                            'L': "TransferAccount",
                            '$': "TransferAmount"}

        category_items = {'N': "Name",
                          'D': "Description",
                          'T': "TaxRelated",
                          'I': "IncomeCategory",
                          'E': "ExpenseCategory",
                          'B': "BudgetAmount",
                          'R': "TaxSchedule"}

        class_items = {'N': "Name",
                       'D': "Description"}

        # '!Option:' lines are matched but kept out of the results.
        options = Group(CaselessLiteral('!Option:') + restOfLine).suppress()

        banktxns = Group(CaselessLiteral('!Type:Bank').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("BankTransactions")

        cashtxns = Group(CaselessLiteral('!Type:Cash').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("CashTransactions")

        # Both '!Type:CCard' and the '!Type!CCard' spelling are accepted.
        ccardtxns = Group(Or([CaselessLiteral('!Type:CCard').suppress(),
                              CaselessLiteral('!Type!CCard').suppress()]) +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CreditCardTransactions")

        # NOTE(review): shares the "CreditCardTransactions" results name
        # with ccardtxns -- confirm this is intentional.
        liabilitytxns = Group(CaselessLiteral('!Type:Oth L').suppress() +
                              ZeroOrMore(Or([self._items(noninvestment_items),
                                             options]))
                              ).setResultsName("CreditCardTransactions")

        invsttxns = Group(CaselessLiteral('!Type:Invst').suppress() +
                          ZeroOrMore(self._items(investment_items))
                          ).setResultsName("InvestmentTransactions")

        acctlist = Group(CaselessLiteral('!Account').suppress() +
                         ZeroOrMore(Or([self._items(account_items,
                                                    name="AccountInfo")]))
                         ).setResultsName("AccountList")

        category = Group(CaselessLiteral('!Type:Cat').suppress() +
                         ZeroOrMore(self._items(category_items))
                         ).setResultsName("CategoryList")

        # Class records use their own field table (previously this reused
        # category_items and left class_items unused).
        classlist = Group(CaselessLiteral('!Type:Class').suppress() +
                          ZeroOrMore(self._items(class_items))
                          ).setResultsName("ClassList")

        # Account, category and class lists are matched but suppressed;
        # only the transaction sections appear in the results.
        self.parser = Group(ZeroOrMore(White()).suppress() +
                            ZeroOrMore(acctlist).suppress() +
                            OneOrMore(ccardtxns | cashtxns | banktxns |
                                      liabilitytxns | invsttxns) +
                            ZeroOrMore(category | classlist).suppress() +
                            ZeroOrMore(White()).suppress()
                            ).setResultsName("QifStatement")

        if debug:
            self.parser.setDebugActions(_ofxtoolsStartDebugAction,
                                        _ofxtoolsSuccessDebugAction,
                                        _ofxtoolsExceptionDebugAction)

    def _items(self, items, name="Transaction"):
        """Build the expression for one record made of coded lines.

        ``items`` maps line codes to results names. The record is one or
        more such lines followed by a '^' (or '^EUR') terminator line,
        grouped under the results name ``name``.
        """
        # The loop variable must not be called ``name``: that used to
        # overwrite the parameter, so the group ended up named after an
        # arbitrary field instead of the requested record name.
        item_list = [self._item(code, item_name)
                     for (code, item_name) in items.items()]
        return Group(OneOrMore(Or(item_list)) +
                     oneOf('^EUR ^').setResultsName('Currency') +
                     LineEnd().suppress()
                     ).setResultsName(name)

    def _item(self, code, name):
        """Match one line: ``code`` (suppressed), then the rest of the line
        captured under the results name ``name``."""
        return (CaselessLiteral(code).suppress() +
                restOfLine.setResultsName(name) +
                LineEnd().suppress())

    def parse(self, qif):
        """Run the grammar over the string ``qif`` and return the pyparsing
        results."""
        return self.parser.parseString(qif)
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents.

    The grammar is built once in the constructor and stored on
    ``self.parser``. ``parse`` first applies a handful of regex-based
    clean-ups to the input and, if parsing still fails, retries once on a
    more aggressively repaired copy (see ``fix_ofc``).
    """

    def __init__(self, debug=False):
        """Build the recursive tag grammar.

        When ``debug`` is true, the ofxtools debug actions are attached
        to the finished parser.
        """
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # Content is an opening tag followed by text running up to the
        # next '<' or line break.
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        # An aggregate wraps one or more nested aggregates or content
        # elements between an opening and a closing tag.
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if debug:
            self.parser.setDebugActions(
                ofxtools._ofxtoolsStartDebugAction,
                ofxtools._ofxtoolsSuccessDebugAction,
                ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns an ``(open_tag, close_tag)`` pair when ``closed`` is true,
        otherwise just the opening-tag expression.
        """
        openTag = (Literal("<").suppress() + Word(alphanums + ".")
                   + Literal(">").suppress())
        if closed:
            closeTag = Group("</" + Word(alphanums + ".") + ">"
                             + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document.

        The input is normalised first (empty LEDGER filled, inline closing
        tags removed, empty tags stripped, CHKNUM renamed). If the grammar
        raises ``ParseException``, the text is passed through ``fix_ofc``
        and parsed once more; a failure of that second attempt propagates.
        """
        ofc = self.add_zero_to_empty_ledger_tag(ofc)
        ofc = self.remove_inline_closing_tags(ofc)
        ofc = ofxtools.util.strip_empty_tags(ofc)
        ofc = self._translate_chknum_to_checknum(ofc)

        # XXX: needs better solution. The limit is only ever raised, so a
        # higher limit configured elsewhere in the process is left alone.
        import sys
        if sys.getrecursionlimit() < 5000:
            sys.setrecursionlimit(5000)

        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def add_zero_to_empty_ledger_tag(self, ofc):
        """Fix an OFC by inserting a zero into a blank LEDGER tag.

        A ``<LEDGER>`` tag followed only by non-digit characters up to a
        newline gets ``0`` inserted right after the tag.
        """
        pattern = re.compile(r'<LEDGER>(\D*\n)', re.UNICODE)
        return pattern.sub(r'<LEDGER>0\1', ofc)

    def remove_inline_closing_tags(self, ofc):
        """Fix an OFC by removing inline closing 'tags'.

        A closing tag that follows other text on the same line is dropped;
        the text before it is kept.
        """
        pattern = re.compile(r'(\w+.*)<\/\w+>', re.UNICODE)
        return pattern.sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """Do some magic to fix a bad OFC.

        Applies, in order: ``_remove_bad_tags``, ``_fill_dummy_tags`` and
        ``_inject_tags``. Always returns a string.
        """
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        """Drop TRNRS open/close tags and CLTID tags with their value."""
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        """Give valueless FITID and CHECKNUM tags a dummy value of 0."""
        # The pattern consumes the single character after the tag when it
        # is neither a word character nor '+', and the replacement writes
        # the tag followed by "0" and a newline in its place.
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        for tag in ('FITID', 'CHECKNUM'):
            ofc = re.sub(expression % tag, replacement % tag, ofc)
        return ofc

    def _translate_chknum_to_checknum(self, ofc):
        """Translate CHKNUM to CHECKNUM.

        Some banks emit CHKNUM instead of CHECKNUM; renaming it lets the
        value be handled like a regular CHECKNUM.
        """
        return re.sub('CHKNUM', 'CHECKNUM', ofc)

    def _inject_tags(self, ofc):
        """Wrap the document in an ACCTSTMT with a dummy ACCTFROM header.

        If ``<OFC>`` is already followed by ``<ACCTSTMT>`` the text is
        returned unchanged. (It used to fall through and return ``None``,
        which made the retry in ``parse`` fail on a non-string.)
        """
        if re.search(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc
        tags = "<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n<ACCTTYPE>0\n</ACCTFROM>\n"
        return ofc.replace('<OFC>', tags).replace('</OFC>', '</ACCTSTMT>\n</OFC>')
class QifParser:
    """pyparsing-based parser for QIF statements.

    The grammar is built once in the constructor and stored on
    ``self.parser``; ``parse`` runs it over a string.
    """

    def __init__(self, debug=False):
        """Build the QIF grammar.

        Each ``*_items`` dict maps a one-character line code to the
        results name given to the rest of that line. When ``debug`` is
        true, the ofxtools debug actions are attached to the finished
        parser.
        """
        account_items = {'N': "Name",
                         'T': "AccountType",
                         'D': "Description",
                         'L': "CreditLimit",
                         'X': "UnknownField",
                         'B': "Balance",
                         '/': "BalanceDate",
                         '$': "Balance"}

        noninvestment_items = {'D': "Date",
                               'T': "Amount",
                               'U': "Amount2",
                               'C': "Cleared",
                               'N': "Number",
                               'P': "Payee",
                               'M': "Memo",
                               'L': "Category",
                               'A': "Address",
                               'S': "SplitCategory",
                               'E': "SplitMemo",
                               '$': "SplitAmount",
                               '-': "NegativeSplitAmount"}

        investment_items = {'D': "Date",
                            'N': "Action",
                            'Y': "Security",
                            'I': "Price",
                            'Q': "Quantity",
                            'T': "Amount",
                            'C': "Cleared",
                            'P': "Text",
                            'M': "Memo",
                            'O': "Commission",
                            'L': "TransferAccount",
                            '$': "TransferAmount"}

        category_items = {'N': "Name",
                          'D': "Description",
                          'T': "TaxRelated",
                          'I': "IncomeCategory",
                          'E': "ExpenseCategory",
                          'B': "BudgetAmount",
                          'R': "TaxSchedule"}

        class_items = {'N': "Name",
                       'D': "Description"}

        # '!Option:' lines are matched but kept out of the results.
        options = Group(CaselessLiteral('!Option:') + restOfLine).suppress()

        banktxns = Group(CaselessLiteral('!Type:Bank').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("BankTransactions")

        cashtxns = Group(CaselessLiteral('!Type:Cash').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("CashTransactions")

        # Both '!Type:CCard' and the '!Type!CCard' spelling are accepted.
        ccardtxns = Group(Or([CaselessLiteral('!Type:CCard').suppress(),
                              CaselessLiteral('!Type!CCard').suppress()]) +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CreditCardTransactions")

        # NOTE(review): shares the "CreditCardTransactions" results name
        # with ccardtxns -- confirm this is intentional.
        liabilitytxns = Group(CaselessLiteral('!Type:Oth L').suppress() +
                              ZeroOrMore(Or([self._items(noninvestment_items),
                                             options]))
                              ).setResultsName("CreditCardTransactions")

        invsttxns = Group(CaselessLiteral('!Type:Invst').suppress() +
                          ZeroOrMore(self._items(investment_items))
                          ).setResultsName("InvestmentTransactions")

        acctlist = Group(CaselessLiteral('!Account').suppress() +
                         ZeroOrMore(Or([self._items(account_items,
                                                    name="AccountInfo")]))
                         ).setResultsName("AccountList")

        # NOTE(review): ``category`` and ``classlist`` are built here but
        # are not referenced by ``self.parser`` below.
        category = Group(CaselessLiteral('!Type:Cat').suppress() +
                         ZeroOrMore(self._items(category_items))
                         ).setResultsName("CategoryList")

        # Class records use their own field table (previously this reused
        # category_items and left class_items unused).
        classlist = Group(CaselessLiteral('!Type:Class').suppress() +
                          ZeroOrMore(self._items(class_items))
                          ).setResultsName("ClassList")

        # Account lists are matched but suppressed; only the transaction
        # sections appear in the results.
        self.parser = Group(ZeroOrMore(White()).suppress() +
                            ZeroOrMore(acctlist).suppress() +
                            OneOrMore(ccardtxns | cashtxns | banktxns |
                                      liabilitytxns | invsttxns) +
                            ZeroOrMore(White()).suppress()
                            ).setResultsName("QifStatement")

        if debug:
            self.parser.setDebugActions(
                ofxtools._ofxtoolsStartDebugAction,
                ofxtools._ofxtoolsSuccessDebugAction,
                ofxtools._ofxtoolsExceptionDebugAction)

    def _items(self, items, name="Transaction"):
        """Build the expression for one record made of coded lines.

        ``items`` maps line codes to results names. The record is one or
        more such lines followed by a '^' (or '^EUR') terminator line,
        grouped under the results name ``name``.
        """
        # ``items()`` works on Python 2 and 3 alike (``iteritems`` does
        # not). The loop variable must not be called ``name``: that used
        # to overwrite the parameter, so the group ended up named after an
        # arbitrary field instead of the requested record name.
        item_list = [self._item(code, item_name)
                     for (code, item_name) in items.items()]
        return Group(OneOrMore(Or(item_list)) +
                     oneOf('^EUR ^').setResultsName('Currency') +
                     LineEnd().suppress()
                     ).setResultsName(name)

    def _item(self, code, name):
        """Match one line: ``code`` (suppressed), then the rest of the line
        captured under the results name ``name``."""
        return (CaselessLiteral(code).suppress() +
                restOfLine.setResultsName(name) +
                LineEnd().suppress())

    def parse(self, qif):
        """Run the grammar over the string ``qif`` and return the pyparsing
        results."""
        return self.parser.parseString(qif)
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents.

    The grammar is built once in the constructor and stored on
    ``self.parser``. ``parse`` strips inline closing tags from the input
    and, if parsing still fails, retries once on a more aggressively
    repaired copy (see ``fix_ofc``).
    """

    def __init__(self, debug=False):
        """Build the recursive tag grammar.

        When ``debug`` is true, the ofxtools debug actions are attached
        to the finished parser.
        """
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # Content is an opening tag followed by text running up to the
        # next '<' or line break.
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        # An aggregate wraps one or more nested aggregates or content
        # elements between an opening and a closing tag.
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if debug:
            self.parser.setDebugActions(
                ofxtools._ofxtoolsStartDebugAction,
                ofxtools._ofxtoolsSuccessDebugAction,
                ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns an ``(open_tag, close_tag)`` pair when ``closed`` is true,
        otherwise just the opening-tag expression.
        """
        openTag = (Literal("<").suppress() + Word(alphanums + ".")
                   + Literal(">").suppress())
        if closed:
            closeTag = Group("</" + Word(alphanums + ".") + ">"
                             + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document.

        Inline closing tags are removed first. If the grammar raises
        ``ParseException``, the text is passed through ``fix_ofc`` and
        parsed once more; a failure of that second attempt propagates.
        """
        ofc = self.remove_inline_closing_tags(ofc)
        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def remove_inline_closing_tags(self, ofc):
        """Fix an OFC by removing inline closing 'tags'.

        A closing tag that follows other text on the same line is dropped;
        the text before it is kept.
        """
        pattern = re.compile(r'(\w+.*)<\/\w+>', re.UNICODE)
        return pattern.sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """Do some magic to fix a bad OFC.

        Applies, in order: ``_remove_bad_tags``, ``_fill_dummy_tags`` and
        ``_inject_tags``. Always returns a string.
        """
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        """Drop TRNRS open/close tags and CLTID tags with their value."""
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        """Give valueless FITID and CHKNUM tags a dummy value of 0."""
        # The pattern consumes the single character after the tag when it
        # is neither a word character nor '+', and the replacement writes
        # the tag followed by "0" and a newline in its place.
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        for tag in ('FITID', 'CHKNUM'):
            ofc = re.sub(expression % tag, replacement % tag, ofc)
        return ofc

    def _inject_tags(self, ofc):
        """Wrap the document in an ACCTSTMT with a dummy ACCTFROM header.

        If ``<OFC>`` is already followed by ``<ACCTSTMT>`` the text is
        returned unchanged. (It used to fall through and return ``None``,
        which made the retry in ``parse`` fail on a non-string.)
        """
        if re.search(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc
        tags = "<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n<ACCTTYPE>0\n</ACCTFROM>\n"
        return ofc.replace('<OFC>', tags).replace('</OFC>', '</ACCTSTMT>\n</OFC>')