def scrub_html_email(text, cid_mapping=None):
    """Resolve inline ``cid:`` image references and scrub an HTML e-mail body.

    Args:
        text: HTML markup of the e-mail.
        cid_mapping: Optional dict mapping a content id (the part of a
            ``src`` attribute after ``cid:``) to the value that should
            replace the whole ``src`` attribute.  Content ids that are not
            present in the mapping are left untouched instead of raising
            ``KeyError``.

    Returns:
        Whatever ``tuit.scrubber.Scrubber.scrub`` returns for the rewritten
        markup.
    """
    from BeautifulSoup import BeautifulSoup

    # A None default avoids sharing one dict object between calls.
    if cid_mapping is None:
        cid_mapping = {}

    soup = BeautifulSoup(text)
    for tag in soup.findAll(True):
        src = dict(tag.attrs).get('src')
        if src and src.startswith('cid:'):
            content_id = src[4:]
            # Only rewrite references we can resolve; an unknown content id
            # must not abort scrubbing of the whole message.
            if content_id in cid_mapping:
                tag['src'] = cid_mapping[content_id]
    mapped = soup.renderContents()

    scrubber = tuit.scrubber.Scrubber(autolink=False)
    # The scrubber removes complete html documents out of the box, so the
    # document-level tags are registered in disallowed_tags_save_content
    # (presumably: drop the tag but keep what it wraps -- confirm against
    # the scrubber implementation).
    for tag_name in ('html', 'body', 'xml', 'doctype'):
        scrubber.disallowed_tags_save_content.add(tag_name)
    scrubber.allowed_attributes.add('color')

    return scrubber.scrub(mapped)
def getOFX(account, interval):
    """Download an OFX statement for one account and sanity-check the file.

    Args:
        account: Indexable record laid out as
            ``[sitename, account number, account type, user, password]``.
            Only the part of the account number before the first ``:`` is
            sent to the server; the full value is written back into the
            downloaded file's ``<ACCTID>`` entries.
        interval: Number of days of history to request.  A larger
            ``mininterval`` configured for the site takes precedence.

    Returns:
        A ``(status, ofxFileName)`` tuple.  ``(False, '')`` is returned when
        the request is cancelled at the debug prompt or the client reports a
        failed transfer.  Otherwise the generated file name is returned with
        ``status`` False if the file is missing or failed validation.

    Note:
        The process exits (``sys.exit()``) if the transfer directory does
        not exist and cannot be created.
    """
    import sys  # local import: the original referenced an undefined `system`

    sitename = account[0]
    _acct_num = account[1]      # account value defined in sites.dat
    acct_type = account[2]
    user = account[3]
    password = account[4]
    acct_num = _acct_num.split(':')[0]  # bank account# (stripped of :xxx version)

    # get site and other user-defined data
    site = userdat.sites[sitename]

    # honour the site's minimum interval (days), if one is defined
    minInterval = FieldVal(site, 'mininterval')
    if minInterval:
        interval = max(minInterval, interval)   # use the longer of the two

    # set the start date/time
    dtstart = time.strftime("%Y%m%d",
                            time.localtime(time.time() - interval * 86400))
    dtnow = time.strftime("%Y%m%d%H%M%S", time.localtime())

    client = OFXClient(site, user, password)
    print('%s : %s : Getting records since:  %s' % (sitename, acct_num, dtstart))

    status = True

    # ofx data transfers are placed in xfrdir; create it when missing
    if not os.path.exists(xfrdir):
        try:
            os.mkdir(xfrdir)
        except OSError:
            print('** Error. Could not create %s' % xfrdir)
            sys.exit()

    # remove illegal WinFile characters from the file name (in case someone
    # included them in the sitename).  '&' is dropped as well because the
    # os.system() call doesn't allow it.  The first char is a space.
    sitename = ''.join(a for a in sitename if a not in ' &\\/:*?"<>|()')
    ofxFileSuffix = str(random.randrange(100000, 1000000)) + ".ofx"
    ofxFileName = xfrdir + sitename + dtnow + ofxFileSuffix

    try:
        if acct_num == '':
            # 19700101000000 is just a default DTSTART date/time string
            query = client.acctQuery("19700101000000")
        else:
            caps = FieldVal(site, "CAPS")
            if "CCSTMT" in caps:
                query = client.ccQuery(acct_num, dtstart)
            elif "INVSTMT" in caps:
                # if we have a brokerid, use it. Otherwise, try the fiorg value.
                orgID = FieldVal(site, 'BROKERID')
                if orgID == '':
                    orgID = FieldVal(site, 'FIORG')
                if orgID == '':
                    raise Exception(
                        '** Error: Site %s does not have a (REQUIRED) '
                        'BrokerID or FIORG value defined.' % sitename)
                query = client.invstQuery(orgID, acct_num, dtstart)
            elif "BASTMT" in caps:
                bankid = FieldVal(site, "BANKID")
                if bankid == '':
                    raise Exception(
                        '** Error: Site %s does not have a (REQUIRED) '
                        'BANKID value defined.' % sitename)
                query = client.baQuery(bankid, acct_num, dtstart, acct_type)
            else:
                # Without this branch `query` stays unbound and the failure
                # shows up as an obscure NameError further down.
                raise Exception(
                    '** Error: Site %s does not define a supported statement '
                    'type in CAPS.' % sitename)

        if Debug:
            print(query)
            print('')
            ask = raw_input('DEBUG: Send request to bank server (y/n)?').upper()
            if ask == 'N':
                return False, ''

        # do the deed
        client.doQuery(query, ofxFileName)
        if not client.status:
            return False, ''

        # check the ofx file and make sure it looks valid
        if not os.path.exists(ofxFileName):
            status = False  # no ofx file?
        else:
            with open(ofxFileName, 'r') as f:
                content = f.read().upper()

            if acct_num != _acct_num:
                # replace bank account number w/ value defined in sites.dat
                # (note: the upper-cased content is what gets written back)
                content = content.replace('<ACCTID>' + acct_num,
                                          '<ACCTID>' + _acct_num)
                with open(ofxFileName, 'w') as f:
                    f.write(content)

            # strip newlines & spaces before looking for markers
            content = ''.join(a for a in content if a not in '\r\n ')

            # NOTE(review): this only rejects the file when ALL three markers
            # are missing; confirm whether each one should be required.
            if not any(marker in content
                       for marker in ('OFXHEADER:', '<OFX>', '</OFX>')):
                raise Exception("Invalid OFX statement.")

            # look for <SEVERITY>ERROR code... rlc*2013
            if '<SEVERITY>ERROR' in content:
                raise Exception("OFX message contains ERROR condition")

            # An investment statement must contain a <SECLIST> section when
            # positions are listed.  Some Vanguard statements have been
            # missing this when there are no transactions, causing Money to
            # crash.  It may be necessary to match every investment position
            # with a security entry, but we only verify that both sections
            # exist.  rlc*9/2010
            if '<INVPOS>' in content and '<SECLIST>' not in content:
                raise Exception(
                    "OFX statement is missing required <SECLIST> section.")

            # cleanup the file if needed
            scrubber.scrub(ofxFileName, site)

    except Exception as inst:
        status = False
        print(inst)
        if os.path.exists(ofxFileName):
            print('** Review %s for possible clues...' % ofxFileName)
        if Debug:
            traceback.print_exc()

    return status, ofxFileName
def getOFX(account, interval):
    """Download an OFX statement for one account and sanity-check the file.

    Args:
        account: Indexable record laid out as
            ``[sitename, account number, account type, user, password]``.
            Only the part of the account number before the first ``:`` is
            sent to the server; the full value is written back into the
            downloaded file's ``<ACCTID>`` entries.
        interval: Number of days of history to request.  A larger
            ``mininterval`` configured for the site takes precedence.

    Returns:
        A ``(status, ofxFileName)`` tuple.  ``(False, '')`` is returned when
        the request is cancelled at the debug prompt or the client reports a
        failed transfer.  Otherwise the generated file name is returned with
        ``status`` False if the file is missing or failed validation.

    Note:
        The process exits (``sys.exit()``) if the transfer directory does
        not exist and cannot be created.
    """
    import sys  # local import: the original referenced an undefined `system`

    sitename = account[0]
    _acct_num = account[1]      # account value defined in sites.dat
    acct_type = account[2]
    user = account[3]
    password = account[4]
    acct_num = _acct_num.split(':')[0]  # bank account# (stripped of :xxx version)

    # get site and other user-defined data
    site = userdat.sites[sitename]

    # honour the site's minimum interval (days), if one is defined
    minInterval = FieldVal(site, 'mininterval')
    if minInterval:
        interval = max(minInterval, interval)   # use the longer of the two

    # set the start date/time
    dtstart = time.strftime("%Y%m%d",
                            time.localtime(time.time() - interval * 86400))
    dtnow = time.strftime("%Y%m%d%H%M%S", time.localtime())

    client = OFXClient(site, user, password)
    print('%s : %s : Getting records since:  %s' % (sitename, acct_num, dtstart))

    status = True

    # ofx data transfers are placed in xfrdir; create it when missing
    if not os.path.exists(xfrdir):
        try:
            os.mkdir(xfrdir)
        except OSError:
            print('** Error. Could not create %s' % xfrdir)
            sys.exit()

    # remove illegal WinFile characters from the file name (in case someone
    # included them in the sitename).  '&' is dropped as well because the
    # os.system() call doesn't allow it.  The first char is a space.
    sitename = ''.join(a for a in sitename if a not in ' &\\/:*?"<>|()')
    ofxFileSuffix = str(random.randrange(100000, 1000000)) + ".ofx"
    ofxFileName = xfrdir + sitename + dtnow + ofxFileSuffix

    try:
        if acct_num == '':
            # 19700101000000 is just a default DTSTART date/time string
            query = client.acctQuery("19700101000000")
        else:
            caps = FieldVal(site, "CAPS")
            if "CCSTMT" in caps:
                query = client.ccQuery(acct_num, dtstart)
            elif "INVSTMT" in caps:
                # if we have a brokerid, use it. Otherwise, try the fiorg value.
                orgID = FieldVal(site, 'BROKERID')
                if orgID == '':
                    orgID = FieldVal(site, 'FIORG')
                if orgID == '':
                    raise Exception(
                        '** Error: Site %s does not have a (REQUIRED) '
                        'BrokerID or FIORG value defined.' % sitename)
                query = client.invstQuery(orgID, acct_num, dtstart)
            elif "BASTMT" in caps:
                bankid = FieldVal(site, "BANKID")
                if bankid == '':
                    raise Exception(
                        '** Error: Site %s does not have a (REQUIRED) '
                        'BANKID value defined.' % sitename)
                query = client.baQuery(bankid, acct_num, dtstart, acct_type)
            else:
                # Without this branch `query` stays unbound and the failure
                # shows up as an obscure NameError further down.
                raise Exception(
                    '** Error: Site %s does not define a supported statement '
                    'type in CAPS.' % sitename)

        if Debug:
            print(query)
            print('')
            ask = raw_input('DEBUG: Send request to bank server (y/n)?').upper()
            if ask == 'N':
                return False, ''

        # do the deed
        client.doQuery(query, ofxFileName)
        if not client.status:
            return False, ''

        # check the ofx file and make sure it looks valid
        if not os.path.exists(ofxFileName):
            status = False  # no ofx file?
        else:
            with open(ofxFileName, 'r') as f:
                content = f.read().upper()

            if acct_num != _acct_num:
                # replace bank account number w/ value defined in sites.dat
                # (note: the upper-cased content is what gets written back)
                content = content.replace('<ACCTID>' + acct_num,
                                          '<ACCTID>' + _acct_num)
                with open(ofxFileName, 'w') as f:
                    f.write(content)

            # strip newlines & spaces before looking for markers
            content = ''.join(a for a in content if a not in '\r\n ')

            # NOTE(review): this only rejects the file when ALL three markers
            # are missing; confirm whether each one should be required.
            if not any(marker in content
                       for marker in ('OFXHEADER:', '<OFX>', '</OFX>')):
                raise Exception("Invalid OFX statement.")

            # look for <SEVERITY>ERROR code... rlc*2013
            if '<SEVERITY>ERROR' in content:
                raise Exception("OFX message contains ERROR condition")

            # An investment statement must contain a <SECLIST> section when
            # positions are listed.  Some Vanguard statements have been
            # missing this when there are no transactions, causing Money to
            # crash.  It may be necessary to match every investment position
            # with a security entry, but we only verify that both sections
            # exist.  rlc*9/2010
            if '<INVPOS>' in content and '<SECLIST>' not in content:
                raise Exception(
                    "OFX statement is missing required <SECLIST> section.")

            # cleanup the file if needed
            scrubber.scrub(ofxFileName, site)

    except Exception as inst:
        status = False
        print(inst)
        if os.path.exists(ofxFileName):
            print('** Review %s for possible clues...' % ofxFileName)
        if Debug:
            traceback.print_exc()

    return status, ofxFileName
# Scan the import directory and move every OFX-looking file into xfrdir,
# scrubbing each file once and queueing it on ofxList.
print('Searching %s for statements to import' % importdir)
for f in glob.glob(importdir + '*.*'):
    fname = os.path.basename(f)             # file name including extension
    bname, bext = os.path.splitext(fname)   # name without extension, extension
    with open(f) as infile:
        dat = infile.read()

    # An empty string from validOFX() is treated as "looks like OFX";
    # anything else is skipped.
    if validOFX(dat) != '':
        continue

    print("Importing %s" % fname)

    # Files already flagged as imported were scrubbed on an earlier run.
    if 'NEWFILEUID:PSIMPORT' not in dat[:200]:
        site = getSite(dat)
        scrubber.scrub(f, site)

    # Flag the file as imported/scrubbed so it is never scrubbed twice.
    # The file is re-read here, after the scrub call above.
    with open(f) as infile:
        ofx = infile.read()
    ofx2 = re.sub(r'NEWFILEUID:.*', 'NEWFILEUID:PSIMPORT', ofx)
    if ofx2:
        with open(f, 'w') as outfile:
            outfile.write(ofx2)

    # Keep the original file name, appending .ofx unless it already ends so.
    if bext == '.ofx':
        outname = xfrdir + fname
    else:
        outname = xfrdir + fname + '.ofx'
    os.rename(f, outname)
    ofxList.append(['import file', '', outname])