def tidy_text(self, text):
    """Return ``text`` unescaped, converted to unicode and tidied up.

    The input is passed through ``unescape`` and then ``unicode``.  A fixed
    list of literal substitutions is applied afterwards, strictly in the
    order listed below, and finally leading/trailing whitespace is
    stripped.

    :param text: raw text taken from a scraped page.
    :returns: the cleaned unicode string.
    """
    # (search, replacement) pairs; the order matters because every pass
    # works on the output of the previous one.
    # NOTE(review): in this copy of the file several search literals appear
    # identical to their replacement (the pound-sign and space pairs), which
    # would make those passes no-ops.  Confirm the literals against the
    # file on disk before relying on them.
    substitutions = (
        # html pound signs
        ('£', u'£'),
        ('£', u'£'),
        # html spaces
        (' ', ' '),
        (' ', ' '),
        # punctuation
        ('"', ''),
        # duplicate spaces (five consecutive passes)
        (' ', ' '),
        (' ', ' '),
        (' ', ' '),
        (' ', ' '),
        (' ', ' '),
        # other html
        ('<br>', ''),
        ('<br/>', ''),
        ('</br>', ''),
    )

    cleaned = unicode(unescape(text))
    for needle, replacement in substitutions:
        cleaned = cleaned.replace(needle, replacement)

    # drop leading and trailing whitespace last
    return cleaned.strip()
def tidy_text(self, text):
    """Return ``text`` unescaped, converted to unicode and tidied up.

    The input is passed through ``unescape`` and then ``unicode``.  A fixed
    list of literal substitutions is applied afterwards, strictly in the
    order listed below, and finally leading/trailing whitespace is
    stripped.

    :param text: raw text taken from a scraped page.
    :returns: the cleaned unicode string.
    """
    # (search, replacement) pairs; the order matters because every pass
    # works on the output of the previous one.
    # NOTE(review): in this copy of the file several search literals appear
    # identical to their replacement (the pound-sign and space pairs), which
    # would make those passes no-ops.  Confirm the literals against the
    # file on disk before relying on them.
    substitutions = (
        # html pound signs
        ('£', u'£'),
        ('£', u'£'),
        # html spaces
        (' ', ' '),
        (' ', ' '),
        # punctuation
        ('"', ''),
        # duplicate spaces (five consecutive passes)
        (' ', ' '),
        (' ', ' '),
        (' ', ' '),
        (' ', ' '),
        (' ', ' '),
        # other html
        ('<br>', ''),
        ('<br/>', ''),
        ('</br>', ''),
    )

    cleaned = unicode(unescape(text))
    for needle, replacement in substitutions:
        cleaned = cleaned.replace(needle, replacement)

    # drop leading and trailing whitespace last
    return cleaned.strip()
# processAccount(self, acCount, acName, account_path, allofit)
#
# Purpose: decode one scraped account page, save it, parse it with
# BeautifulSoup and push the balance / transactions it finds into a
# StatementBuilder.
#
# Parameters, as used by the body:
#   acCount      - only used to build the saved page name
#                  "account<acCount>.html" passed to self.output_page.
#   acName       - not referenced anywhere in the body.
#   account_path - compared against "" before the "Processing" debug log;
#                  also handed to StatementBuilder together with
#                  self.facade and self.token.
#   allofit      - mapping; allofit['body'] is decoded with self.HexToByte
#                  to obtain the page.
# Returns: nothing.  The only explicit `return` is the bare one taken when
# the date in column 0 cannot be parsed.
#
# What the body reads from the page:
#   * isVisa becomes True when an <input> named
#     'cmd_sort_referenceAscending' is present.
#   * The balance is taken from cell index 1 of the table at index 2 among
#     tables with class 'fdTableBackgroundOne'.  A value ending in 'D' has
#     'DB'/'D' removed and is negated; otherwise 'CR'/'C' is removed.
#   * Transactions come from the <tr> rows of the table with class
#     'fdStatTable'.  Column 0 is the date (parsed via DateParser with '/'
#     as separator).  The description is column 1 (2 when isVisa) and is
#     split into atts['display'] (first 19 characters) and
#     atts['extradisplay'] (the rest).  The debit amount is column 2 (3 when
#     isVisa) and the credit amount column 3 (4 when isVisa).
#   * Columns 4 and 5 feed lastbal / firstbal and a "D" marker; column 4 is
#     read under `if not isVisa`.
#   * self.statementbuilder.make_xact(atts), put_statement() and the
#     increment of self.current_statement close the processing.
#
# NOTE(review): the layout of this function has been collapsed, so the
# nesting of the if/for/try bodies cannot be read from this copy.  The code
# is deliberately left untouched; restore the indentation from version
# control before changing any logic.
# NOTE(review): int(float(data) * 100) truncates instead of rounding, e.g.
# float("1.15") * 100 is 114.99999999999999, which becomes 114.  Consider
# int(round(...)) or decimal.Decimal when this is next edited.
# NOTE(review): the bare `except:` around the date parsing catches every
# exception and is reported as "Invalid FD date format"; narrowing it
# depends on what DateParser raises - confirm.
# NOTE(review): bal_tables[2] is indexed before any length check, so a page
# with fewer than three matching tables presumably raises IndexError -
# verify against real pages.
# NOTE(review): `next = False` is never read in this function and shadows
# the builtin.  `<>` and `unicode` are Python 2 only.
def processAccount(self, acCount, acName, account_path, allofit): page = self.HexToByte(allofit['body']) # save this page self.output_page("account" + str(acCount) + ".html", page) soup = BeautifulSoup(page) logging.debug('ac path - ' + str(account_path) + ' - end') if account_path != "": # delete existing current xactions logging.debug('Processing :) ') self.statementbuilder = StatementBuilder(self.facade, account_path, self.token) # need to get last statement and make a new one every time self.statementbuilder.make_recent_dif_statement( 'Fd-recent', 'Scraper', None) #TODO change this isVisa = False loginform = soup.find( 'input', attrs={'name': 'cmd_sort_referenceAscending'}) if loginform != None: isVisa = True bal_tables = soup.findAll( 'table', attrs={'class': 'fdTableBackgroundOne'}) balance_table = bal_tables[2] if balance_table <> None: vals = balance_table.findAll('td') if vals: bal = vals[1].text data = bal.replace('£', u'£') data = data.strip(u'£') if data[-1] == 'D': data = data.replace('DB', '') data = data.replace('D', '') lastbal = int(float(data) * 100) firstbal = 0 - lastbal else: data = data.replace('CR', '') data = data.replace('C', '') firstbal = int(float(data) * 100) self.statementbuilder.set_current_balance(firstbal) logging.debug( "-----------------------------*******---------------------") if isVisa: logging.debug("found visa --") acTable = soup.find('table', attrs={'class': 'fdStatTable'}) # if no table then no new data afaik if acTable != None: datarows = acTable.findAll('tr') next = False # build the post values up atts = {} isFirst = True firstbal = 0 firstdate = "" lastbal = 0 lastdate = "" doBalance = False dp = DateParser() for rows in datarows: vals = rows.findAll('td') if vals: for i, val in enumerate(vals): if val.text: data = val.text.strip() data = unescape(data) data = unicode(data) else: data = "" if data != " ": data = data.replace(' ', '') if i == 0: if data != "": try: lastdate = dp.ymd_from_date( dp.date_from_dmy(data, 
'/')) except: logging.warn( "Invalid FD date format - probably no transactions" ) return if firstdate == "": firstdate = lastdate atts['date'] = lastdate if (i == 1 and not isVisa) or (i == 2 and isVisa): atts['display'] = data[0:19] atts['extradisplay'] = data[19:] if (i == 2 and not isVisa) or (i == 3 and isVisa): if data != "": data = data.strip(u'£') data = data.strip(u'D') data = data.strip(u'B') if data == '': atts['amount'] = 0 else: atts['amount'] = int( float(data) * 100) atts['type'] = 'Debit' if (i == 3 and not isVisa) or (i == 4 and isVisa): if data != "": data = data.strip(u'£') data = data.strip(u'C') data = data.strip(u'R') if data == '': atts['amount'] = 0 else: atts['amount'] = int( float(data) * 100) atts['type'] = 'Credit' if not isVisa: if i == 4: data = data.strip(u'£') if data != "": lastbal = int(float(data) * 100) if isFirst: isFirst = False firstbal = lastbal doBalance = True if i == 5: if doBalance: doBalance = False if data == "D": firstbal = 0 - firstbal self.statementbuilder.set_current_balance( firstbal) self.statementbuilder.make_xact(atts) self.statementbuilder.put_statement() self.current_statement = self.current_statement + 1
# processAccount(self, acCount, acName, account_path, allofit)
#
# Purpose: decode one scraped account page, save it, parse it with
# BeautifulSoup and push the balance / transactions it finds into a
# StatementBuilder.
#
# Parameters, as used by the body:
#   acCount      - only used to build the saved page name
#                  "account<acCount>.html" passed to self.output_page.
#   acName       - not referenced anywhere in the body.
#   account_path - compared against "" before the "Processing" debug log;
#                  also handed to StatementBuilder together with
#                  self.facade and self.token.
#   allofit      - mapping; allofit['body'] is decoded with self.HexToByte
#                  to obtain the page.
# Returns: nothing.  The only explicit `return` is the bare one taken when
# the date in column 0 cannot be parsed.
#
# What the body reads from the page:
#   * isVisa becomes True when an <input> named
#     'cmd_sort_referenceAscending' is present.
#   * The balance is taken from cell index 1 of the table at index 2 among
#     tables with class 'fdTableBackgroundOne'.  A value ending in 'D' has
#     'DB'/'D' removed and is negated; otherwise 'CR'/'C' is removed.
#   * Transactions come from the <tr> rows of the table with class
#     'fdStatTable'.  Column 0 is the date (parsed via DateParser with '/'
#     as separator).  The description is column 1 (2 when isVisa) and is
#     split into atts['display'] (first 19 characters) and
#     atts['extradisplay'] (the rest).  The debit amount is column 2 (3 when
#     isVisa) and the credit amount column 3 (4 when isVisa).
#   * Columns 4 and 5 feed lastbal / firstbal and a "D" marker; column 4 is
#     read under `if not isVisa`.
#   * self.statementbuilder.make_xact(atts), put_statement() and the
#     increment of self.current_statement close the processing.
#
# NOTE(review): the layout of this function has been collapsed, so the
# nesting of the if/for/try bodies cannot be read from this copy.  The code
# is deliberately left untouched; restore the indentation from version
# control before changing any logic.
# NOTE(review): int( float(data) * 100 ) truncates instead of rounding, e.g.
# float("1.15") * 100 is 114.99999999999999, which becomes 114.  Consider
# int(round(...)) or decimal.Decimal when this is next edited.
# NOTE(review): the bare `except:` around the date parsing catches every
# exception and is reported as "Invalid FD date format"; narrowing it
# depends on what DateParser raises - confirm.
# NOTE(review): bal_tables[2] is indexed before any length check, so a page
# with fewer than three matching tables presumably raises IndexError -
# verify against real pages.
# NOTE(review): `next = False` is never read in this function and shadows
# the builtin.  `<>` and `unicode` are Python 2 only.
def processAccount(self, acCount, acName, account_path, allofit): page = self.HexToByte( allofit['body']) # save this page self.output_page("account" + str(acCount) + ".html", page) soup = BeautifulSoup(page) logging.debug('ac path - ' + str(account_path) + ' - end' ) if account_path != "": # delete existing current xactions logging.debug('Processing :) ' ) self.statementbuilder = StatementBuilder(self.facade, account_path, self.token) # need to get last statement and make a new one every time self.statementbuilder.make_recent_dif_statement('Fd-recent', 'Scraper', None) #TODO change this isVisa = False loginform=soup.find('input', attrs={'name' : 'cmd_sort_referenceAscending'}) if loginform != None: isVisa = True bal_tables=soup.findAll('table', attrs={'class' : 'fdTableBackgroundOne'}) balance_table = bal_tables[2] if balance_table <> None: vals = balance_table.findAll('td') if vals: bal = vals[1].text data = bal.replace('£', u'£'); data = data.strip(u'£') if data[-1] == 'D': data = data.replace('DB','') data = data.replace('D','') lastbal = int( float(data) * 100 ) firstbal = 0 - lastbal else: data = data.replace('CR','') data = data.replace('C','') firstbal = int( float(data) * 100 ) self.statementbuilder.set_current_balance(firstbal) logging.debug("-----------------------------*******---------------------") if isVisa: logging.debug("found visa --") acTable=soup.find('table', attrs={'class' : 'fdStatTable'}) # if no table then no new data afaik if acTable != None: datarows=acTable.findAll('tr') next = False # build the post values up atts = {} isFirst = True firstbal = 0 firstdate = "" lastbal = 0 lastdate = "" doBalance = False dp = DateParser() for rows in datarows: vals = rows.findAll('td') if vals: for i, val in enumerate(vals): if val.text: data = val.text.strip() data = unescape(data) data = unicode(data) else: data = "" if data != " ": data = data.replace(' ','') if i == 0: if data != "": try: lastdate = dp.ymd_from_date(dp.date_from_dmy(data,'/')) 
except: logging.warn("Invalid FD date format - probably no transactions") return if firstdate == "": firstdate = lastdate atts['date'] = lastdate if (i == 1 and not isVisa) or (i == 2 and isVisa): atts['display'] = data[0:19] atts['extradisplay'] = data[19:] if (i == 2 and not isVisa) or (i == 3 and isVisa): if data != "": data = data.strip(u'£') data = data.strip(u'D') data = data.strip(u'B') if data == '': atts['amount'] = 0 else: atts['amount'] = int( float(data) * 100 ) atts['type'] = 'Debit' if (i == 3 and not isVisa) or (i == 4 and isVisa): if data != "": data = data.strip(u'£') data = data.strip(u'C') data = data.strip(u'R') if data == '': atts['amount'] = 0 else: atts['amount'] = int( float(data) * 100 ) atts['type'] = 'Credit' if not isVisa: if i == 4: data = data.strip(u'£') if data != "": lastbal = int( float(data) * 100 ) if isFirst: isFirst = False firstbal = lastbal doBalance = True if i == 5: if doBalance: doBalance = False if data == "D": firstbal = 0 - firstbal self.statementbuilder.set_current_balance(firstbal) self.statementbuilder.make_xact(atts) self.statementbuilder.put_statement() self.current_statement = self.current_statement + 1