Example #1
0
    def tidy_text(self, text):
        """Return ``text`` cleaned up as a unicode string.

        The text is passed through ``unescape`` and converted to unicode.
        Any remaining HTML pound-sign and non-breaking-space entities are
        then decoded, double quotes and ``<br>`` style tags are removed,
        runs of spaces are collapsed to a single space and leading/trailing
        whitespace is stripped.

        Args:
            text: The raw text to clean.

        Returns:
            The cleaned unicode string.
        """
        # written as an escape so the source does not depend on file encoding
        pound = u'\u00a3'

        data = unicode(unescape(text))

        # decode html pound signs; the ';'-terminated named form is handled
        # before the bare one so that no stray ';' is left behind
        for entity in ('&#163;', '&pound;', '&pound'):
            data = data.replace(entity, pound)

        # decode html spaces
        for entity in ('&#160;', '&nbsp;'):
            data = data.replace(entity, ' ')

        # remove punctuation
        data = data.replace('"', '')

        # remove other html; done before collapsing spaces because dropping
        # a tag that sits between two spaces creates a new double space
        for tag in ('<br>', '<br/>', '</br>'):
            data = data.replace(tag, '')

        # remove duplicate spaces, whatever the length of the run
        while '  ' in data:
            data = data.replace('  ', ' ')

        # then drop leading and trailing whitespace
        return data.strip()
Example #2
0
    def tidy_text(self, text):
        """Return ``text`` cleaned up as a unicode string.

        The text is passed through ``unescape`` and converted to unicode.
        Any remaining HTML pound-sign and non-breaking-space entities are
        then decoded, double quotes and ``<br>`` style tags are removed,
        runs of spaces are collapsed to a single space and leading/trailing
        whitespace is stripped.

        Args:
            text: The raw text to clean.

        Returns:
            The cleaned unicode string.
        """
        # written as an escape so the source does not depend on file encoding
        pound = u'\u00a3'

        data = unicode(unescape(text))

        # decode html pound signs; the ';'-terminated named form is handled
        # before the bare one so that no stray ';' is left behind
        for entity in ('&#163;', '&pound;', '&pound'):
            data = data.replace(entity, pound)

        # decode html spaces
        for entity in ('&#160;', '&nbsp;'):
            data = data.replace(entity, ' ')

        # remove punctuation
        data = data.replace('"', '')

        # remove other html; done before collapsing spaces because dropping
        # a tag that sits between two spaces creates a new double space
        for tag in ('<br>', '<br/>', '</br>'):
            data = data.replace(tag, '')

        # remove duplicate spaces, whatever the length of the run
        while '  ' in data:
            data = data.replace('  ', ' ')

        # then drop leading and trailing whitespace
        return data.strip()
Example #3
0
    def processAccount(self, acCount, acName, account_path, allofit):
        """Parse one scraped account page and feed it to a statement builder.

        The page body is decoded with ``self.HexToByte``, saved through
        ``self.output_page`` and parsed with BeautifulSoup.  When
        ``account_path`` is non-empty a new ``StatementBuilder`` is created,
        the current balance is set when one can be read from the page, and
        one transaction is made for every row of the ``fdStatTable`` table
        that contains cells.  Money values are converted to integer
        hundredths of a pound.

        Args:
            acCount: Account index; only used to name the saved page
                (``account<acCount>.html``).
            acName: Not used by this method.
            account_path: Path handed to ``StatementBuilder``; with an empty
                string the page is saved but not processed any further.
            allofit: Mapping whose ``'body'`` entry holds the encoded page.

        Returns:
            None.  Returns early, without calling ``put_statement``, when a
            date cell cannot be parsed.
        """
        # written as an escape so the source does not depend on file encoding
        pound = u'\u00a3'

        def to_pence(amount_text):
            # round instead of truncating: float('1.15') * 100 evaluates to
            # 114.999..., which int() alone would turn into 114
            return int(round(float(amount_text) * 100))

        page = self.HexToByte(allofit['body'])

        # save this page
        self.output_page("account" + str(acCount) + ".html", page)

        soup = BeautifulSoup(page)

        logging.debug('ac path - ' + str(account_path) + ' - end')

        if account_path == "":
            return

        logging.debug('Processing :) ')

        self.statementbuilder = StatementBuilder(self.facade, account_path,
                                                 self.token)

        # need to get last statement and make a new one every time
        self.statementbuilder.make_recent_dif_statement(
            'Fd-recent', 'Scraper', None)  # TODO change this

        # the presence of the reference-sort input is what marks the page
        # as a visa account
        sort_input = soup.find(
            'input', attrs={'name': 'cmd_sort_referenceAscending'})
        isVisa = sort_input is not None

        if isVisa:
            bal_tables = soup.findAll(
                'table', attrs={'class': 'fdTableBackgroundOne'})

            # the balance is read from the second cell of the third such
            # table; skip it quietly when the page does not have that shape
            if len(bal_tables) > 2:
                bal_cells = bal_tables[2].findAll('td')

                if len(bal_cells) > 1:
                    bal = bal_cells[1].text.replace('&#163;', pound)
                    bal = bal.strip().strip(pound).strip()

                    if bal:
                        # a trailing D or DB marks a debit (negative) balance
                        if bal.endswith(('D', 'DB')):
                            bal = bal.replace('DB', '').replace('D', '')
                            balance = 0 - to_pence(bal)
                        else:
                            bal = bal.replace('CR', '').replace('C', '')
                            balance = to_pence(bal)

                        self.statementbuilder.set_current_balance(balance)

        logging.debug(
            "-----------------------------*******---------------------")
        if isVisa:
            logging.debug("found visa --")

        acTable = soup.find('table', attrs={'class': 'fdStatTable'})

        # if no table then no new data afaik
        if acTable is None:
            return

        # visa rows carry one extra column after the date, which shifts the
        # description / debit / credit columns right by one
        if isVisa:
            desc_col, debit_col, credit_col = 2, 3, 4
        else:
            desc_col, debit_col, credit_col = 1, 2, 3

        # atts is reused for every row, so any value a row does not
        # overwrite (e.g. the date of a row with an empty date cell) carries
        # over from the previous row
        atts = {}
        lastdate = ""

        firstbal = 0
        balance_seen = False      # a running balance has already been read
        balance_pending = False   # balance read, waiting for its D/C cell

        dp = DateParser()

        for row in acTable.findAll('tr'):
            cells = row.findAll('td')
            if not cells:
                continue

            for i, cell in enumerate(cells):
                if cell.text:
                    data = unicode(unescape(cell.text.strip()))
                else:
                    data = ""

                if data == "&nbsp;":
                    continue
                data = data.replace('&nbsp;', '')

                if i == 0:
                    if data != "":
                        try:
                            lastdate = dp.ymd_from_date(
                                dp.date_from_dmy(data, '/'))
                        except Exception:
                            logging.warning(
                                "Invalid FD date format - probably no transactions"
                            )
                            return

                    atts['date'] = lastdate

                elif i == desc_col:
                    atts['display'] = data[0:19]
                    atts['extradisplay'] = data[19:]

                elif i == debit_col:
                    if data != "":
                        # strip() takes a set of characters, so this removes
                        # both a 'D' and a 'DB' suffix
                        data = data.strip(pound).strip(u'DB')
                        if data == '':
                            atts['amount'] = 0
                        else:
                            atts['amount'] = to_pence(data)
                        atts['type'] = 'Debit'

                elif i == credit_col:
                    if data != "":
                        # likewise handles both 'C' and 'CR'
                        data = data.strip(pound).strip(u'CR')
                        if data == '':
                            atts['amount'] = 0
                        else:
                            atts['amount'] = to_pence(data)
                        atts['type'] = 'Credit'

                elif not isVisa and i == 4:
                    # only the first running balance found is used as the
                    # current balance
                    data = data.strip(pound)
                    if data != "" and not balance_seen:
                        balance_seen = True
                        firstbal = to_pence(data)
                        balance_pending = True

                elif not isVisa and i == 5:
                    if balance_pending:
                        balance_pending = False
                        if data == "D":
                            firstbal = 0 - firstbal
                        self.statementbuilder.set_current_balance(firstbal)

            self.statementbuilder.make_xact(atts)

        self.statementbuilder.put_statement()
        self.current_statement = self.current_statement + 1
    def processAccount(self, acCount, acName, account_path, allofit):
        """Parse one scraped account page and feed it to a statement builder.

        The page body is decoded with ``self.HexToByte``, saved through
        ``self.output_page`` and parsed with BeautifulSoup.  When
        ``account_path`` is non-empty a new ``StatementBuilder`` is created,
        the current balance is set when one can be read from the page, and
        one transaction is made for every row of the ``fdStatTable`` table
        that contains cells.  Money values are converted to integer
        hundredths of a pound.

        Args:
            acCount: Account index; only used to name the saved page
                (``account<acCount>.html``).
            acName: Not used by this method.
            account_path: Path handed to ``StatementBuilder``; with an empty
                string the page is saved but not processed any further.
            allofit: Mapping whose ``'body'`` entry holds the encoded page.

        Returns:
            None.  Returns early, without calling ``put_statement``, when a
            date cell cannot be parsed.
        """
        # written as an escape so the source does not depend on file encoding
        pound = u'\u00a3'

        def to_pence(amount_text):
            # round instead of truncating: float('1.15') * 100 evaluates to
            # 114.999..., which int() alone would turn into 114
            return int(round(float(amount_text) * 100))

        page = self.HexToByte(allofit['body'])

        # save this page
        self.output_page("account" + str(acCount) + ".html", page)

        soup = BeautifulSoup(page)

        logging.debug('ac path - ' + str(account_path) + ' - end')

        if account_path == "":
            return

        logging.debug('Processing :) ')

        self.statementbuilder = StatementBuilder(self.facade, account_path, self.token)

        # need to get last statement and make a new one every time
        self.statementbuilder.make_recent_dif_statement('Fd-recent', 'Scraper', None)  # TODO change this

        # the presence of the reference-sort input is what marks the page
        # as a visa account
        sort_input = soup.find('input', attrs={'name': 'cmd_sort_referenceAscending'})
        isVisa = sort_input is not None

        if isVisa:
            bal_tables = soup.findAll('table', attrs={'class': 'fdTableBackgroundOne'})

            # the balance is read from the second cell of the third such
            # table; skip it quietly when the page does not have that shape
            if len(bal_tables) > 2:
                bal_cells = bal_tables[2].findAll('td')

                if len(bal_cells) > 1:
                    bal = bal_cells[1].text.replace('&#163;', pound)
                    bal = bal.strip().strip(pound).strip()

                    if bal:
                        # a trailing D or DB marks a debit (negative) balance
                        if bal.endswith(('D', 'DB')):
                            bal = bal.replace('DB', '').replace('D', '')
                            balance = 0 - to_pence(bal)
                        else:
                            bal = bal.replace('CR', '').replace('C', '')
                            balance = to_pence(bal)

                        self.statementbuilder.set_current_balance(balance)

        logging.debug("-----------------------------*******---------------------")
        if isVisa:
            logging.debug("found visa --")

        acTable = soup.find('table', attrs={'class': 'fdStatTable'})

        # if no table then no new data afaik
        if acTable is None:
            return

        # visa rows carry one extra column after the date, which shifts the
        # description / debit / credit columns right by one
        if isVisa:
            desc_col, debit_col, credit_col = 2, 3, 4
        else:
            desc_col, debit_col, credit_col = 1, 2, 3

        # atts is reused for every row, so any value a row does not
        # overwrite (e.g. the date of a row with an empty date cell) carries
        # over from the previous row
        atts = {}
        lastdate = ""

        firstbal = 0
        balance_seen = False      # a running balance has already been read
        balance_pending = False   # balance read, waiting for its D/C cell

        dp = DateParser()

        for row in acTable.findAll('tr'):
            cells = row.findAll('td')
            if not cells:
                continue

            for i, cell in enumerate(cells):
                if cell.text:
                    data = unicode(unescape(cell.text.strip()))
                else:
                    data = ""

                if data == "&nbsp;":
                    continue
                data = data.replace('&nbsp;', '')

                if i == 0:
                    if data != "":
                        try:
                            lastdate = dp.ymd_from_date(dp.date_from_dmy(data, '/'))
                        except Exception:
                            logging.warning("Invalid FD date format - probably no transactions")
                            return

                    atts['date'] = lastdate

                elif i == desc_col:
                    atts['display'] = data[0:19]
                    atts['extradisplay'] = data[19:]

                elif i == debit_col:
                    if data != "":
                        # strip() takes a set of characters, so this removes
                        # both a 'D' and a 'DB' suffix
                        data = data.strip(pound).strip(u'DB')
                        if data == '':
                            atts['amount'] = 0
                        else:
                            atts['amount'] = to_pence(data)
                        atts['type'] = 'Debit'

                elif i == credit_col:
                    if data != "":
                        # likewise handles both 'C' and 'CR'
                        data = data.strip(pound).strip(u'CR')
                        if data == '':
                            atts['amount'] = 0
                        else:
                            atts['amount'] = to_pence(data)
                        atts['type'] = 'Credit'

                elif not isVisa and i == 4:
                    # only the first running balance found is used as the
                    # current balance
                    data = data.strip(pound)
                    if data != "" and not balance_seen:
                        balance_seen = True
                        firstbal = to_pence(data)
                        balance_pending = True

                elif not isVisa and i == 5:
                    if balance_pending:
                        balance_pending = False
                        if data == "D":
                            firstbal = 0 - firstbal
                        self.statementbuilder.set_current_balance(firstbal)

            self.statementbuilder.make_xact(atts)

        self.statementbuilder.put_statement()
        self.current_statement = self.current_statement + 1