Esempio n. 1
0
    def get_history(self, currency):
        self.MONTHS = self.FR_MONTHS if currency == 'EUR' else self.US_MONTHS
        #checking if the card is still valid
        if self.doc.xpath('//div[@id="errorbox"]'):
            return

        #adding a time delta because amex have hard time to put the date in a good interval
        beginning_date = self.get_beginning_debit_date() - datetime.timedelta(
            days=360)
        end_date = self.get_end_debit_date()

        guesser = ChaoticDateGuesser(beginning_date, end_date)

        for tr in reversed(
                self.doc.xpath(
                    '//div[@id="txnsSection"]//tr[@class="tableStandardText"]')
        ):
            cols = tr.findall('td')

            t = Transaction()

            day, month = CleanText().filter(cols[self.COL_DATE]).split(' ', 1)
            day = int(day)
            month = self.MONTHS.index(month.rstrip('.')) + 1
            date = guesser.guess_date(day, month)

            rdate = None
            try:
                detail = cols[self.COL_TEXT].xpath(
                    './div[has-class("hiddenROC")]')[0]
            except IndexError:
                pass
            else:
                m = re.search(r' (\d{2} \D{3,4})', (' '.join(
                    [txt.strip() for txt in detail.itertext()])).strip())
                if m:
                    rday, rmonth = m.group(1).strip().split(' ')
                    rday = int(rday)
                    rmonth = self.MONTHS.index(rmonth.rstrip('.')) + 1
                    rdate = guesser.guess_date(rday, rmonth)
                detail.drop_tree()

            raw = (' '.join([
                txt.strip() for txt in cols[self.COL_TEXT].itertext()
            ])).strip()
            credit = CleanText().filter(cols[self.COL_CREDIT])
            debit = CleanText().filter(cols[self.COL_DEBIT])

            t.date = date
            t.rdate = rdate or date
            t.raw = re.sub(r'[ ]+', ' ', raw)
            t.label = re.sub('(.*?)( \d+)?  .*', r'\1', raw).strip()
            t.amount = CleanDecimal(replace_dots=currency == 'EUR').filter(
                credit or debit) * (1 if credit else -1)
            if t.amount > 0:
                t.type = t.TYPE_ORDER
            else:
                t.type = t.TYPE_CARD

            yield t
Esempio n. 2
0
    def get_history(self, date_guesser):
        seen = set()
        lines = self.document.xpath('(//table[@class="ca-table"])[2]/tr')
        for line in lines[1:]:  # first line is balance
            is_balance = line.xpath('./td/@class="cel-texte cel-neg"')

            [date, label, _, amount
             ] = [self.parser.tocleanstring(td) for td in line.xpath('./td')]

            t = Transaction(0)
            t.set_amount(amount)
            t.label = t.raw = label

            if is_balance:
                m = re.search('(\d+ [^ ]+ \d+)', label)
                if not m:
                    raise BrokenPageError(
                        'Unable to read card balance in history: %r' % label)

                t.date = parse_french_date(m.group(1))
                t.amount = -t.amount
            else:
                day, month = map(int, date.split('/', 1))
                t.date = date_guesser.guess_date(day, month)

            t.type = t.TYPE_CARD
            t.rdate = t.date
            try:
                t.id = t.unique_id(seen)
            except UnicodeEncodeError:
                print t
                print t.label
                raise

            yield t
Esempio n. 3
0
    def get_history(self, date_guesser):
        seen = set()
        lines = self.document.xpath('(//table[@class="ca-table"])[2]/tr')
        for line in lines[1:]:  # first line is balance
            is_balance = line.xpath('./td/@class="cel-texte cel-neg"')

            [date, label, _, amount] = [self.parser.tocleanstring(td)
                                        for td in line.xpath('./td')]

            t = Transaction(0)
            t.set_amount(amount)
            t.label = t.raw = label

            if is_balance:
                m = re.search('(\d+ [^ ]+ \d+)', label)
                if not m:
                    raise BrokenPageError('Unable to read card balance in history: %r' % label)

                t.date = parse_french_date(m.group(1))
                t.amount = -t.amount
            else:
                day, month = map(int, date.split('/', 1))
                t.date = date_guesser.guess_date(day, month)

            t.type = t.TYPE_CARD
            t.rdate = t.date
            try:
                t.id = t.unique_id(seen)
            except UnicodeEncodeError:
                print t
                print t.label
                raise

            yield t
Esempio n. 4
0
 def get_card_transactions(self, latest_date, ongoing_coming):
     for item in self.doc.xpath('//table[@class="ca-table"][2]//tr[td]'):
         if CleanText('./td[2]/b')(item):
             # This node is a summary containing the 'date' for all following transactions.
             raw_date = Regexp(CleanText('./td[2]/b/text()'), r'le (.*) :')(item)
             if latest_date and parse_french_date(raw_date).date() > latest_date:
                 # This summary has already been fetched
                 continue
             latest_date = parse_french_date(raw_date).date()
             if latest_date < ongoing_coming:
                 # This summary is anterior to the ongoing_coming so we create a transaction from it
                 tr = FrenchTransaction()
                 tr.date = tr.rdate = latest_date
                 tr.raw = tr.label = CleanText('./td[2]/b/text()')(item)
                 tr.amount = -CleanDecimal.French('./td[position()=last()]')(item)
                 tr.type = FrenchTransaction.TYPE_CARD_SUMMARY
                 yield tr
         else:
             # This node is a real transaction.
             # Its 'date' is the date of the most recently encountered summary node.
             tr = FrenchTransaction()
             tr.date = latest_date
             date_guesser = LinearDateGuesser(latest_date)
             tr.rdate = DateGuesser(CleanText('./td[1]//text()'), date_guesser=date_guesser)(item)
             tr.label = tr.raw = CleanText('./td[2]')(item)
             tr.amount = CleanDecimal.French('./td[last()]')(item)
             tr.type = FrenchTransaction.TYPE_DEFERRED_CARD
             yield tr
Esempio n. 5
0
    def get_history(self, currency):
        self.MONTHS = self.FR_MONTHS if currency == 'EUR' else self.US_MONTHS
        #checking if the card is still valid
        if self.doc.xpath('//div[@id="errorbox"]'):
            return

        #adding a time delta because amex have hard time to put the date in a good interval
        beginning_date = self.get_beginning_debit_date() - datetime.timedelta(days=300)
        end_date = self.get_end_debit_date()

        guesser = ChaoticDateGuesser(beginning_date, end_date)

        for tr in reversed(self.doc.xpath('//div[@id="txnsSection"]//tr[@class="tableStandardText"]')):
            cols = tr.findall('td')

            t = Transaction()

            day, month = CleanText().filter(cols[self.COL_DATE]).split(' ', 1)
            day = int(day)
            month = self.MONTHS.index(month.rstrip('.')) + 1
            date = guesser.guess_date(day, month)

            rdate = None
            try:
                detail = cols[self.COL_TEXT].xpath('./div[has-class("hiddenROC")]')[0]
            except IndexError:
                pass
            else:
                m = re.search(r' (\d{2} \D{3,4})', (' '.join([txt.strip() for txt in detail.itertext()])).strip())
                if m:
                    rday, rmonth = m.group(1).split(' ')
                    rday = int(rday)
                    rmonth = self.MONTHS.index(rmonth.rstrip('.')) + 1
                    rdate = guesser.guess_date(rday, rmonth)
                detail.drop_tree()

            raw = (' '.join([txt.strip() for txt in cols[self.COL_TEXT].itertext()])).strip()
            credit = CleanText().filter(cols[self.COL_CREDIT])
            debit = CleanText().filter(cols[self.COL_DEBIT])

            t.date = date
            t.rdate = rdate or date
            t.raw = re.sub(r'[ ]+', ' ', raw)
            t.label = re.sub('(.*?)( \d+)?  .*', r'\1', raw).strip()
            t.amount = CleanDecimal(replace_dots=currency == 'EUR').filter(credit or debit) * (1 if credit else -1)
            if t.amount > 0:
                t.type = t.TYPE_ORDER
            else:
                t.type = t.TYPE_CARD

            yield t
Esempio n. 6
0
    def get_history(self):
        #checking if the card is still valid
        if self.document.xpath('//div[@id="errorbox"]'):
            return

        #adding a time delta because amex have hard time to put the date in a good interval
        beginning_date = self.get_beginning_debit_date() - datetime.timedelta(
            days=120)
        end_date = self.get_end_debit_date()

        guesser = ChaoticDateGuesser(beginning_date, end_date)

        for tr in reversed(
                self.document.xpath(
                    '//div[@id="txnsSection"]//tr[@class="tableStandardText"]')
        ):
            cols = tr.findall('td')

            t = Transaction(tr.attrib['id'])

            day, month = self.parser.tocleanstring(cols[self.COL_DATE]).split(
                ' ', 1)
            day = int(day)
            month = self.MONTHS.index(month.rstrip('.')) + 1
            date = guesser.guess_date(day, month)

            try:
                detail = self.parser.select(cols[self.COL_TEXT],
                                            'div.hiddenROC', 1)
            except BrokenPageError:
                pass
            else:
                detail.drop_tree()

            raw = (' '.join([
                txt.strip() for txt in cols[self.COL_TEXT].itertext()
            ])).strip()
            credit = self.parser.tocleanstring(cols[self.COL_CREDIT])
            debit = self.parser.tocleanstring(cols[self.COL_DEBIT])

            t.date = date
            t.rdate = date
            t.raw = re.sub(r'[ ]+', ' ', raw)
            t.label = re.sub('(.*?)( \d+)?  .*', r'\1', raw).strip()
            t.set_amount(credit, debit)
            if t.amount > 0:
                t.type = t.TYPE_ORDER
            else:
                t.type = t.TYPE_CARD

            yield t
Esempio n. 7
0
    def get_history(self, date_guesser, state=None):
        seen = set()
        lines = self.document.xpath('(//table[@class="ca-table"])[2]/tr')
        debit_date = None
        for i, line in enumerate(lines):
            is_balance = line.xpath('./td/@class="cel-texte cel-neg"')

            # It is possible to have three or four columns.
            cols = [self.parser.tocleanstring(td) for td in line.xpath('./td')]
            date = cols[0]
            label = cols[1]
            amount = cols[-1]

            t = Transaction()
            t.set_amount(amount)
            t.label = t.raw = label

            if is_balance:
                m = re.search('(\d+ [^ ]+ \d+)', label)
                if not m:
                    raise BrokenPageError(
                        'Unable to read card balance in history: %r' % label)
                if state is None:
                    debit_date = parse_french_date(m.group(1))
                else:
                    debit_date = state

                # Skip the first line because it is balance
                if i == 0:
                    continue

                t.date = t.rdate = debit_date

                # Consider the second one as a positive amount to reset balance to 0.
                t.amount = -t.amount
                state = t.date
            else:
                day, month = map(int, date.split('/', 1))
                t.rdate = date_guesser.guess_date(day, month)
                t.date = debit_date

            t.type = t.TYPE_CARD
            try:
                t.id = t.unique_id(seen)
            except UnicodeEncodeError:
                self.logger.debug(t)
                self.logger.debug(t.label)
                raise

            yield state, t
Esempio n. 8
0
    def get_history(self, date_guesser, state=None):
        seen = set()
        lines = self.document.xpath('(//table[@class="ca-table"])[2]/tr')
        debit_date = None
        for i, line in enumerate(lines):
            is_balance = line.xpath('./td/@class="cel-texte cel-neg"')

            # It is possible to have three or four columns.
            cols = [self.parser.tocleanstring(td) for td in line.xpath('./td')]
            date = cols[0]
            label = cols[1]
            amount = cols[-1]

            t = Transaction()
            t.set_amount(amount)
            t.label = t.raw = label

            if is_balance:
                m = re.search('(\d+ [^ ]+ \d+)', label)
                if not m:
                    raise BrokenPageError('Unable to read card balance in history: %r' % label)
                if state is None:
                    debit_date = parse_french_date(m.group(1))
                else:
                    debit_date = state

                # Skip the first line because it is balance
                if i == 0:
                    continue

                t.date = t.rdate = debit_date

                # Consider the second one as a positive amount to reset balance to 0.
                t.amount = -t.amount
                state = t.date
            else:
                day, month = map(int, date.split('/', 1))
                t.rdate = date_guesser.guess_date(day, month)
                t.date = debit_date

            t.type = t.TYPE_CARD
            try:
                t.id = t.unique_id(seen)
            except UnicodeEncodeError:
                self.logger.debug(t)
                self.logger.debug(t.label)
                raise

            yield state, t
Esempio n. 9
0
    def get_history(self, guesser):
        debit_date = self.get_debit_date()
        if debit_date is not None:
            guesser.current_date = debit_date

        for tr in reversed(
                self.document.xpath(
                    '//div[@id="txnsSection"]//tr[@class="tableStandardText"]')
        ):
            cols = tr.findall('td')

            t = Transaction(tr.attrib['id'])

            day, month = self.parser.tocleanstring(cols[self.COL_DATE]).split(
                ' ', 1)
            day = int(day)
            month = self.MONTHS.index(month.rstrip('.')) + 1
            date = guesser.guess_date(day, month)

            try:
                detail = self.parser.select(cols[self.COL_TEXT],
                                            'div.hiddenROC', 1)
            except BrokenPageError:
                pass
            else:
                detail.drop_tree()

            raw = (' '.join([
                txt.strip() for txt in cols[self.COL_TEXT].itertext()
            ])).strip()
            credit = self.parser.tocleanstring(cols[self.COL_CREDIT])
            debit = self.parser.tocleanstring(cols[self.COL_DEBIT])

            t.date = date
            t.rdate = date
            t.raw = re.sub(r'[ ]+', ' ', raw)
            t.label = re.sub('(.*?)( \d+)?  .*', r'\1', raw).strip()
            t.set_amount(credit, debit)
            if t.amount > 0:
                t.type = t.TYPE_ORDER
            else:
                t.type = t.TYPE_CARD

            yield t
Esempio n. 10
0
    def get_history(self):
        #checking if the card is still valid
        if self.document.xpath('//div[@id="errorbox"]'):
            return

        #adding a time delta because amex have hard time to put the date in a good interval
        beginning_date = self.get_beginning_debit_date() - datetime.timedelta(days=120)
        end_date = self.get_end_debit_date()

        guesser = ChaoticDateGuesser(beginning_date, end_date)

        for tr in reversed(self.document.xpath('//div[@id="txnsSection"]//tr[@class="tableStandardText"]')):
            cols = tr.findall('td')

            t = Transaction(tr.attrib['id'])

            day, month = self.parser.tocleanstring(cols[self.COL_DATE]).split(' ', 1)
            day = int(day)
            month = self.MONTHS.index(month.rstrip('.')) + 1
            date = guesser.guess_date(day, month)

            try:
                detail = self.parser.select(cols[self.COL_TEXT], 'div.hiddenROC', 1)
            except BrokenPageError:
                pass
            else:
                detail.drop_tree()

            raw = (' '.join([txt.strip() for txt in cols[self.COL_TEXT].itertext()])).strip()
            credit = self.parser.tocleanstring(cols[self.COL_CREDIT])
            debit = self.parser.tocleanstring(cols[self.COL_DEBIT])

            t.date = date
            t.rdate = date
            t.raw = re.sub(r'[ ]+', ' ', raw)
            t.label = re.sub('(.*?)( \d+)?  .*', r'\1', raw).strip()
            t.set_amount(credit, debit)
            if t.amount > 0:
                t.type = t.TYPE_ORDER
            else:
                t.type = t.TYPE_CARD

            yield t
Esempio n. 11
0
    def get_history(self, guesser):
        debit_date = self.get_debit_date()
        if debit_date is not None:
            guesser.current_date = debit_date

        for tr in reversed(self.document.xpath('//div[@id="txnsSection"]//tr[@class="tableStandardText"]')):
            cols = tr.findall("td")

            t = Transaction(tr.attrib["id"])

            day, month = self.parser.tocleanstring(cols[self.COL_DATE]).split(" ", 1)
            day = int(day)
            month = self.MONTHS.index(month.rstrip(".")) + 1
            date = guesser.guess_date(day, month)

            try:
                detail = self.parser.select(cols[self.COL_TEXT], "div.hiddenROC", 1)
            except BrokenPageError:
                pass
            else:
                detail.drop_tree()

            raw = (" ".join([txt.strip() for txt in cols[self.COL_TEXT].itertext()])).strip()
            credit = self.parser.tocleanstring(cols[self.COL_CREDIT])
            debit = self.parser.tocleanstring(cols[self.COL_DEBIT])

            t.date = date
            t.rdate = date
            t.raw = re.sub(r"[ ]+", " ", raw)
            t.label = re.sub("(.*?)( \d+)?  .*", r"\1", raw).strip()
            t.set_amount(credit, debit)
            if t.amount > 0:
                t.type = t.TYPE_ORDER
            else:
                t.type = t.TYPE_CARD

            yield t
Esempio n. 12
0
    def get_history(self, date_guesser):
        i = 0
        for tr in self.document.xpath('//table[@class="ca-table"]//tr'):
            parent = tr.getparent()
            while parent is not None and parent.tag != 'table':
                parent = parent.getparent()

            if parent.attrib.get('class', '') != 'ca-table':
                continue

            if tr.attrib.get('class', '') == 'tr-thead':
                heads = tr.findall('th')
                for i, head in enumerate(heads):
                    key = self.parser.tocleanstring(head)
                    if key == u'Crédit':
                        self.COL_CREDIT = i - len(heads)
                    elif key == u'Débit':
                        self.COL_DEBIT = i - len(heads)
                    elif key == u'Libellé':
                        self.COL_TEXT = i

            if not tr.attrib.get('class', '').startswith('ligne-'):
                continue

            cols = tr.findall('td')

            # On loan accounts, there is a ca-table with a summary. Skip it.
            if tr.find('th') is not None or len(cols) < 3:
                continue

            t = Transaction(i)

            col_text = cols[self.COL_TEXT]
            if len(col_text.xpath('.//br')) == 0:
                col_text = cols[self.COL_TEXT + 1]

            raw = self.parser.tocleanstring(col_text)
            date = self.parser.tocleanstring(cols[self.COL_DATE])
            credit = self.parser.tocleanstring(cols[self.COL_CREDIT])
            if self.COL_DEBIT is not None:
                debit = self.parser.tocleanstring(cols[self.COL_DEBIT])
            else:
                debit = ''

            day, month = map(int, date.split('/', 1))
            t.date = date_guesser.guess_date(day, month)
            t.rdate = t.date
            t.raw = raw

            # On some accounts' history page, there is a <font> tag in columns.
            if col_text.find('font') is not None:
                col_text = col_text.find('font')

            t.category = unicode(col_text.text.strip())
            t.label = re.sub('(.*)  (.*)', r'\2', t.category).strip()

            sub_label = col_text.find('br').tail
            if sub_label is not None and (
                    len(t.label) < 3 or t.label == t.category
                    or len(re.findall('[^\w\s]', sub_label)) /
                    float(len(sub_label)) <
                    len(re.findall('\d', t.label)) / float(len(t.label))):
                t.label = unicode(sub_label.strip())
            # Sometimes, the category contains the label, even if there is another line with it again.
            t.category = re.sub('(.*)  .*', r'\1', t.category).strip()

            t.type = self.TYPES.get(t.category, t.TYPE_UNKNOWN)

            # Parse operation date in label (for card transactions for example)
            m = re.match('(?P<text>.*) (?P<dd>[0-3]\d)/(?P<mm>[0-1]\d)$',
                         t.label)
            if not m:
                m = re.match('^(?P<dd>[0-3]\d)/(?P<mm>[0-1]\d) (?P<text>.*)$',
                             t.label)
            if m:
                if t.type in (t.TYPE_CARD, t.TYPE_WITHDRAWAL):
                    t.rdate = date_guesser.guess_date(
                        int(m.groupdict()['dd']),
                        int(m.groupdict()['mm']),
                        change_current_date=False)
                t.label = m.groupdict()['text'].strip()

            # Strip city or other useless information from label.
            t.label = re.sub('(.*)  .*', r'\1', t.label).strip()
            t.set_amount(credit, debit)
            yield t

            i += 1
Esempio n. 13
0
    def get_history(self, date_guesser):
        for tr in self.document.xpath('//table[@class="ca-table"]//tr'):
            parent = tr.getparent()
            while parent is not None and parent.tag != 'table':
                parent = parent.getparent()

            if parent.attrib.get('class', '') != 'ca-table':
                continue

            if tr.attrib.get('class', '') == 'tr-thead':
                heads = tr.findall('th')
                for i, head in enumerate(heads):
                    key = self.parser.tocleanstring(head)
                    if key == u'Débit':
                        self.COL_DEBIT = i - len(heads)
                    if key == u'Crédit':
                        self.COL_CREDIT = i - len(heads)
                    if key == u'Libellé':
                        self.COL_TEXT = i

            if not tr.attrib.get('class', '').startswith('ligne-'):
                continue

            cols = tr.findall('td')

            # On loan accounts, there is a ca-table with a summary. Skip it.
            if tr.find('th') is not None or len(cols) < 3:
                continue

            t = Transaction()

            col_text = cols[self.COL_TEXT]
            if len(col_text.xpath('.//br')) == 0:
                col_text = cols[self.COL_TEXT+1]

            raw = self.parser.tocleanstring(col_text)
            date = self.parser.tocleanstring(cols[self.COL_DATE])
            credit = self.parser.tocleanstring(cols[self.COL_CREDIT])
            if self.COL_DEBIT is not None:
                debit =  self.parser.tocleanstring(cols[self.COL_DEBIT])
            else:
                debit = ''

            day, month = map(int, date.split('/', 1))
            t.date = date_guesser.guess_date(day, month)
            t.rdate = t.date
            t.raw = raw

            # On some accounts' history page, there is a <font> tag in columns.
            if col_text.find('font') is not None:
                col_text = col_text.find('font')

            t.category = unicode(col_text.text.strip())
            t.label = re.sub('(.*)  (.*)', r'\2', t.category).strip()

            sub_label = col_text.find('br').tail
            if sub_label is not None and (len(t.label) < 3 or t.label == t.category or len(re.findall('[^\w\s]', sub_label))/float(len(sub_label)) < len(re.findall('\d', t.label))/float(len(t.label))):
                t.label = unicode(sub_label.strip())
            # Sometimes, the category contains the label, even if there is another line with it again.
            t.category = re.sub('(.*)  .*', r'\1', t.category).strip()

            t.type = self.TYPES.get(t.category, t.TYPE_UNKNOWN)

            # Parse operation date in label (for card transactions for example)
            m = re.match('(?P<text>.*) (?P<dd>[0-3]\d)/(?P<mm>[0-1]\d)$', t.label)
            if not m:
                m = re.match('^(?P<dd>[0-3]\d)/(?P<mm>[0-1]\d) (?P<text>.*)$', t.label)
            if m:
                if t.type in (t.TYPE_CARD, t.TYPE_WITHDRAWAL):
                    t.rdate = date_guesser.guess_date(int(m.groupdict()['dd']), int(m.groupdict()['mm']), change_current_date=False)
                t.label = m.groupdict()['text'].strip()

            # Strip city or other useless information from label.
            t.label = re.sub('(.*)  .*', r'\1', t.label).strip()
            t.set_amount(credit, debit)
            yield t