Exemple #1
0
 def filter(self, url):
     """Return the single value of ``self.querykey`` in the query string of *url*.

     When the key has no value in the parsed query string, the result of
     ``self.default_or_raise`` is returned.  Raises ``ParseError`` when the
     key carries more than one value.
     """
     key = self.querykey
     values = parse_qs(urlparse(url).query).get(key)
     if not values:
         return self.default_or_raise(ParseError('Key %s not found' % key))
     if len(values) > 1:
         raise ParseError('More than one value for key %s' % key)
     return values[0]
Exemple #2
0
 def filter(self, txt):
     """Convert *txt* with ``self.type_func``.

     Empty input, or input whose length is at most ``self.minlen`` (unless
     ``minlen`` is ``False``), is handed to ``self.default_or_raise`` with a
     ``ParseError``; so is any ``ValueError`` raised by the conversion.
     """
     rejected = empty(txt) or (
         self.minlen is not False and len(txt) <= self.minlen)
     if rejected:
         return self.default_or_raise(ParseError('Unable to parse %r' % txt))
     try:
         converted = self.type_func(txt)
     except ValueError as err:
         return self.default_or_raise(ParseError('Unable to parse %r: %s' % (txt, err)))
     return converted
Exemple #3
0
    def iter_accounts(self):
        """Yield one ``Account`` per account row of the statement page.

        Logs in first when ``self.islogged`` is false.  If the fetched page
        has no ``/html/body/table`` and its title is the "user not
        identified" one, logs in again and fetches the page once more; any
        other table-less page raises ``ParseError``.
        """
        url = "https://www.cmb.fr/domiweb/prive/particulier/releve/0-releve.act"

        def fetch_tree():
            # Download the statement page and parse it as HTML.
            content = self.browser.open(url).content
            return etree.parse(StringIO(content), etree.HTMLParser())

        if not self.islogged:
            self.login()

        tree = fetch_tree()
        if not tree.xpath('/html/body/table'):
            title = tree.xpath('/html/head/title')[0].text
            if title != u"Utilisateur non identifié":
                raise ParseError()
            # The site answered with its "user not identified" page:
            # log in again and retry exactly once.
            self.login()
            tree = fetch_tree()
            if not tree.xpath('/html/body/table'):
                raise ParseError()

        ignored_classes = ('LnTit', 'LnTot', 'LnMnTiers', None)
        for row in tree.xpath('/html/body//table[contains(@class, "Tb")]/tr'):
            if row.get('class') in ignored_classes:
                continue

            account = Account()
            cells = row.xpath('td')

            links = cells[1].xpath('a')
            account.label = unicode(links[0].text).strip()
            href = links[0].get('href')
            found = match(r"javascript:releve\((.*),'(.*)','(.*)'\)", href)
            if not found:
                continue
            valeur, valeur2, cmbtype = found.groups()
            account.id = unicode(valeur + valeur2 + cmbtype)
            account._cmbvaleur = valeur
            account._cmbvaleur2 = valeur2
            account._cmbtype = cmbtype

            # The balance text may be spread over nested nodes; glue it together.
            balance_text = u''.join(piece.strip() for piece in cells[2].itertext())
            account.balance = Decimal(
                balance_text.replace(',', '.').replace(u"\xa0", ''))

            spans = cells[4].xpath('a/span')
            if spans:
                coming_text = spans[0].text.replace(' ', '').replace(',', '.')
                account.coming = Decimal(coming_text.replace(u"\xa0", ''))
            else:
                account.coming = NotAvailable

            yield account
Exemple #4
0
 def filter(self, txt):
     """Parse *txt* with ``self.parse_func`` after applying ``self.translations``.

     Each translation is a ``(pattern, replacement)`` pair applied with
     ``pattern.sub``.  Empty input, and any ``ValueError``/``TypeError``
     raised while translating or parsing, is handed to
     ``self.default_or_raise`` wrapped in a ``ParseError``.
     """
     if empty(txt) or txt == '':
         return self.default_or_raise(ParseError('Unable to parse %r' % txt))
     try:
         for pattern, replacement in self.translations or ():
             txt = pattern.sub(replacement, txt)
         return self.parse_func(txt, dayfirst=self.dayfirst, fuzzy=self.fuzzy)
     except (ValueError, TypeError) as err:
         return self.default_or_raise(ParseError('Unable to parse %r: %s' % (txt, err)))
Exemple #5
0
    def login(self, username, password):
        """Log in through the Facebook OAuth dialog and store the access token.

        Submits the login form without following redirects, follows the
        ``Location`` header by hand, confirms the authorization form, then
        extracts ``access_token`` from the response text into
        ``self.access_token`` and stores ``self.request('/me')`` in
        ``self.info``.

        Raises ``BrowserIncorrectPassword`` when the login is not redirected
        or the next page shows an error block, and ``ParseError`` when no
        token is found.
        """
        oauth_url = (
            'https://www.facebook.com/v2.6/dialog/oauth?redirect_uri=fb464891386855067%3A%2F%2Fauthorize%2F&display=touch&state=%7B%22challenge%22%3A%22IUUkEUqIGud332lfu%252BMJhxL4Wlc%253D%22%2C%220_auth_logger_id%22%3A%2230F06532-A1B9-4B10-BB28-B29956C71AB1%22%2C%22com.facebook.sdk_client_state%22%3Atrue%2C%223_method%22%3A%22sfvc_auth%22%7D&scope=user_birthday%2Cuser_photos%2Cuser_education_history%2Cemail%2Cuser_relationship_details%2Cuser_friends%2Cuser_work_history%2Cuser_likes&response_type=token%2Csigned_request&default_audience=friends&return_scopes=true&auth_type=rerequest&client_id='
            + self.CLIENT_ID +
            '&ret=login&sdk=ios&logger_id=30F06532-A1B9-4B10-BB28-B29956C71AB1&ext=1470840777&hash=AeZqkIcf-NEW6vBd'
        )
        self.location(oauth_url)

        login_form = HTMLPage(self, self.response).get_form()
        login_form['email'] = username
        login_form['pass'] = password
        login_form.submit(allow_redirects=False)
        if 'Location' not in self.response.headers:
            raise BrowserIncorrectPassword()

        self.location(self.response.headers['Location'])

        page = HTMLPage(self, self.response)
        error_xpath = '//td/div[has-class("s")]'
        if page.doc.xpath(error_xpath):
            raise BrowserIncorrectPassword(CleanText(error_xpath)(page.doc))

        page.get_form(nr=0, submit='//input[@name="__CONFIRM__"]').submit()

        token = re.search('access_token=([^&]+)&', self.response.text)
        if not token:
            raise ParseError('Unable to find access_token')
        self.access_token = token.group(1)

        self.info = self.request('/me')
Exemple #6
0
    def login(self, username, password):
        """Log in through the Facebook OAuth popup and store the access token.

        Submits the ``login_form`` form without following redirects, follows
        the ``Location`` header by hand, then reads ``access_token`` from the
        text of the first ``<script>`` element into ``self.access_token``.

        Raises ``BrowserIncorrectPassword`` when the login is not redirected
        or the next page shows an error block, and ``ParseError`` when no
        token is found.
        """
        oauth_url = 'https://www.facebook.com/v2.9/dialog/oauth?app_id=484681304938818&auth_type=rerequest&channel_url=https%3A%2F%2Fstaticxx.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D44%23cb%3Df33dd8340f36618%26domain%3Dwww.okcupid.com%26origin%3Dhttps%253A%252F%252Fwww.okcupid.com%252Ff5818a5f355be8%26relation%3Dopener&client_id=484681304938818&display=popup&domain=www.okcupid.com&e2e=%7B%7D&fallback_redirect_uri=https%3A%2F%2Fwww.okcupid.com%2Flogin&locale=en_US&origin=1&redirect_uri=https%3A%2F%2Fstaticxx.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D44%23cb%3Df2ce4ca90b82cb4%26domain%3Dwww.okcupid.com%26origin%3Dhttps%253A%252F%252Fwww.okcupid.com%252Ff5818a5f355be8%26relation%3Dopener%26frame%3Df3f40f304ac5e9&response_type=token%2Csigned_request&scope=email%2Cuser_birthday%2Cuser_photos&sdk=joey&version=v2.9'
        self.location(oauth_url)

        login_form = HTMLPage(self, self.response).get_form('//form[@id="login_form"]')
        login_form['email'] = username
        login_form['pass'] = password
        # Extra header and cookies are set on the session before submitting.
        self.session.headers['cookie-installing-permission'] = 'required'
        self.session.cookies['wd'] = '640x1033'
        self.session.cookies['act'] = '1563018648141%2F0'
        login_form.submit(allow_redirects=False)
        if 'Location' not in self.response.headers:
            raise BrowserIncorrectPassword()

        self.location(self.response.headers['Location'])

        page = HTMLPage(self, self.response)
        error_xpath = '//td/div[has-class("s")]'
        if page.doc.xpath(error_xpath):
            raise BrowserIncorrectPassword(CleanText(error_xpath)(page.doc))

        first_script = page.doc.xpath('//script')[0].text

        token = re.search('access_token=([^&]+)&', first_script)
        if not token:
            raise ParseError('Unable to find access_token')
        self.access_token = token.group(1)
Exemple #7
0
    def get_history(self, account, coming=False):
        """Yield the transactions of *account*, fetched from the API 50 at a time.

        With ``coming=False`` only transactions whose ``_coming`` flag is
        false are yielded.  With ``coming=True`` only flagged transactions
        are yielded and the generator stops at the first unflagged one.

        Raises ``NotImplementedError`` for loan accounts and for accounts
        whose ``_consultable`` is false, and ``ParseError`` when the API
        returns an operation id that was already seen.
        """
        if account.type is Account.TYPE_LOAN or not account._consultable:
            raise NotImplementedError()

        if account._univers != self.current_univers:
            self.move_to_univers(account._univers)

        today = date.today()
        # Operation ids already returned, used to detect a repeating page.
        seen = set()
        offset = 0
        next_page = True
        while next_page:
            operation_list = self._make_api_call(
                account=account,
                start_date=date(day=1, month=1, year=2000), end_date=date.today(),
                offset=offset, max_length=50,
            )
            transactions = []
            for op in reversed(operation_list):
                t = Transaction()
                t.id = op['id']
                if op['id'] in seen:
                    raise ParseError('There are several transactions with the same ID, probably an infinite loop')

                seen.add(t.id)
                # Timestamps are divided by 1000 before date.fromtimestamp().
                d = date.fromtimestamp(op.get('dateDebit', op.get('dateOperation'))/1000)
                op['details'] = [re.sub('\s+', ' ', i).replace('\x00', '') for i in op['details'] if i]  # sometimes they put "null" elements...
                label = re.sub('\s+', ' ', op['libelle']).replace('\x00', '')
                raw = ' '.join([label] + op['details'])
                vdate = date.fromtimestamp(op.get('dateValeur', op.get('dateDebit', op.get('dateOperation')))/1000)
                t.parse(d, raw, vdate=vdate)
                t.amount = Decimal(str(op['montant']))
                t.rdate = date.fromtimestamp(op.get('dateOperation', op.get('dateDebit'))/1000)
                if 'categorie' in op:
                    t.category = op['categorie']
                # Set after t.parse(), so this value is the one kept on the transaction.
                t.label = label
                t._coming = op['intraday']
                if t._coming:
                    # coming transactions have a random uuid id (inconsistent between requests)
                    t.id = ''
                # A transaction dated after today is also treated as coming.
                t._coming |= (t.date > today)

                if t.type == Transaction.TYPE_CARD and account.type == Account.TYPE_CARD:
                    t.type = Transaction.TYPE_DEFERRED_CARD

                transactions.append(t)

            # Transactions are unsorted
            for t in sorted_transactions(transactions):
                if coming == t._coming:
                    yield t
                elif coming and not t._coming:
                    # coming transactions are at the top of history
                    self.logger.debug('stopping coming after %s', t)
                    return

            # An empty page ends the pagination.
            next_page = bool(transactions)
            offset += 50

            assert offset < 30000, 'the site may be doing an infinite loop'
Exemple #8
0
        def obj_url(self):
            """Return the stream URL for this item, without its query string.

            The preferred method is ``self.obj._method``; when it is not in
            ``request.files`` the first available method is used instead.
            For ``hls`` the entry holds a single ``url``; otherwise it is
            indexed by ``self.obj._quality``, falling back to index 0 when
            the quality is out of range.

            Raises ``ParseError`` when no stream method is listed at all and
            ``ValueError`` when the selected method has no stream.
            """
            files = self.el['request']['files']

            if not files:
                raise ParseError(
                    'Unable to detect any stream method for id: %r (available: %s)'
                    %
                    (int(Field('id')(self)), list(files)))

            # Chosen method is not available, we choose another one.
            method = self.obj._method
            if method not in files:
                # dict.keys()[0] only works on Python 2 (keys() is a view on
                # Python 3); next(iter(...)) yields the same first key on both.
                method = next(iter(files))

            streams = files[method]
            if not streams:
                raise ValueError('There is no url available for id: %r' %
                                 (int(Field('id')(self))))

            # stream is single for hls, just return the url
            stream = streams['url'] if method == 'hls' else None

            # ...but a list for progressive
            # we assume the list is sorted by quality with best first
            if not stream:
                quality = self.obj._quality
                if quality < len(streams):
                    stream = streams[quality]['url']
                else:
                    stream = streams[0]['url']

            return stream.split('?')[0]
Exemple #9
0
    def login(self, username, password):
        """Log in through the Facebook OAuth dialog and store the access token.

        Submits the ``login_form`` form without following redirects, follows
        the ``Location`` header by hand, confirms the authorization form,
        then extracts ``access_token`` from the response text into
        ``self.access_token`` and stores ``self.request('/me')`` in
        ``self.info``.

        Raises ``BrowserIncorrectPassword`` when the login is not redirected
        or the next page shows an error block, and ``ParseError`` when no
        token is found.
        """
        oauth_url = 'https://www.facebook.com/dialog/oauth?client_id=%s&redirect_uri=fbconnect://success&scope=email,user_birthday,user_friends,public_profile,user_photos,user_likes&response_type=token' % self.CLIENT_ID
        self.location(oauth_url)

        login_form = HTMLPage(self, self.response).get_form('//form[@id="login_form"]')
        login_form['email'] = username
        login_form['pass'] = password
        login_form.submit(allow_redirects=False)
        if 'Location' not in self.response.headers:
            raise BrowserIncorrectPassword()

        self.location(self.response.headers['Location'])

        page = HTMLPage(self, self.response)
        error_xpath = '//td/div[has-class("s")]'
        if page.doc.xpath(error_xpath):
            raise BrowserIncorrectPassword(CleanText(error_xpath)(page.doc))

        page.get_form(nr=0, submit='//input[@name="__CONFIRM__"]').submit()

        token = re.search('access_token=([^&]+)&', self.response.text)
        if not token:
            raise ParseError('Unable to find access_token')
        self.access_token = token.group(1)

        self.info = self.request('/me')
Exemple #10
0
    def build_doc(self, content):
        """Return ``decompress_pdf(content)``.

        An ``OSError`` from the decompression is re-raised as a
        ``ParseError`` that tells the user to install mupdf-tools.
        """
        try:
            return decompress_pdf(content)
        except OSError as err:
            raise ParseError(u'Make sure mupdf-tools is installed (%s)' % err)
Exemple #11
0
        def obj__formats(self):
            """Collect media URLs from the page's ``buildPlayer(...)`` call.

            Returns a dict of the form ``{extension: {quality: url}}``.  The
            dict is empty when no ``buildPlayer`` call is found in the page
            scripts.

            Raises ``ParseError`` with the error title when the player
            payload carries an ``error`` entry.
            """
            # Bound up front: the original only assigned it inside the
            # ``if player`` branch, so a page without a player crashed with
            # UnboundLocalError on ``return formats``.
            formats = {}

            player = Regexp(CleanText('//script'),
                            r'.*buildPlayer\((.*}})\);.*',
                            default=None)(self)
            if not player:
                return formats

            info = json.loads(player)
            if info.get('error') is not None:
                raise ParseError(info['error']['title'])
            metadata = info.get('metadata')

            for quality, media_list in metadata['qualities'].items():
                for media in media_list:
                    media_url = media.get('url')
                    if not media_url:
                        continue
                    if media.get('type') == 'application/vnd.lumberjack.manifest':
                        continue
                    ext = determine_ext(media_url)
                    known = formats.get(ext)
                    if known is not None and quality in known:
                        known[quality] = media_url
                    else:
                        # NOTE(review): when ``ext`` is already known with other
                        # qualities, this replaces the whole mapping and drops
                        # them.  Kept as in the original; confirm the intent.
                        formats[ext] = {quality: media_url}

            return formats
Exemple #12
0
            def __call__(self, item):
                """Fill label, category, type and rdate of ``item.obj`` from its raw label.

                The raw text comes from the parent filter.  The first entry of
                ``patterns`` whose regexp matches the raw text sets the
                transaction type and, through its named groups (``text``,
                ``category``, ``dd``, ``mm``, ``yy``, ``HH``, ``MM``), may
                override the label, category and ``rdate``.

                Returns the raw text.  Raises ``ParseError`` when the date
                pieces found in the label do not form a valid date.
                """
                raw = super(Filter, self).__call__(item)
                # Default rdate to the transaction date until a pattern says otherwise.
                if item.obj.rdate is NotLoaded:
                    item.obj.rdate = item.obj.date
                item.obj.category = NotAvailable
                # A double space splits the raw text into "category  label".
                if '  ' in raw:
                    item.obj.category, useless, item.obj.label = [part.strip() for part in raw.partition('  ')]
                else:
                    item.obj.label = raw

                for pattern, _type in patterns:
                    m = pattern.match(raw)
                    if m:
                        args = m.groupdict()

                        def inargs(key):
                            """
                            inner function to check if a key is in args,
                            and is not None.
                            """
                            return args.get(key, None) is not None

                        item.obj.type = _type
                        if inargs('text'):
                            item.obj.label = args['text'].strip()
                        if inargs('category'):
                            item.obj.category = args['category'].strip()

                        # Set date from information in raw label.
                        if inargs('dd') and inargs('mm'):
                            # A day given as '00' is replaced by the 1st.
                            dd = int(args['dd']) if args['dd'] != '00' else 1
                            mm = int(args['mm'])

                            if inargs('yy'):
                                yy = int(args['yy'])
                            else:
                                # No year in the label: start from the transaction date.
                                d = item.obj.date
                                try:
                                    d = d.replace(month=mm, day=dd)
                                except ValueError:
                                    # Day/month invalid in that year: retry in the previous year.
                                    d = d.replace(year=d.year-1, month=mm, day=dd)

                                yy = d.year
                                # A date after the transaction date is moved back one year.
                                if d > item.obj.date:
                                    yy -= 1

                            # Years below 100 are offset by 2000.
                            if yy < 100:
                                yy += 2000

                            try:
                                if inargs('HH') and inargs('MM'):
                                    item.obj.rdate = datetime.datetime(yy, mm, dd, int(args['HH']), int(args['MM']))
                                else:
                                    item.obj.rdate = datetime.date(yy, mm, dd)
                            except ValueError as e:
                                raise ParseError('Unable to parse date in label %r: %s' % (raw, e))

                        # Only the first matching pattern is applied.
                        break

                return raw
Exemple #13
0
    def login(self, login, passwd):
        """Submit the authentication form; return ``True`` on success.

        The password is translated through the virtual keyboard, XOR-ed
        with the seed found after ``var aleatoire = `` in a page script,
        and sent base64-encoded.

        Returns ``False`` when the virtual keyboard cannot be built or the
        submission raises ``BrowserUnavailable``.  Raises ``ParseError``
        when no script defines the seed.
        """
        try:
            keyboard = LCLVirtKeyboard(self)
        except VirtKeyboardError as err:
            self.logger.exception(err)
            return False

        password = keyboard.get_string_code(passwd)

        marker = "var aleatoire = "
        seed = -1
        for script in self.doc.findall("//script"):
            text = script.text
            if not text:
                continue
            position = text.find(marker)
            if position == -1:
                continue
            # The seed is the single character one position past the marker.
            start = position + len(marker) + 1
            seed = int(text[start:start + 1])
            break
        if seed == -1:
            raise ParseError("Variable 'aleatoire' not found")

        form = self.get_form('//form[@id="formAuthenticate"]')
        form['identifiant'] = login
        form['postClavierXor'] = base64.b64encode(self.myXOR(password, seed))
        try:
            form['identifiantRouting'] = self.browser.IDENTIFIANT_ROUTING
        except AttributeError:
            # The browser does not define a routing identifier; skip the field.
            pass

        try:
            form.submit()
        except BrowserUnavailable:
            # Login is not valid
            return False
        return True
Exemple #14
0
 def transfer(self, account, recipient, amount, reason):
     """Run a transfer from *account* to the recipient whose id is *recipient*.

     Returns ``self.page.recap()`` after the confirmation step.

     Raises ``TransferError`` when the recipient id is not among
     ``self.get_recipients(account)`` or the confirmation page is not
     reached, and ``ParseError`` when the recap is empty.
     """
     # Automatically get the good transfer page
     self.logger.debug('Search %s' % recipient)
     for candidate in self.get_recipients(account):
         self.logger.debug('Found %s ' % candidate.id)
         if candidate.id == recipient:
             recipient = candidate
             break
     else:
         raise TransferError('Recipient not found')

     self.transferpage.open(
         data=self.page.buildonclick(recipient, account))
     self.page.transfer(recipient, amount, reason)
     self.valtransferpage.go()
     if not self.valtransferpage.is_here():
         raise TransferError("Invalid transfer (no confirmation page)")

     self.page.confirm(self.password)
     self.valtransferpage.go()
     # The recap is consumed to check it is not empty, then fetched again.
     if len(list(self.page.recap())) == 0:
         raise ParseError('Unable to find confirmation')
     return self.page.recap()
Exemple #15
0
    def filter(self, txt):
        """Build ``self.klass`` from the named groups ``self._regexp`` finds in *txt*.

        ``self.kwargs`` maps each keyword argument of ``self.klass`` to a
        group name; a group that did not take part in the match counts as 0.
        When the regexp does not match, the result of
        ``self.default_or_raise`` is returned.
        """
        found = self._regexp.search(txt)
        if not found:
            return self.default_or_raise(ParseError('Unable to find time in %r' % txt))

        groups = found.groupdict()
        arguments = {key: int(groups[group] or 0)
                     for key, group in self.kwargs.items()}
        return self.klass(**arguments)
Exemple #16
0
    def get_link(self, name):
        """Return the link stored next to *name* in one of the page scripts.

        Scripts are searched in document order for ``["<name>",'<link>'``
        and the first match wins.  Raises ``ParseError`` when no script
        contains it.
        """
        pattern = r"""\["%s",'([^']+)'""" % name
        for script in self.doc.xpath('//script'):
            found = re.search(pattern, script.text or '', flags=re.MULTILINE)
            if found:
                return found.group(1)

        raise ParseError('Link %r not found' % name)
Exemple #17
0
    def login(self, login, password):
        """Fill and submit the ``formulaire-login`` form.

        The password is translated to a code by the virtual keyboard.

        Raises ``ParseError`` when that code is not exactly 10 characters
        long.
        """
        vk = VirtKeyboard(self)

        form = self.get_form('//form[@id="formulaire-login"]')
        code = vk.get_string_code(password)
        # Explicit raise instead of ``assert``: assertions are removed under
        # ``python -O`` and surface as AssertionError, not ParseError.
        if len(code) != 10:
            raise ParseError("Wrong number of character.")
        form['identifiant'] = login
        form['code'] = code
        form.submit()
Exemple #18
0
 def to_python(self, m):
     """Convert MatchObject to python value.

     The name of the first named group that took part in the match selects
     the conversion: ``int``/``float``/``str`` go through ``literal_eval``,
     ``bool`` compares the text with ``'true'`` and ``None`` returns
     ``None``.  For any other group name ``self.default`` is returned when
     it is truthy.

     Raises ``ParseError`` when ``self.need_type`` is set and differs from
     the detected type, or when the value cannot be converted and there is
     no default.
     """
     # Pre-bound so a match without named groups reaches the default /
     # ParseError path instead of failing on an unbound name.
     t = v = None
     # items() works on Python 2 and 3; iteritems() is Python 2 only.
     for t, v in m.groupdict().items():
         if v is not None:
             break
     if self.need_type and t != self.need_type:
         raise ParseError('Value with type %s not found' % self.need_type)
     if t in ('int', 'float'):
         return literal_eval(v)
     if t == 'str':
         value = literal_eval(v)
         # Only byte strings need decoding; text strings have no decode()
         # on Python 3.
         if isinstance(value, bytes):
             value = value.decode('utf-8')
         return value
     if t == 'bool':
         return v == 'true'
     if t == 'None':
         return
     if self.default:
         return self.default
     raise ParseError('Unable to parse %r value' % m.group(0))
Exemple #19
0
 def get_date(self):
     """Return the date that follows "au" in the employment-period cell.

     The cell is looked up in ``self.doc`` first and in ``self._doc2`` when
     the first lookup finds nothing.  Raises ``ParseError`` when neither
     document has it.
     """
     period_cells = '//td[text()="Période d\'emploi"]/following-sibling::td'
     cells = self.doc.xpath(period_cells) or self._doc2.xpath(period_cells)
     if not cells:
         raise ParseError()
     end_date = Date(Regexp(CleanText("."), r"au (\d{2}\/\d{2}\/\d{4})"),
                     dayfirst=True)
     return end_date(cells[0])
Exemple #20
0
            def parse(self, el):
                """Prepare ``self.env`` (``id``, ``balance``, ``coming``) for one account row.

                Raises ``SkipItem`` for ``POR_SyntheseLst`` links, for links
                carrying neither ``rib`` nor ``webid``, and for rows whose id
                is already in ``self.parent.objects`` (their amount is added to
                that account's ``coming``).  Raises ``ParseError`` when neither
                the second nor the third cell holds a decimal amount.
                """
                link = el.xpath('./td[1]/a')[0].get('href', '')
                if link.startswith('POR_SyntheseLst'):
                    raise SkipItem()

                query = parse_qs(urlparse(link).query)
                if 'rib' not in query and 'webid' not in query:
                    raise SkipItem()

                # The first of td[2] / td[3] that parses as a decimal is the balance.
                for cell in el.xpath('./td[2] | ./td[3]'):
                    try:
                        balance = CleanDecimal('.', replace_dots=True)(cell)
                    except InvalidOperation:
                        continue
                    break
                else:
                    raise ParseError('Unable to find balance for account %s' %
                                     CleanText('./td[1]/a')(el))

                account_id = query['rib'][0] if 'rib' in query else query['webid'][0]

                # Handle cards
                if account_id in self.parent.objects:
                    account = self.parent.objects[account_id]
                    if not account.coming:
                        account.coming = Decimal('0.0')
                    account.coming += balance
                    account._card_links.append(link)
                    raise SkipItem()

                self.env['id'] = account_id

                # Handle real balances
                page = self.page.browser.open(link).page
                if page:
                    coming = page.find_amount(u"Opérations à venir")
                    accounting = page.find_amount(u"Solde comptable")
                else:
                    coming = accounting = None

                if accounting is not None:
                    if accounting + (coming or Decimal('0')) != balance:
                        self.page.logger.warning('%s + %s != %s' %
                                                 (accounting, coming, balance))
                    # The accounting balance of the detail page takes precedence.
                    balance = accounting

                self.env['balance'] = balance
                self.env['coming'] = coming or NotAvailable
Exemple #21
0
    def get_history(self, account):
        """Yield the transactions of *account*, fetched from the API 50 at a time.

        Each page is yielded sorted by ``rdate``, most recent first; paging
        stops at the first page without operations.

        Raises ``NotImplementedError`` when ``account._consultable`` is
        false and ``ParseError`` when an operation id is returned twice.
        """
        if not account._consultable:
            raise NotImplementedError()

        if account._univers != self.current_univers:
            self.move_to_univers(account._univers)

        url_template = '/transactionnel/services/applications/operations/get/%(number)s/%(nature)s/00/%(currency)s/%(startDate)s/%(endDate)s/%(offset)s/%(limit)s'
        page_size = 50

        def to_date(milliseconds):
            # Timestamps are divided by 1000 before date.fromtimestamp().
            return date.fromtimestamp(milliseconds / 1000)

        seen = set()
        offset = 0
        while True:
            r = self.api_open(url_template % {
                'number': account._number,
                'nature': account._nature,
                'currency': account.currency,
                'startDate': '2000-01-01',
                'endDate': date.today().strftime('%Y-%m-%d'),
                'offset': offset,
                'limit': page_size,
            })
            offset += page_size

            operations = r.json()['content']['operations']
            transactions = []
            for op in reversed(operations):
                t = Transaction()
                if op['id'] in seen:
                    raise ParseError(
                        'There are several transactions with the same ID, probably an infinite loop'
                    )
                t.id = op['id']
                seen.add(t.id)
                t.amount = Decimal(str(op['montant']))
                t.date = to_date(op.get('dateDebit', op.get('dateOperation')))
                t.rdate = to_date(op.get('dateOperation', op.get('dateDebit')))
                t.vdate = to_date(
                    op.get('dateValeur',
                           op.get('dateDebit', op.get('dateOperation'))))
                if 'categorie' in op:
                    t.category = op['categorie']
                t.label = op['libelle']
                t.raw = ' '.join([op['libelle']] + op['details'])
                transactions.append(t)

            # Transactions are unsorted
            for t in sorted(transactions, key=lambda t: t.rdate, reverse=True):
                yield t

            # An empty page ends the pagination.
            if not operations:
                break
Exemple #22
0
    def __call__(self, item):
        """Guess a full date from the (day, month) pair selected on *item*.

        The selected value is either a two-element sequence or a string
        split on ``/`` or ``-``.  Raises ``ParseError`` when it does not
        yield exactly two elements.
        """
        values = self.select(self.selector, item)
        guesser = self.date_guesser
        # In case Env() is used to give date_guesser.
        if isinstance(guesser, _Filter):
            guesser = self.select(guesser, item)

        if isinstance(values, basestring):
            values = re.split('[/-]', values)
        if len(values) != 2:
            raise ParseError('Unable to take (day, month) tuple from %r' % values)

        day, month = map(int, values)
        return guesser.guess_date(day, month, **self.kwargs)
Exemple #23
0
    def login(self, login, password):
        """Fill and submit the ``formulaire-login`` form in pinpad mode.

        The password is translated to a code by the virtual keyboard.

        Raises ``ParseError`` when that code is not exactly 10 characters
        long.
        """
        vk = VirtKeyboard(self)

        form = self.get_form('//form[@id="formulaire-login"]')
        code = vk.get_string_code(password)
        # Explicit raise instead of ``assert``: assertions are removed under
        # ``python -O`` and surface as AssertionError, not ParseError.
        if len(code) != 10:
            raise ParseError("Wrong number of character.")
        form['identifiant'] = login
        form['codePinpad'] = code
        form['task'] = 'Login'
        form['process'] = 'Login'
        form['eventid'] = 'suivant'
        form['modeCodeSecret'] = 'pinpad'
        form['personneIdentifiee'] = 'N'
        form.submit()
Exemple #24
0
 def obj_url(self):
     """Return the stream URL for the preferred codec and quality.

     Codecs are preferred in the order h264, vp8, vp6; the ``hd`` quality
     is used when the codec offers it, ``sd`` otherwise.  Raises
     ``ParseError`` when none of the three codecs is listed.
     """
     files = self.el['request']['files']
     codec = next(
         (name for name in ('h264', 'vp8', 'vp6') if name in files), None)
     if not codec:
         raise ParseError(
             'Unable to detect available codec for id: %r' %
             int(Field('id')(self)))
     quality = 'hd' if 'hd' in files[codec] else 'sd'
     return files[codec][quality]['url']
Exemple #25
0
    def filter(self, text):
        """Turn *text* into a ``Decimal``.

        The text is first cleaned by the parent filter.  When
        ``self.replace_dots`` is set, the thousands separator is removed and
        the decimal separator becomes ``.``; a tuple gives the two
        separators explicitly, any other truthy value means ``.`` and ``,``.
        Every character that is not a digit, ``-`` or ``.`` is then dropped.
        When ``self.sign`` is set, the value is multiplied by
        ``self.sign(<cleaned text>)``.

        Empty input and unparsable numbers go through
        ``self.default_or_raise``.
        """
        if empty(text):
            return self.default_or_raise(ParseError('Unable to parse %r' % text))

        cleaned = super(CleanDecimal, self).filter(text)
        number = cleaned
        if self.replace_dots:
            if type(self.replace_dots) is tuple:
                thousands_sep, decimal_sep = self.replace_dots
            else:
                thousands_sep, decimal_sep = '.', ','
            number = cleaned.replace(thousands_sep, '').replace(decimal_sep, '.')

        try:
            value = Decimal(re.sub(r'[^\d\-\.]', '', number))
            if self.sign:
                value *= self.sign(cleaned)
        except InvalidOperation as err:
            return self.default_or_raise(err)
        return value
Exemple #26
0
    def get_content(self, _id):
        """Fetch the article or comment designated by *_id*.

        ``self.parse_id`` turns *_id* into a URL and a normalized id.
        Returns ``None`` when there is no URL.  On a content page, a URL
        ending in ``#comment-<n>`` selects that comment, otherwise the
        article is returned.  Raises ``ParseError`` when the browser lands
        on neither a comment nor a content page.
        """
        url, _id = self.parse_id(_id)
        if url is None:
            return None

        self.location(url)

        if self.comment.is_here():
            content = self.page.get_comment()
        elif not self.content.is_here():
            raise ParseError('Not on a content or comment page (%r)' %
                             self.page)
        else:
            anchor = re.match('.*#comment-(\d+)$', url)
            if anchor:
                content = self.page.get_comment(int(anchor.group(1)))
            else:
                content = self.page.get_article()

        if _id is not None:
            content.id = _id
        return content
Exemple #27
0
    def iter_history(self, account):
        """Yield a ``Transaction`` for each row of the account's statement table.

        Logs in first when ``self.islogged`` is false.  If the fetched page
        has no ``/html/body/table`` and its title is the "user not
        identified" one, logs in again and fetches the page once more; any
        other table-less page raises ``ParseError``.
        """
        if not self.islogged:
            self.login()

        statement = "10-releve.act" if account._cmbtype == 'D' else "2-releve.act"
        page = "https://www.cmb.fr/domiweb/prive/particulier/releve/" + statement
        page += "?noPageReleve=1&indiceCompte=" + account._cmbvaleur
        page += "&typeCompte=" + account._cmbvaleur2
        page += "&deviseOrigineEcran=EUR"

        def load_tree():
            # Download the statement page and parse it as HTML.
            content = self.browser.open(page).content
            return etree.parse(StringIO(content), etree.HTMLParser())

        tree = load_tree()
        tables = tree.xpath('/html/body/table')
        if not tables:
            title = tree.xpath('/html/head/title')[0].text
            if title != u"Utilisateur non identifié":
                raise ParseError()
            # The site answered with its "user not identified" page:
            # log in again and retry exactly once.
            self.login()
            tree = load_tree()
            tables = tree.xpath('/html/body/table')
            if not tables:
                raise ParseError()

        index = 0
        for table in tables:
            if table.get('id') != "tableMouvements":
                continue
            for row in table.getiterator('tr'):
                if row.get('class') in ('LnTit', 'LnTot'):
                    continue

                operation = Transaction(index)
                cells = row.xpath('td')

                # The date cell reads day/month/year; date() wants the reverse order.
                day_parts = cells[1].xpath('div')[0].text.split('/')
                operation.date = date(*reversed([int(x) for x in day_parts]))

                label = cells[2].xpath('div')[0].xpath('a')[0].text.replace('\n', '')
                operation.raw = unicode(' '.join(label.split()))
                for pattern, _type, _label in self.LABEL_PATTERNS:
                    found = pattern.match(operation.raw)
                    if found:
                        operation.type = _type
                        operation.label = sub('[ ]+', ' ', _label %
                                              found.groupdict()).strip()
                        break

                amount = cells[3].text
                if amount.count(',') == 1:
                    # An amount found in td[3] is stored negated.
                    amount = amount.replace(',', '.').replace(u'\xa0', '')
                    operation.amount = -Decimal(amount)
                else:
                    # Otherwise the amount is read from td[4] and kept as is.
                    amount = cells[4].text
                    amount = amount.replace(',', '.').replace(u'\xa0', '')
                    operation.amount = Decimal(amount)

                index += 1
                yield operation
Exemple #28
0
            def parse(self, el):
                """Prepare ``self.env`` (``id``, ``_is_webid``, ``balance``, ``coming``) for one account row.

                Raises ``SkipItem`` for links containing ``POR_SyntheseLst``,
                for links carrying neither ``rib`` nor ``webid``, and for rows
                whose id is already in ``self.parent.objects``.  Those rows are
                either recorded in the browser's ``fleet_pages`` or have their
                amount added to the existing account's ``coming``.  Raises
                ``ParseError`` when neither the second nor the third cell
                holds a decimal amount.
                """
                link = el.xpath('./td[1]/a')[0].get('href', '')
                if 'POR_SyntheseLst' in link:
                    raise SkipItem()

                query = parse_qs(urlparse(link).query)
                if 'rib' not in query and 'webid' not in query:
                    raise SkipItem()

                # The first of td[2] / td[3] that parses as a decimal is the balance.
                for cell in el.xpath('./td[2] | ./td[3]'):
                    try:
                        balance = CleanDecimal('.', replace_dots=True)(cell)
                    except InvalidOperation:
                        continue
                    break
                else:
                    raise ParseError('Unable to find balance for account %s' %
                                     CleanText('./td[1]/a')(el))

                self.env['_is_webid'] = False
                browser = self.page.browser
                if browser.is_new_website:
                    account_id = CleanText(
                        './td[1]/a/node()[contains(@class, "doux")]',
                        replace=[(' ', '')])(el)
                elif 'rib' in query:
                    account_id = query['rib'][0]
                else:
                    account_id = query['webid'][0]
                    self.env['_is_webid'] = True

                page = browser.open(link).page

                # Handle cards
                if account_id in self.parent.objects:
                    fleet_pages = browser.fleet_pages
                    if page.is_fleet() or account_id in fleet_pages:
                        if account_id not in fleet_pages:
                            fleet_pages[account_id] = []
                        fleet_pages[account_id].append(page)
                    else:
                        account = self.parent.objects[account_id]
                        if not account.coming:
                            account.coming = Decimal('0.0')
                        account.coming += balance
                        account._card_links.append(link)
                    raise SkipItem()

                self.env['id'] = account_id

                # Handle real balances
                if page:
                    coming = page.find_amount(u"Opérations à venir")
                    accounting = page.find_amount(u"Solde comptable")
                else:
                    coming = accounting = None

                if accounting is not None:
                    if accounting + (coming or Decimal('0')) != balance:
                        self.page.logger.warning('%s + %s != %s' %
                                                 (accounting, coming, balance))
                    # The accounting balance of the detail page takes precedence.
                    balance = accounting

                self.env['balance'] = balance
                self.env['coming'] = coming or NotAvailable
Exemple #29
0
 def filter(self, timestamp):
     """Convert *timestamp* to a ``date`` after dropping its last three characters.

     Presumably the input is a millisecond timestamp given as text (to be
     confirmed against callers).  A ``TypeError`` during the conversion is
     handed to ``self.default_or_raise`` as a ``ParseError``.
     """
     try:
         seconds = int(timestamp[:-3])
         return dt.date.fromtimestamp(seconds)
     except TypeError:
         return self.default_or_raise(
             ParseError('Element %r not found' % self.selector))
Exemple #30
0
 def __call__(self, item):
     """Return ``item.env[self.name]``.

     A missing key is handed to ``self.default_or_raise`` as a
     ``ParseError``.
     """
     try:
         value = item.env[self.name]
     except KeyError:
         return self.default_or_raise(
             ParseError('Environment variable %s not found' % self.name))
     return value