def do_login(self):
    """Log in to the Bouygues website, then fetch the OAuth tokens the API needs.

    Side effects: sets ``self.headers`` (Bearer token) and ``self.id_user``.

    Raises:
        BrowserIncorrectPassword: if the home page is not reached after login.
        BrowserUnavailable: if the OAuth authorize redirect carries no token
            fragment (including the site's own ``server_error`` case).
    """
    self.login.go()
    if self.home.is_here():
        # already authenticated from a previous session
        return

    self.page.login(self.username, self.password, self.lastname)
    if not self.home.is_here():
        raise BrowserIncorrectPassword()

    # after login we need to get some tokens to use bouygues api
    data = {
        'response_type': 'id_token token',
        'client_id': 'a360.bouyguestelecom.fr',
        'redirect_uri': 'https://www.bouyguestelecom.fr/mon-compte/',
    }
    self.location('https://oauth2.bouyguestelecom.fr/authorize', params=data)

    # implicit-flow tokens come back in the URL fragment of the redirect
    parsed_url = urlparse(self.response.url)
    fragment = parse_qs(parsed_url.fragment)
    if not fragment:
        query = parse_qs(parsed_url.query)
        if 'server_error' in query.get('error', []):
            raise BrowserUnavailable(query['error_description'][0])
        # BUGFIX: previously fell through to fragment['id_token'] and died
        # with an opaque KeyError; fail explicitly instead.
        raise BrowserUnavailable()

    # token is only decoded, not verified: we just need the user id claim
    claims = jwt.get_unverified_claims(fragment['id_token'][0])
    self.headers = {'Authorization': 'Bearer %s' % fragment['access_token'][0]}
    self.id_user = claims['id_personne']
def get_params(self, url):
    """Yield ``(path, params)`` pairs for each card link carrying a sessionid.

    The base query parameters come from *url*; the ``sessionid`` value is
    taken from every matching anchor inside the FORM_LIB_CARTE form.
    """
    split = urlparse(url)
    path = split.path
    query_params = parse_qs(split.query)
    anchors = self.doc.xpath(
        '//form[@name="FORM_LIB_CARTE"]//a[contains(@href, "sessionid")]'
    )
    for anchor in anchors:
        href = Link('.')(anchor)
        query_params['sessionid'] = parse_qs(urlparse(href).query)['sessionid']
        yield path, query_params
def filter(self, url):
    """Return the single value of ``self.querykey`` from *url*'s query string.

    Falls back to ``default_or_raise`` when the key is absent or empty;
    raises ``FilterError`` when the key appears more than once.
    """
    key = self.querykey
    values = parse_qs(urlparse(url).query)
    if not values.get(key):
        return self.default_or_raise(ItemNotFound('Key %s not found' % key))
    matches = values[key]
    if len(matches) > 1:
        raise FilterError('More than one value for key %s' % key)
    return matches[0]
def obj__form(self):
    """Build the routing form used to reach this account's detail page.

    Contract identifiers come either from the anchor's ``id`` attribute
    (``"<contract>-<producer>"``) or, failing that, from its ``href`` query
    string. Returns None for restricted accounts with only a popup link.
    NOTE(review): assumes an ``id`` attribute always contains a '-' — confirm.
    """
    anchor_id = Attr('.//td/a', 'id', default=None)(self)
    if anchor_id:
        id_contrat = re.search(r'^(.*?)-', anchor_id).group(1)
        producteur = re.search(r'-(.*?)$', anchor_id).group(1)
    else:
        if self.xpath('.//td/a[has-class("clickPopupDetail")]'):
            # making a form of this link sometimes makes the site return an empty response...
            # the link is a link to some info, not full AV website
            # it's probably an indication the account is restricted anyway, so avoid it
            self.logger.debug("account %r is probably restricted, don't try its form", Field('id')(self))
            return None
        # sometimes information are not in id but in href
        href = Attr('.//td/a', 'href', default=None)(self)
        href_params = parse_qs(urlparse(href).query)
        id_contrat = href_params['ID_CONTRAT'][0]
        producteur = href_params['PRODUCTEUR'][0]
    routing_form = self.page.get_form('//form[@id="formRoutage"]')
    routing_form['ID_CONTRAT'] = id_contrat
    routing_form['PRODUCTEUR'] = producteur
    return routing_form
def obj__form(self):
    """Build the form used to reach this account's detail page.

    Contract identifiers come either from the anchor's ``id`` attribute
    (``"<contract>-<producer>"`` or just the contract id) or, failing that,
    from its ``href`` query string. Anchors whose class mentions ``redirect``
    use the formRedirectPart form instead of formRoutage. Returns None for
    restricted accounts exposing only a popup link.
    """
    form_id = Attr('.//td/a', 'id', default=None)(self)
    form_class = Attr('.//td/a', 'class', default=None)(self)
    if form_id:
        if '-' in form_id:
            id_contrat = re.search(r'^(.*?)-', form_id).group(1)
            producteur = re.search(r'-(.*?)$', form_id).group(1)
        else:
            id_contrat = form_id
            producteur = None
    else:
        if len(self.xpath('.//td/a[has-class("clickPopupDetail")]')):
            # making a form of this link sometimes makes the site return an empty response...
            # the link is a link to some info, not full AV website
            # it's probably an indication the account is restricted anyway, so avoid it
            self.logger.debug("account %r is probably restricted, don't try its form", Field('id')(self))
            return None
        # sometimes information are not in id but in href
        url = Attr('.//td/a', 'href', default=None)(self)
        parsed_url = urlparse(url)
        params = parse_qs(parsed_url.query)
        id_contrat = params['ID_CONTRAT'][0]
        producteur = params['PRODUCTEUR'][0]
    # BUGFIX: form_class is None when the anchor has no class attribute;
    # "'redirect' in None" would raise TypeError. Treat missing class as
    # the non-redirect (formRoutage) case.
    if form_class and 'redirect' in form_class:
        form = self.page.get_form('//form[@id="formRedirectPart"]')
    else:
        form = self.page.get_form('//form[@id="formRoutage"]')
    form['PRODUCTEUR'] = producteur
    form['ID_CONTRAT'] = id_contrat
    return form
def iter_transactions(self):
    """Yield transactions for this account, following server-side pagination.

    For deferred cards, card transactions seen before the summary line are
    flagged as coming and, when available, dated with the next debit date.

    Yields:
        Transaction objects produced by ``self._iter_transactions``.
    """
    url = self.get_part_url()
    if url is None:
        # There are no transactions in this kind of account
        return

    is_deferred_card = bool(self.doc.xpath(u'//div[contains(text(), "Différé")]'))
    has_summary = False
    if is_deferred_card:
        coming_debit_date = None
        # get coming debit date for deferred_card
        date_string = Regexp(
            CleanText(u'//option[contains(text(), "détail des factures à débiter le")]'),
            r'(\d{2}/\d{2}/\d{4})', default=NotAvailable)(self.doc)
        if date_string:
            coming_debit_date = parse_d(date_string)

    # each loop iteration fetches and parses one page of results
    while True:
        d = XML(self.browser.open(url).content)
        el = d.xpath('//dataBody')
        if not el:
            # no body element: nothing (more) to parse
            return
        el = el[0]
        # node text is re-encoded to latin-1 and parsed as its own document
        # (py2 `unicode`; presumably the payload is markup embedded in XML)
        s = unicode(el.text).encode('iso-8859-1')
        doc = fromstring(s)
        for tr in self._iter_transactions(doc):
            if tr.type == Transaction.TYPE_CARD_SUMMARY:
                has_summary = True
            if is_deferred_card and tr.type is Transaction.TYPE_CARD:
                tr.type = Transaction.TYPE_DEFERRED_CARD
                # transactions before the summary have not been debited yet
                if not has_summary:
                    if coming_debit_date:
                        tr.date = coming_debit_date
                    tr._coming = True
            yield tr

        # pagination: <suite> == 1 means another page follows
        el = d.xpath('//dataHeader')[0]
        if int(el.find('suite').text) != 1:
            return
        # rebuild the next-page URL from header fields plus the original
        # sign/src query parameters
        url = urlparse(url)
        p = parse_qs(url.query)
        args = {}
        args['n10_nrowcolor'] = 0
        args['operationNumberPG'] = el.find('operationNumber').text
        args['operationTypePG'] = el.find('operationType').text
        args['pageNumberPG'] = el.find('pageNumber').text
        args['idecrit'] = el.find('idecrit').text or ''
        args['sign'] = p['sign'][0]
        args['src'] = p['src'][0]
        url = '%s?%s' % (url.path, urlencode(args))
def do_login(self):
    """Authenticate through Google's OAuth2 endpoint and store the code.

    Side effects: sets ``self.code`` from the redirect URL's ``code``
    query parameter.

    Raises:
        BrowserIncorrectPassword: if no authorization code is present in
            the final URL (credentials rejected).
    """
    params = {
        'response_type': 'code',
        'client_id': '534890559860-r6gn7e3agcpiriehe63dkeus0tpl5i4i.apps.googleusercontent.com',
        'redirect_uri': self.redirect_uri,
    }
    queryString = "&".join([key + '=' + value for key, value in params.items()])
    self.google_login.go(auth='o/oauth2/auth', params=queryString).login(self.username, self.password)
    if self.google_login.is_here():
        # still on the login page: retry the form submission
        self.page.login(self.username, self.password)
    try:
        self.code = parse_qs(urlparse(self.url).query).get('code')[0]
    except (TypeError, IndexError):
        # BUGFIX: was a bare `except:` (also swallowed KeyboardInterrupt /
        # SystemExit). .get('code') is None when no code came back, which
        # raises TypeError on subscription.
        raise BrowserIncorrectPassword()
def url2id(url, nopost=False):
    """Convert a phpBB URL into a dotted id string.

    ``viewforum.php?f=N`` -> ``"N"``;
    ``viewtopic.php?f=F&t=T[&p=P]`` -> ``"F.T[.P]"`` (forum defaults to 0,
    post id omitted when *nopost* is true). Any other page yields None.
    """
    parts = urlsplit(url)
    page = parts.path.rsplit('/', 1)[-1]
    query = parse_qs(parts.query)
    if page == 'viewforum.php':
        return '%d' % int(query['f'][0])
    if page != 'viewtopic.php':
        return None
    forum = int(query['f'][0]) if 'f' in query else 0
    result = '%d.%d' % (forum, int(query['t'][0]))
    if 'p' in query and not nopost:
        result += '.%d' % int(query['p'][0])
    return result
def iter_transactions(self):
    """Yield transactions for this account, following server-side pagination.

    Deferred-card transactions seen before the card summary line are
    retyped as TYPE_DEFERRED_CARD and flagged as coming.

    Yields:
        Transaction objects produced by ``self._iter_transactions``.
    """
    url = self.get_part_url()
    if url is None:
        # There are no transactions in this kind of account
        return

    is_deferred_card = bool(self.doc.xpath(u'//div[contains(text(), "Différé")]'))
    has_summary = False
    # each loop iteration fetches and parses one page of results
    while True:
        d = XML(self.browser.open(url).content)
        el = d.xpath('//dataBody')
        if not el:
            # no body element: nothing (more) to parse
            return
        el = el[0]
        # node text is re-encoded to latin-1 and parsed as its own document
        # (py2 `unicode`; presumably the payload is markup embedded in XML)
        s = unicode(el.text).encode('iso-8859-1')
        doc = fromstring(s)
        for tr in self._iter_transactions(doc):
            if tr.type == Transaction.TYPE_CARD_SUMMARY:
                has_summary = True
            # transactions before the summary have not been debited yet
            if is_deferred_card and tr.type is Transaction.TYPE_CARD:
                tr.type = Transaction.TYPE_DEFERRED_CARD
                if not has_summary:
                    tr._coming = True
            yield tr

        # pagination: <suite> == 1 means another page follows
        el = d.xpath('//dataHeader')[0]
        if int(el.find('suite').text) != 1:
            return
        # rebuild the next-page URL from header fields plus the original
        # sign/src query parameters
        url = urlparse(url)
        p = parse_qs(url.query)
        args = {}
        args['n10_nrowcolor'] = 0
        args['operationNumberPG'] = el.find('operationNumber').text
        args['operationTypePG'] = el.find('operationType').text
        args['pageNumberPG'] = el.find('pageNumber').text
        args['idecrit'] = el.find('idecrit').text or ''
        args['sign'] = p['sign'][0]
        args['src'] = p['src'][0]
        url = '%s?%s' % (url.path, urlencode(args))
def on_load(self):
    """Extract pagination counters, topic/forum ids and the forum title.

    Side effects: sets ``cur_page``, ``tot_pages``, ``topic_id``,
    ``forum_id`` and ``forum_title`` on the page object.
    """
    pagination = self.doc.xpath('//div[has-class("pagination")]')[0]
    counters = pagination.xpath('.//strong')
    self.cur_page = int(counters[0].text.strip())
    self.tot_pages = int(counters[1].text.strip())

    # prefer the last topic heading link; fall back to the page's own URL
    try:
        link = self.doc.xpath('//h2/a')[-1].attrib['href']
    except (IndexError, KeyError):
        link = self.url

    query = parse_qs(urlsplit(link).query)
    self.topic_id = int(query['t'][0])
    self.forum_id = int(query['f'][0]) if 'f' in query else 0

    self.forum_title = u''
    breadcrumb = self.doc.xpath('//li[has-class("icon-home")]')
    if len(breadcrumb) > 0:
        text = breadcrumb[0].xpath('.//a')[-1].text.strip()
        # truncate long forum names for display
        if len(text) >= 20:
            text = text[:20] + u'…'
        self.forum_title = '[%s] ' % text
def get_params(self, url):
    """Yield ``(base_url, params)`` for each sessionid-bearing card link.

    *url* supplies the base path and query parameters; each matching anchor
    in the FORM_LIB_CARTE form contributes its own ``sessionid`` value.
    """
    parts = urlparse(url)
    base_url, params = parts.path, parse_qs(parts.query)
    xpath_expr = '//form[@name="FORM_LIB_CARTE"]//a[contains(@href, "sessionid")]'
    for a in self.doc.xpath(xpath_expr):
        link_query = urlparse(Link('.')(a)).query
        params['sessionid'] = parse_qs(link_query)['sessionid']
        yield base_url, params
def iter_torrents(self):
    """Yield Torrent objects parsed from the site's results table.

    Handles two table layouts (``torrent_table`` and
    ``browse_torrent_table``) and three row kinds: header rows (skipped),
    group rows (accumulate a group title), and torrent rows.
    """
    table = self.document.getroot().cssselect('table.torrent_table')
    if not table:
        # alternate page layout uses a different table id
        table = self.document.getroot().cssselect(
            'table#browse_torrent_table')
    if table:
        table = table[0]
        current_group = None
        for tr in table.findall('tr'):
            if tr.attrib.get('class', '') == 'colhead':
                # ignore
                continue
            if tr.attrib.get('class', '') == 'group':
                # group row: build the group title from its anchor texts
                tds = tr.findall('td')
                current_group = u''
                div = tds[-6]
                if div.getchildren()[0].tag == 'div':
                    div = div.getchildren()[0]
                for a in div.findall('a'):
                    if not a.text:
                        continue
                    if current_group:
                        current_group += ' - '
                    current_group += a.text
            elif tr.attrib.get('class', '').startswith('group_torrent') or \
                    tr.attrib.get('class', '').startswith('torrent'):
                tds = tr.findall('td')
                title = current_group
                if len(tds) == 7:
                    # Under a group
                    i = 0
                elif len(tds) in (8, 9):
                    # An alone torrent
                    # scan backwards for the last cell holding an anchor
                    i = len(tds) - 1
                    while i >= 0 and tds[i].find('a') is None:
                        i -= 1
                else:
                    # Useless title
                    continue
                if title:
                    title += u' (%s)' % tds[i].find('a').text
                else:
                    title = ' - '.join(
                        [a.text for a in tds[i].findall('a')])
                # NOTE(review): `url` is a ParseResult here but is rebound to
                # a plain href string in the else branch below — confirm
                # format_url accepts both.
                url = urlparse(tds[i].find('a').attrib['href'])
                params = parse_qs(url.query)
                if 'torrentid' in params:
                    id = '%s.%s' % (params['id'][0], params['torrentid'][0])
                else:
                    url = tds[i].find('span').find('a').attrib['href']
                    m = self.TORRENTID_REGEXP.match(url)
                    if not m:
                        continue
                    # NOTE(review): assumes 'id' is present in the original
                    # query even when 'torrentid' was not — verify.
                    id = '%s.%s' % (params['id'][0], m.group(1))
                # size column position differs between layouts
                try:
                    size, unit = tds[i + 3].text.split()
                except ValueError:
                    size, unit = tds[i + 2].text.split()
                size = get_bytes_size(float(size.replace(',', '')), unit)
                seeders = int(tds[-2].text)
                leechers = int(tds[-1].text)
                torrent = Torrent(id, title)
                torrent.url = self.format_url(url)
                torrent.size = size
                torrent.seeders = seeders
                torrent.leechers = leechers
                yield torrent
            else:
                debug('unknown attrib: %s' % tr.attrib)
def iter_torrents(self):
    """Yield Torrent objects parsed from the site's results table.

    Two table layouts are supported (``torrent_table`` and
    ``browse_torrent_table``); rows are dispatched on their class:
    header rows are skipped, group rows accumulate a title, torrent rows
    produce a Torrent.
    """
    table = self.document.getroot().cssselect('table.torrent_table')
    if not table:
        # alternate page layout uses a different table id
        table = self.document.getroot().cssselect('table#browse_torrent_table')
    if table:
        table = table[0]
        current_group = None
        for tr in table.findall('tr'):
            if tr.attrib.get('class', '') == 'colhead':
                # ignore
                continue
            if tr.attrib.get('class', '') == 'group':
                # group row: concatenate anchor texts into the group title
                tds = tr.findall('td')
                current_group = u''
                div = tds[-6]
                if div.getchildren()[0].tag == 'div':
                    div = div.getchildren()[0]
                for a in div.findall('a'):
                    if not a.text:
                        continue
                    if current_group:
                        current_group += ' - '
                    current_group += a.text
            elif tr.attrib.get('class', '').startswith('group_torrent') or \
                    tr.attrib.get('class', '').startswith('torrent'):
                tds = tr.findall('td')
                title = current_group
                if len(tds) == 7:
                    # Under a group
                    i = 0
                elif len(tds) in (8, 9):
                    # An alone torrent
                    # walk backwards to the last cell containing an anchor
                    i = len(tds) - 1
                    while i >= 0 and tds[i].find('a') is None:
                        i -= 1
                else:
                    # Useless title
                    continue
                if title:
                    title += u' (%s)' % tds[i].find('a').text
                else:
                    title = ' - '.join([a.text for a in tds[i].findall('a')])
                # NOTE(review): `url` holds a ParseResult here but is rebound
                # to a raw href string in the else branch — confirm
                # format_url copes with both.
                url = urlparse(tds[i].find('a').attrib['href'])
                params = parse_qs(url.query)
                if 'torrentid' in params:
                    id = '%s.%s' % (params['id'][0], params['torrentid'][0])
                else:
                    url = tds[i].find('span').find('a').attrib['href']
                    m = self.TORRENTID_REGEXP.match(url)
                    if not m:
                        continue
                    # NOTE(review): relies on 'id' existing in the original
                    # query even without 'torrentid' — verify.
                    id = '%s.%s' % (params['id'][0], m.group(1))
                # size column position differs between layouts
                try:
                    size, unit = tds[i + 3].text.split()
                except ValueError:
                    size, unit = tds[i + 2].text.split()
                size = get_bytes_size(float(size.replace(',', '')), unit)
                seeders = int(tds[-2].text)
                leechers = int(tds[-1].text)
                torrent = Torrent(id, title)
                torrent.url = self.format_url(url)
                torrent.size = size
                torrent.seeders = seeders
                torrent.leechers = leechers
                yield torrent
            else:
                debug('unknown attrib: %s' % tr.attrib)
def get_code(self):
    """Return the ``code`` query parameter of the current URL."""
    query = parse_qs(urlparse(self.url).query)
    return query['code'][0]