def get_params(self, url):
    """Yield (path, params) pairs for each card link carrying a sessionid.

    The base path and query parameters come from `url`; the sessionid is
    refreshed from each matching link inside the FORM_LIB_CARTE form.
    """
    parsed = urlparse(url)
    base_url = parsed.path
    params = parse_qs(parsed.query)
    for anchor in self.doc.xpath('//form[@name="FORM_LIB_CARTE"]//a[contains(@href, "sessionid")]'):
        link_query = urlparse(Link('.')(anchor)).query
        params['sessionid'] = parse_qs(link_query)['sessionid']
        yield base_url, params
def obj__form(self):
    """Build the routing form for this account row, or None when restricted.

    The contract id and producer are read from the link's id attribute when
    present, otherwise from its href query string.
    """
    form_id = Attr('.//td/a', 'id', default=None)(self)
    if form_id:
        id_contrat = re.search(r'^(.*?)-', form_id).group(1)
        producteur = re.search(r'-(.*?)$', form_id).group(1)
    else:
        if len(self.xpath('.//td/a[has-class("clickPopupDetail")]')):
            # making a form of this link sometimes makes the site return an
            # empty response... the link points to some info, not the full AV
            # website; it's probably an indication the account is restricted
            # anyway, so avoid it
            self.logger.debug("account %r is probably restricted, don't try its form", Field('id')(self))
            return None
        # sometimes information is not in the id but in the href
        url = Attr('.//td/a', 'href', default=None)(self)
        query = parse_qs(urlparse(url).query)
        id_contrat = query['ID_CONTRAT'][0]
        producteur = query['PRODUCTEUR'][0]
    form = self.page.get_form('//form[@id="formRoutage"]')
    form['ID_CONTRAT'] = id_contrat
    form['PRODUCTEUR'] = producteur
    return form
def filter(self, url):
    """Return the single query-string value for self.querykey in `url`.

    Falls back to the default (or raises ItemNotFound) when the key is
    absent; raises FilterError when the key appears more than once.
    """
    query = parse_qs(urlparse(url).query)
    values = query.get(self.querykey)
    if not values:
        return self.default_or_raise(ItemNotFound('Key %s not found' % self.querykey))
    if len(values) > 1:
        raise FilterError('More than one value for key %s' % self.querykey)
    return values[0]
def obj__form(self):
    """Build the routing/redirect form for this account row, or None.

    The contract id and producer come from the link's id attribute when
    present (possibly without a '-' separator), otherwise from its href.

    Fix: `form_class` is fetched with default=None, so the link may have no
    class attribute; `'redirect' in None` raised TypeError — guard it.
    """
    form_id = Attr('.//td/a', 'id', default=None)(self)
    form_class = Attr('.//td/a', 'class', default=None)(self)
    if form_id:
        if '-' in form_id:
            id_contrat = re.search(r'^(.*?)-', form_id).group(1)
            producteur = re.search(r'-(.*?)$', form_id).group(1)
        else:
            id_contrat = form_id
            producteur = None
    else:
        if len(self.xpath('.//td/a[has-class("clickPopupDetail")]')):
            # making a form of this link sometimes makes the site return an empty response...
            # the link is a link to some info, not full AV website
            # it's probably an indication the account is restricted anyway, so avoid it
            self.logger.debug("account %r is probably restricted, don't try its form", Field('id')(self))
            return None
        # sometimes information are not in id but in href
        url = Attr('.//td/a', 'href', default=None)(self)
        parsed_url = urlparse(url)
        params = parse_qs(parsed_url.query)
        id_contrat = params['ID_CONTRAT'][0]
        producteur = params['PRODUCTEUR'][0]
    # form_class may be None when the link has no class attribute
    if form_class and 'redirect' in form_class:
        form = self.page.get_form('//form[@id="formRedirectPart"]')
    else:
        form = self.page.get_form('//form[@id="formRoutage"]')
    form['PRODUCTEUR'] = producteur
    form['ID_CONTRAT'] = id_contrat
    return form
def save_response(self, response, warning=False, **kwargs):
    """Dump a request/response pair to the responses directory for debugging.

    Writes three files per response (request dump, response headers dump,
    raw body) plus an append-only url/filename index.
    """
    if self.responses_dirname is None:
        import tempfile
        self.responses_dirname = tempfile.mkdtemp(prefix='weboob_session_')
        print('Debug data will be saved in this directory: %s' % self.responses_dirname, file=sys.stderr)
    elif not os.path.isdir(self.responses_dirname):
        os.makedirs(self.responses_dirname)

    import mimetypes
    # get the content-type, remove optional charset part
    mimetype = response.headers.get('Content-Type', '').split(';')[0]
    # due to http://bugs.python.org/issue1043134
    if mimetype == 'text/plain':
        ext = '.txt'
    else:
        # try to get an extension (and avoid adding 'None')
        ext = mimetypes.guess_extension(mimetype, False) or ''

    # Fix: the previous class used 'A-z', which also matches '[', '\\', ']',
    # '^', '_', '`', and '\.-_' formed an unintended '.'-to-'_' range.
    path = re.sub(r'[^A-Za-z0-9._-]+', '_', urlparse(response.url).path.rpartition('/')[2])[-10:]
    if path.endswith(ext):
        ext = ''
    filename = '%02d-%d%s%s%s' % (self.responses_count, response.status_code, '-' if path else '', path, ext)
    response_filepath = os.path.join(self.responses_dirname, filename)

    request = response.request
    with open(response_filepath + '-request.txt', 'w') as f:
        f.write('%s %s\n\n\n' % (request.method, request.url))
        for key, value in request.headers.items():
            f.write('%s: %s\n' % (key, value))
        if request.body is not None:  # separate '' from None
            f.write('\n\n\n%s' % request.body)
    with open(response_filepath + '-response.txt', 'w') as f:
        if hasattr(response.elapsed, 'total_seconds'):
            f.write('Time: %3.3fs\n' % response.elapsed.total_seconds())
        f.write('%s %s\n\n\n' % (response.status_code, response.reason))
        for key, value in response.headers.items():
            f.write('%s: %s\n' % (key, value))
    with open(response_filepath, 'wb') as f:
        f.write(response.content)

    match_filepath = os.path.join(self.responses_dirname, 'url_response_match.txt')
    with open(match_filepath, 'a') as f:
        f.write('# %d %s %s\n' % (response.status_code, response.reason, response.headers.get('Content-Type', '')))
        f.write('%s\t%s\n' % (response.url, filename))
    self.responses_count += 1

    msg = u'Response saved to %s' % response_filepath
    if warning:
        self.logger.warning(msg)
    else:
        self.logger.info(msg)
def do_login(self):
    """Log in, then fetch the OAuth tokens needed by the Bouygues API."""
    self.login.go()
    if self.home.is_here():
        return

    self.page.login(self.username, self.password, self.lastname)
    if not self.home.is_here():
        raise BrowserIncorrectPassword()

    # after login we need to get some tokens to use the bouygues api
    auth_params = {
        'response_type': 'id_token token',
        'client_id': 'a360.bouyguestelecom.fr',
        'redirect_uri': 'https://www.bouyguestelecom.fr/mon-compte/',
    }
    self.location('https://oauth2.bouyguestelecom.fr/authorize', params=auth_params)

    parsed = urlparse(self.response.url)
    fragment = parse_qs(parsed.fragment)
    if not fragment:
        query = parse_qs(parsed.query)
        if 'server_error' in query.get('error', []):
            raise BrowserUnavailable(query['error_description'][0])

    claims = jwt.get_unverified_claims(fragment['id_token'][0])
    self.headers = {'Authorization': 'Bearer %s' % fragment['access_token'][0]}
    self.id_user = claims['id_personne']
def update_linebourse_token(self):
    """Propagate session cookies and the XSRF token to the linebourse browser."""
    assert self.linebourse is not None, "linebourse browser should already exist"
    self.linebourse.session.cookies.update(self.session.cookies)
    # The cookie domain must be looked up dynamically: for caissedepargne it
    # is 'www.caisse-epargne.offrebourse.com' whereas for creditcooperatif
    # it is 'www.offrebourse.com'.
    current_domain = urlparse(self.url).netloc
    xsrf_token = self.session.cookies.get('XSRF-TOKEN', domain=current_domain)
    self.linebourse.session.headers['X-XSRF-TOKEN'] = xsrf_token
def save_response(self, response, warning=False, **kwargs):
    """Dump a request/response pair to the responses directory for debugging.

    Same as the non-threaded variant, but the sequence number is taken
    under `responses_count_lock` so concurrent saves get unique filenames.
    """
    if self.responses_dirname is None:
        import tempfile
        self.responses_dirname = tempfile.mkdtemp(prefix='weboob_session_')
        print('Debug data will be saved in this directory: %s' % self.responses_dirname, file=sys.stderr)
    elif not os.path.isdir(self.responses_dirname):
        os.makedirs(self.responses_dirname)

    import mimetypes
    # get the content-type, remove optional charset part
    mimetype = response.headers.get('Content-Type', '').split(';')[0]
    # due to http://bugs.python.org/issue1043134
    if mimetype == 'text/plain':
        ext = '.txt'
    else:
        # try to get an extension (and avoid adding 'None')
        ext = mimetypes.guess_extension(mimetype, False) or ''

    with self.responses_count_lock:
        counter = self.responses_count
        self.responses_count += 1

    # Fix: the previous class used 'A-z', which also matches '[', '\\', ']',
    # '^', '_', '`', and '\.-_' formed an unintended '.'-to-'_' range.
    path = re.sub(r'[^A-Za-z0-9._-]+', '_', urlparse(response.url).path.rpartition('/')[2])[-10:]
    if path.endswith(ext):
        ext = ''
    filename = '%02d-%d%s%s%s' % (counter, response.status_code, '-' if path else '', path, ext)
    response_filepath = os.path.join(self.responses_dirname, filename)

    request = response.request
    with open(response_filepath + '-request.txt', 'w') as f:
        f.write('%s %s\n\n\n' % (request.method, request.url))
        for key, value in request.headers.items():
            f.write('%s: %s\n' % (key, value))
        if request.body is not None:  # separate '' from None
            f.write('\n\n\n%s' % request.body)
    with open(response_filepath + '-response.txt', 'w') as f:
        if hasattr(response.elapsed, 'total_seconds'):
            f.write('Time: %3.3fs\n' % response.elapsed.total_seconds())
        f.write('%s %s\n\n\n' % (response.status_code, response.reason))
        for key, value in response.headers.items():
            f.write('%s: %s\n' % (key, value))
    with open(response_filepath, 'wb') as f:
        f.write(response.content)

    match_filepath = os.path.join(self.responses_dirname, 'url_response_match.txt')
    with open(match_filepath, 'a') as f:
        f.write('# %d %s %s\n' % (response.status_code, response.reason, response.headers.get('Content-Type', '')))
        f.write('%s\t%s\n' % (response.url, filename))

    msg = u'Response saved to %s' % response_filepath
    if warning:
        self.logger.warning(msg)
    else:
        self.logger.info(msg)
def go_post(self, url, data=None):
    """POST to `url`, converting its query string into POST data.

    Most HSBC account links are actually handled by js code which converts
    a GET query string to POST data; not doing so often results in a
    logout by the site.

    Fix: when the URL had no '?', `url.find('?')` returned -1 and the
    slice `url[:-1]` silently dropped the last character of the URL.
    """
    q = dict(parse_qsl(urlparse(url).query))
    if data:
        q.update(data)
    # keep only the part before the query string, if any
    url = url.split('?', 1)[0]
    self.location(url, data=q)
def iter_transactions(self):
    # Yield all transactions for the account, following server-side
    # pagination until the XML header says there is no next page.
    # Deferred-card operations get TYPE_DEFERRED_CARD and, until a card
    # summary has been seen, are flagged as coming with the next debit date.
    url = self.get_part_url()
    if url is None:
        # There are no transactions in this kind of account
        return

    is_deferred_card = bool(self.doc.xpath(u'//div[contains(text(), "Différé")]'))
    has_summary = False
    if is_deferred_card:
        coming_debit_date = None
        # get coming debit date for deferred_card
        date_string = Regexp(CleanText(u'//option[contains(text(), "détail des factures à débiter le")]'), r'(\d{2}/\d{2}/\d{4})', default=NotAvailable)(self.doc)
        if date_string:
            coming_debit_date = parse_d(date_string)

    while True:
        d = XML(self.browser.open(url).content)
        el = d.xpath('//dataBody')
        if not el:
            return
        el = el[0]
        # the transaction rows are shipped as iso-8859-1 markup embedded in
        # the XML body (py2 `unicode` here)
        s = unicode(el.text).encode('iso-8859-1')
        doc = fromstring(s)
        for tr in self._iter_transactions(doc):
            if tr.type == Transaction.TYPE_CARD_SUMMARY:
                has_summary = True
            if is_deferred_card and tr.type is Transaction.TYPE_CARD:
                tr.type = Transaction.TYPE_DEFERRED_CARD
                if not has_summary:
                    if coming_debit_date:
                        tr.date = coming_debit_date
                    tr._coming = True
            yield tr

        # pagination: 'suite' == 1 means there is a next page to fetch
        el = d.xpath('//dataHeader')[0]
        if int(el.find('suite').text) != 1:
            return

        # build the next page URL from the header fields plus the signed
        # parameters carried over from the current query string
        url = urlparse(url)
        p = parse_qs(url.query)
        args = {}
        args['n10_nrowcolor'] = 0
        args['operationNumberPG'] = el.find('operationNumber').text
        args['operationTypePG'] = el.find('operationType').text
        args['pageNumberPG'] = el.find('pageNumber').text
        args['idecrit'] = el.find('idecrit').text or ''
        args['sign'] = p['sign'][0]
        args['src'] = p['src'][0]
        url = '%s?%s' % (url.path, urlencode(args))
def getCurrentSubBank(self):
    """Derive the sub-bank URL prefix and website generation from the current URL.

    The account list and history urls depend on the sub bank of the user.

    Fix: `paths[1]` was accessed unconditionally, raising IndexError when
    the URL path has a single segment; guard on the segment count.
    """
    paths = urlparse(self.url).path.lstrip('/').split('/')
    self.currentSubBank = paths[0] + "/" if paths[0] != "fr" else ""
    if self.currentSubBank and len(paths) > 1:
        if paths[0] == 'banqueprivee' and paths[1] == 'mabanque':
            self.currentSubBank = 'banqueprivee/mabanque/'
        if paths[1] == "decouverte":
            self.currentSubBank += paths[1] + "/"
    if paths[0] in ["cmmabn", "fr", "mabanque", "banqueprivee"]:
        self.is_new_website = True
def update_linebourse_token(self):
    """Share the current session cookies and XSRF token with linebourse."""
    assert self.linebourse is not None, "linebourse browser should already exist"
    self.linebourse.session.cookies.update(self.session.cookies)
    # It is important to fetch the domain dynamically because
    # for caissedepargne the domain is 'www.caisse-epargne.offrebourse.com'
    # whereas for creditcooperatif it is 'www.offrebourse.com'
    host = urlparse(self.url).netloc
    token = self.session.cookies.get('XSRF-TOKEN', domain=host)
    self.linebourse.session.headers['X-XSRF-TOKEN'] = token
def getCurrentSubBank(self):
    """Derive the sub-bank URL prefix and website generation from the current URL.

    The account list and history urls depend on the sub bank of the user.

    Fix: `paths[1]` was accessed unconditionally, raising IndexError when
    the URL path has a single segment; guard on the segment count.
    """
    paths = urlparse(self.url).path.lstrip('/').split('/')
    self.currentSubBank = paths[0] + "/" if paths[0] != "fr" else ""
    if self.currentSubBank and len(paths) > 1:
        if paths[0] == 'banqueprivee' and paths[1] == 'mabanque':
            self.currentSubBank = 'banqueprivee/mabanque/'
        if paths[1] == "decouverte":
            self.currentSubBank += paths[1] + "/"
    if paths[0] in ["fr", "mabanque", "banqueprivee"]:
        self.is_new_website = True
def do_login(self):
    # Log in: fetch the login URL from the home page, POST the username,
    # then the password, then follow the returned URL to the accounts page.
    self.login.go()
    if self.home.is_here():
        return

    self.page.login(self.username, self.password, self.lastname)
    if not self.home.is_here():
        raise BrowserIncorrectPassword()

    # after login we need to get some tokens to use bouygues api
    data = {
        'response_type': 'id_token token',
        'client_id': 'a360.bouyguestelecom.fr',
        'redirect_uri': 'https://www.bouyguestelecom.fr/mon-compte/'
    }
    self.location('https://oauth2.bouyguestelecom.fr/authorize', params=data)

    parsed_url = urlparse(self.response.url)
    fragment = parse_qs(parsed_url.fragment)
    if not fragment:
        # no token fragment: look for a server-side error in the query string
        query = parse_qs(parsed_url.query)
        if 'server_error' in query.get('error', []):
            raise BrowserUnavailable(query['error_description'][0])

    # the id_token JWT carries the user id; the access_token authenticates
    # subsequent API calls
    claims = jwt.get_unverified_claims(fragment['id_token'][0])
    self.headers = {'Authorization': 'Bearer %s' % fragment['access_token'][0]}
    self.id_user = claims['id_personne']
def request_access_token(self, auth_uri):
    """Exchange the authorization response for an access token.

    `auth_uri` may be either a dict of callback values or the full
    callback URL whose query string carries them.
    """
    self.logger.info('requesting access token')

    values = auth_uri if isinstance(auth_uri, dict) else dict(parse_qsl(urlparse(auth_uri).query))
    token_request_data = self.build_access_token_parameters(values)
    try:
        token_response = self.do_token_request(token_request_data).json()
    except ClientError:
        raise BrowserIncorrectPassword()
    self.update_token(token_response)
def do_login(self):
    """Authenticate against Google OAuth and store the authorization code.

    Fix: replace the bare `except:` (which also swallows KeyboardInterrupt
    and SystemExit) with the two errors that actually signal a missing
    'code' parameter: `.get('code')` returning None (TypeError on [0]) or
    an empty list (IndexError).
    """
    params = {
        'response_type': 'code',
        'client_id': '534890559860-r6gn7e3agcpiriehe63dkeus0tpl5i4i.apps.googleusercontent.com',
        'redirect_uri': self.redirect_uri,
    }
    queryString = "&".join([key + '=' + value for key, value in params.items()])
    self.google_login.go(auth='o/oauth2/auth', params=queryString).login(self.username, self.password)
    if self.google_login.is_here():
        self.page.login(self.username, self.password)

    try:
        self.code = parse_qs(urlparse(self.url).query).get('code')[0]
    except (TypeError, IndexError):
        raise BrowserIncorrectPassword()
def get_referrer(self, oldurl, newurl):
    """
    Get the referrer to send when doing a request.
    If we should not send a referrer, it will return None.

    Reference: https://en.wikipedia.org/wiki/HTTP_referer

    The behavior can be controlled through the ALLOW_REFERRER attribute.
    True always allows the referers to be sent, False never, and None only
    if it is within the same domain.

    :param oldurl: Current absolute URL
    :type oldurl: str or None
    :param newurl: Target absolute URL
    :type newurl: str

    :rtype: str or None
    """
    if self.ALLOW_REFERRER is False or oldurl is None:
        return None

    old, new = urlparse(oldurl), urlparse(newurl)

    # Do not leak secure URLs to insecure URLs
    if old.scheme == 'https' and new.scheme != 'https':
        return None

    # Reloading the page. Usually no referrer.
    if oldurl == newurl:
        return None

    # Domain-based privacy
    if self.ALLOW_REFERRER is None and old.netloc != new.netloc:
        return None

    return oldurl
def prepare_url(url, fields):
    """Return `url` with the query parameters in `fields` set or replaced.

    Existing occurrences of the overridden keys are dropped; all other
    query parameters keep their original order.
    """
    components = urlparse(url)
    kept = [(name, value) for (name, value) in parse_qsl(components.query) if name not in fields]
    kept.extend(fields.items())
    return urlunparse((
        components.scheme,
        components.netloc,
        components.path,
        components.params,
        urlencode(kept),
        components.fragment,
    ))
def request_access_token(self, auth_uri):
    """Exchange the authorization response for an access token.

    Accepts either a dict of callback values or the callback URL itself;
    callback errors are surfaced before the token request is attempted.
    """
    self.logger.info('requesting access token')

    if isinstance(auth_uri, dict):
        callback_values = auth_uri
    else:
        callback_values = dict(parse_qsl(urlparse(auth_uri).query))
    self.handle_callback_error(callback_values)

    request_data = self.build_access_token_parameters(callback_values)
    try:
        token_response = self.do_token_request(request_data).json()
    except ClientError:
        raise BrowserIncorrectPassword()
    self.update_token(token_response)
def quit_market_website(self):
    """Leave the Netfinca market website and come back to the bank website."""
    parsed = urlparse(self.url)
    disconnect_url = '%s://%s/netfinca-titres/servlet/com.netfinca.frontcr.login.ContextTransferDisconnect' % (parsed.scheme, parsed.netloc)
    disconnect_page = self.open(disconnect_url).page
    try:
        form = disconnect_page.get_form(name='formulaire')
    except FormNotFound:
        msg = CleanText(u'//b[contains(text() , "Nous vous invitons à créer un mot de passe trading.")]')(self.page.doc)
        if msg:
            raise ActionNeeded(msg)
    else:
        # 'act' parameter allows page recognition, this parameter is ignored by
        # server
        self.location(form.url + '&act=Synthepargnes')
        self.update_sag()
def download(self, video, dest, default=None):
    # Download `video` to `dest`, picking the external downloader from the
    # URL scheme / extension: rtmpdump for rtmp, mimms for mms, wget for
    # m3u8 playlists, wget or curl otherwise. Returns a shell-style status
    # code (4: no URL, 1: missing tool) or None on dispatch.
    if not video.url:
        print('Error: the direct URL is not available.', file=self.stderr)
        return 4

    def check_exec(executable):
        # True if `executable` is found in PATH (probed via `which`)
        with open(os.devnull, 'w') as devnull:
            process = subprocess.Popen(['which', executable], stdout=devnull)
            if process.wait() != 0:
                print('Please install "%s"' % executable, file=self.stderr)
                return False
        return True

    dest = self.obj_to_filename(video, dest, default)

    if video.url.startswith('rtmp'):
        if not check_exec('rtmpdump'):
            return 1
        args = ('rtmpdump', '-e', '-r', video.url, '-o', dest)
    elif video.url.startswith('mms'):
        if not check_exec('mimms'):
            return 1
        args = ('mimms', '-r', video.url, dest)
    elif u'm3u8' == video.ext:
        # HLS playlist: collect the segment URLs and hand them all to wget
        _dest, _ = os.path.splitext(dest)
        dest = u'%s.%s' % (_dest, 'mp4')
        content = tuple()
        parsed_uri = urlparse(video.url)
        baseurl = '{uri.scheme}://{uri.netloc}'.format(uri=parsed_uri)
        for line in self.read_url(video.url):
            line = line.decode('utf-8')
            if not line.startswith('#'):
                # relative segment paths are resolved against the playlist host
                if not line.startswith('http'):
                    line = u'%s%s' % (baseurl, line)
                content += (line,)
        args = ('wget', '-nv',) + content + ('-O', dest)
    else:
        if check_exec('wget'):
            args = ('wget', '-c', video.url, '-O', dest)
        elif check_exec('curl'):
            args = ('curl', '-C', '-', video.url, '-o', dest)
        else:
            return 1

    self.logger.debug(' '.join(args))
    os.spawnlp(os.P_WAIT, args[0], *args)
def moveto_market_website(self, account, home=False):
    """Jump to the Netfinca market website and return the wallet (or home) URL."""
    response = self.open(account.url % self.sag).text
    self._sag = None
    # https://www.cabourse.credit-agricole.fr/netfinca-titres/servlet/com.netfinca.frontcr.navigation.AccueilBridge?TOKEN_ID=
    m = re.search('document.location="([^"]+)"', response)
    if not m:
        self.logger.warning('Unable to go to market website')
        raise WebsiteNotSupported()

    url = m.group(1)
    self.open(url)
    if home:
        return 'https://www.cabourse.credit-agricole.fr/netfinca-titres/servlet/com.netfinca.frontcr.synthesis.HomeSynthesis'

    parsed = urlparse(url)
    wallet_url = '%s://%s/netfinca-titres/servlet/com.netfinca.frontcr.account.WalletVal?nump=%s:%s'
    return wallet_url % (parsed.scheme, parsed.netloc, account.id, self.code_caisse)
def iter_transactions(self):
    # Yield all transactions for the account, following server-side
    # pagination until the XML header says there is no next page.
    url = self.get_part_url()
    if url is None:
        # There are no transactions in this kind of account
        return

    is_deferred_card = bool(self.doc.xpath(u'//div[contains(text(), "Différé")]'))
    has_summary = False
    while True:
        d = XML(self.browser.open(url).content)
        el = d.xpath('//dataBody')
        if not el:
            return
        el = el[0]
        # transaction rows are shipped as iso-8859-1 markup embedded in the
        # XML body (py2 `unicode` here)
        s = unicode(el.text).encode('iso-8859-1')
        doc = fromstring(s)
        for tr in self._iter_transactions(doc):
            if tr.type == Transaction.TYPE_CARD_SUMMARY:
                has_summary = True
            # deferred-card operations seen before the card summary are
            # still coming
            if is_deferred_card and tr.type is Transaction.TYPE_CARD:
                tr.type = Transaction.TYPE_DEFERRED_CARD
                if not has_summary:
                    tr._coming = True
            yield tr

        # pagination: 'suite' == 1 means there is a next page to fetch
        el = d.xpath('//dataHeader')[0]
        if int(el.find('suite').text) != 1:
            return

        # build the next page URL from the header fields plus the signed
        # parameters carried over from the current query string
        url = urlparse(url)
        p = parse_qs(url.query)
        args = {}
        args['n10_nrowcolor'] = 0
        args['operationNumberPG'] = el.find('operationNumber').text
        args['operationTypePG'] = el.find('operationType').text
        args['pageNumberPG'] = el.find('pageNumber').text
        args['idecrit'] = el.find('idecrit').text or ''
        args['sign'] = p['sign'][0]
        args['src'] = p['src'][0]
        url = '%s?%s' % (url.path, urlencode(args))
def moveto_market_website(self, account, home=False):
    """Jump to the Netfinca market website and return the wallet (or home) URL.

    Fix: `Logger.warn()` is a deprecated alias since Python 3.3; use
    `Logger.warning()` (consistent with the sibling implementation).
    """
    response = self.open(account.url % self.sag).text
    self._sag = None
    # https://www.cabourse.credit-agricole.fr/netfinca-titres/servlet/com.netfinca.frontcr.navigation.AccueilBridge?TOKEN_ID=
    m = re.search('document.location="([^"]+)"', response)
    if m:
        url = m.group(1)
    else:
        self.logger.warning('Unable to go to market website')
        raise WebsiteNotSupported()
    self.open(url)
    if home:
        return 'https://www.cabourse.credit-agricole.fr/netfinca-titres/servlet/com.netfinca.frontcr.synthesis.HomeSynthesis'
    parsed = urlparse(url)
    url = '%s://%s/netfinca-titres/servlet/com.netfinca.frontcr.account.WalletVal?nump=%s:%s'
    return url % (parsed.scheme, parsed.netloc, account.id, self.code_caisse)
def do_login(self):
    """Log in and collect the OAuth access token for the Bouygues API."""
    self.login_page.go()
    self.page.login(self.username, self.password, self.lastname)

    # q is timestamp millisecond
    self.app_config.go(params={'q': int(time() * 1000)})
    client_id = self.page.get_client_id()

    oauth_params = {
        'client_id': client_id,
        'response_type': 'id_token token',
        'redirect_uri': 'https://www.bouyguestelecom.fr/mon-compte/',
    }
    self.location('https://oauth2.bouyguestelecom.fr/authorize', params=oauth_params)

    fragments = dict(parse_qsl(urlparse(self.url).fragment))
    self.id_personne = jwt.get_unverified_claims(fragments['id_token'])['id_personne']
    self.headers = {'Authorization': 'Bearer ' + fragments['access_token']}
def quit_market_website(self):
    """Leave the Netfinca market website and return to the bank website."""
    current = urlparse(self.url)
    exit_url = '%s://%s/netfinca-titres/servlet/com.netfinca.frontcr.login.ContextTransferDisconnect' % (current.scheme, current.netloc)
    page = self.open(exit_url).page
    try:
        form = page.get_form(name='formulaire')
    except FormNotFound:
        msg = CleanText(u'//b[contains(text() , "Nous vous invitons à créer un mot de passe trading.")]')(self.page.doc)
        if msg:
            raise ActionNeeded(msg)
    else:
        # 'act' parameter allows page recognition, this parameter is ignored by
        # server
        self.location(form.url + '&act=Synthepargnes')
        self.update_sag()
def prepare_url(url, fields):
    """Return `url` with the query keys in `fields` overridden.

    Existing occurrences of the overridden keys are removed; the remaining
    parameters keep their order and the new ones are appended.
    """
    parts = urlparse(url)
    pairs = []
    for key, value in parse_qsl(parts.query):
        if key not in fields:
            pairs.append((key, value))
    for key, value in fields.items():
        pairs.append((key, value))
    return urlunparse(parts._replace(query=urlencode(pairs)))
def do_login(self):
    """Log in, translating site-side failures into BrowserIncorrectPassword."""
    self.login_page.go()
    try:
        self.page.login(self.username, self.password, self.lastname)
    except ClientError as e:
        if e.response.status_code == 401:
            raise BrowserIncorrectPassword()
        raise

    if self.login_page.is_here():
        msg = self.page.get_error_message()
        raise BrowserIncorrectPassword(msg)

    if self.forgotten_password_page.is_here():
        # when too much attempt has been done in a short time, bouygues redirect us here,
        # but no message is available on this page
        raise BrowserIncorrectPassword()

    # q is timestamp millisecond
    self.app_config.go(params={'q': int(time() * 1000)})
    client_id = self.page.get_client_id()

    oauth_params = {
        'client_id': client_id,
        'response_type': 'id_token token',
        'redirect_uri': 'https://www.bouyguestelecom.fr/mon-compte/',
    }
    self.location('https://oauth2.bouyguestelecom.fr/authorize', params=oauth_params)

    fragments = dict(parse_qsl(urlparse(self.url).fragment))
    self.id_personne = jwt.get_unverified_claims(fragments['id_token'])['id_personne']
    self.headers = {'Authorization': 'Bearer ' + fragments['access_token']}
def build_authorization_uri(self):
    """Return AUTHORIZATION_URI with the authorization parameters merged into its query string."""
    parsed = urlparse(self.AUTHORIZATION_URI)
    query = dict(parse_qsl(parsed.query))
    query.update(self.build_authorization_parameters())
    return parsed._replace(query=urlencode(query)).geturl()
def do_login(self):
    """
    Attempt to log in.
    Note: this method does nothing if we are already logged in.
    """
    self.BASEURL = 'https://%s/' % self.first_domain
    self._sag = None

    if not self.home_page.is_here():
        self.home_page.go()

    if self.new_login:
        self.page.go_to_auth()
        parsed = urlparse(self.url)
        self.BASEURL = '%s://%s' % (parsed.scheme, parsed.netloc)
    else:
        # On the homepage, we get the URL of the auth service.
        url = self.page.get_post_url()
        if url is None:
            raise WebsiteNotSupported()

        # First, post account number to get the password prompt.
        data = {
            'CCPTE': self.username[:11].encode('iso8859-15'),
            'canal': 'WEB',
            'hauteur_ecran': 768,
            'largeur_ecran': 1024,
            'liberror': '',
            'matrice': 'true',
            'origine': 'vitrine',
            'situationTravail': 'BANCAIRE',
            'typeAuthentification': 'CLIC_ALLER',
            'urlOrigine': self.page.url,
            'vitrine': 0,
        }
        parsed = urlparse(url)
        self.BASEURL = '%s://%s' % (parsed.scheme, parsed.netloc)
        self.location(url, data=data)
        assert self.login_page.is_here()

    # Then, post the password.
    self.page.login(self.username, self.password)

    if self.new_login:
        url = self.page.get_accounts_url()
    else:
        # The result of POST is the destination URL.
        url = self.page.get_result_url()

    # On wrongpass the node contains an error message instead of a URL.
    if not url.startswith('http'):
        raise BrowserIncorrectPassword(unescape(url, unicode_snob=True))

    self.location(url.replace('Synthese', 'Synthcomptes'))

    if self.login_error.is_here():
        raise BrowserIncorrectPassword()

    if self.page is None:
        raise WebsiteNotSupported()

    if not self.accounts.is_here():
        # Sometimes the home page is Releves.
        new_url = re.sub('act=([^&=]+)', 'act=Synthcomptes', self.page.url, 1)
        self.location(new_url)

    if not self.accounts.is_here():
        raise BrowserIncorrectPassword()

    if self.code_caisse is None:
        self.code_caisse = self.page.get_code_caisse()

    # Store the current url to go back when requesting accounts list.
    self.accounts_url = re.sub('sessionSAG=[^&]+', 'sessionSAG={0}', self.page.url)

    # we can deduce the URL to "savings" and "loan" accounts from the regular accounts one
    self.savings_url = re.sub('act=([^&=]+)', 'act=Synthepargnes', self.accounts_url, 1)
    self.loans_url = re.sub('act=([^&=]+)', 'act=Synthcredits', self.accounts_url, 1)
    self.advisor_url = re.sub('act=([^&=]+)', 'act=Contact', self.accounts_url, 1)
    self.profile_url = re.sub('act=([^&=]+)', 'act=Coordonnees', self.accounts_url, 1)

    if self.page.check_perimeters() and not self.broken_perimeters:
        self.perimeter_url = re.sub('act=([^&=]+)', 'act=Perimetre', self.accounts_url, 1)
        self.chg_perimeter_url = '%s%s' % (re.sub('act=([^&=]+)', 'act=ChgPerim', self.accounts_url, 1), '&typeaction=ChgPerim')
        self.location(self.perimeter_url.format(self.sag))
        self.page.check_multiple_perimeters()
def do_login(self):
    """
    Attempt to log in.
    Note: this method does nothing if we are already logged in.
    """
    self.BASEURL = 'https://%s/' % self.first_domain
    self._sag = None

    if not self.home_page.is_here():
        self.home_page.go()

    # connections served by the new API website are handled by another browser
    if self.new_website.is_here():
        self.logger.warning('This connection uses the new API website')
        raise SiteSwitch('api')

    if self.new_login:
        self.page.go_to_auth()
        parsed = urlparse(self.url)
        self.BASEURL = '%s://%s' % (parsed.scheme, parsed.netloc)
    else:
        # On the homepage, we get the URL of the auth service.
        url = self.page.get_post_url()
        if url is None:
            raise WebsiteNotSupported()

        # First, post account number to get the password prompt.
        data = {
            'CCPTE': self.username[:11].encode('iso8859-15'),
            'canal': 'WEB',
            'hauteur_ecran': 768,
            'largeur_ecran': 1024,
            'liberror': '',
            'matrice': 'true',
            'origine': 'vitrine',
            'situationTravail': 'BANCAIRE',
            'typeAuthentification': 'CLIC_ALLER',
            'urlOrigine': self.page.url,
            'vitrine': 0,
        }
        parsed = urlparse(url)
        self.BASEURL = '%s://%s' % (parsed.scheme, parsed.netloc)
        self.location(url, data=data)
        assert self.login_page.is_here()

    # Then, post the password.
    self.page.login(self.username, self.password)

    if self.new_login:
        url = self.page.get_accounts_url()
    else:
        # The result of POST is the destination URL.
        url = self.page.get_result_url()

    # On wrongpass the node contains an error message instead of a URL.
    if not url.startswith('http'):
        raise BrowserIncorrectPassword(unescape(url, unicode_snob=True))

    self.location(url.replace('Synthese', 'Synthcomptes'))

    if self.login_error.is_here():
        raise BrowserIncorrectPassword()

    if self.page is None:
        raise WebsiteNotSupported()

    if not self.accounts.is_here():
        # Sometimes the home page is Releves.
        new_url = re.sub('act=([^&=]+)', 'act=Synthcomptes', self.page.url, 1)
        self.location(new_url)

    if not self.accounts.is_here():
        raise BrowserIncorrectPassword()

    if self.code_caisse is None:
        self.code_caisse = self.page.get_code_caisse()

    # Store the current url to go back when requesting accounts list.
    self.accounts_url = re.sub('sessionSAG=[^&]+', 'sessionSAG={0}', self.page.url)

    # we can deduce the URL to "savings" and "loan" accounts from the regular accounts one
    self.savings_url = re.sub('act=([^&=]+)', 'act=Synthepargnes', self.accounts_url, 1)
    self.loans_url = re.sub('act=([^&=]+)', 'act=Synthcredits', self.accounts_url, 1)
    self.advisor_url = re.sub('act=([^&=]+)', 'act=Contact', self.accounts_url, 1)
    self.profile_url = re.sub('act=([^&=]+)', 'act=Coordonnees', self.accounts_url, 1)

    if self.page.check_perimeters() and not self.broken_perimeters:
        self.perimeter_url = re.sub('act=([^&=]+)', 'act=Perimetre', self.accounts_url, 1)
        self.chg_perimeter_url = '%s%s' % (re.sub('act=([^&=]+)', 'act=ChgPerim', self.accounts_url, 1), '&typeaction=ChgPerim')
        self.location(self.perimeter_url.format(self.sag))
        self.page.check_multiple_perimeters()
def search_galleries(self, pattern, sortby=CapGallery.SEARCH_RELEVANCE):
    """Yield the current page as a gallery when `pattern` matches its URL or title."""
    needle = pattern.lower()
    url = self.url()
    if needle in url or needle in self.browser.get_title().lower():
        yield self.get_gallery(urlparse(url).netloc)
def iter_torrents(self):
    # Parse the torrent listing table, tracking the current group row so
    # that grouped torrent rows can inherit its title.
    table = self.document.getroot().cssselect('table.torrent_table')
    if not table:
        table = self.document.getroot().cssselect('table#browse_torrent_table')
    if table:
        table = table[0]
        current_group = None
        for tr in table.findall('tr'):
            if tr.attrib.get('class', '') == 'colhead':
                # ignore
                continue
            if tr.attrib.get('class', '') == 'group':
                # group header row: build the group title from its links
                tds = tr.findall('td')
                current_group = u''
                div = tds[-6]
                if div.getchildren()[0].tag == 'div':
                    div = div.getchildren()[0]
                for a in div.findall('a'):
                    if not a.text:
                        continue
                    if current_group:
                        current_group += ' - '
                    current_group += a.text
            elif tr.attrib.get('class', '').startswith('group_torrent') or \
                    tr.attrib.get('class', '').startswith('torrent'):
                tds = tr.findall('td')
                title = current_group

                # locate the cell holding the torrent link
                if len(tds) == 7:
                    # Under a group
                    i = 0
                elif len(tds) in (8, 9):
                    # An alone torrent
                    i = len(tds) - 1
                    while i >= 0 and tds[i].find('a') is None:
                        i -= 1
                else:
                    # Useless title
                    continue

                if title:
                    title += u' (%s)' % tds[i].find('a').text
                else:
                    title = ' - '.join([a.text for a in tds[i].findall('a')])

                url = urlparse(tds[i].find('a').attrib['href'])
                params = parse_qs(url.query)
                if 'torrentid' in params:
                    id = '%s.%s' % (params['id'][0], params['torrentid'][0])
                else:
                    # NOTE(review): `params` still comes from the first link
                    # here; it presumably always contains 'id' in this branch
                    # — confirm against the site markup.
                    url = tds[i].find('span').find('a').attrib['href']
                    m = self.TORRENTID_REGEXP.match(url)
                    if not m:
                        continue
                    id = '%s.%s' % (params['id'][0], m.group(1))

                # size column position differs between row layouts
                try:
                    size, unit = tds[i + 3].text.split()
                except ValueError:
                    size, unit = tds[i + 2].text.split()
                size = get_bytes_size(float(size.replace(',', '')), unit)
                seeders = int(tds[-2].text)
                leechers = int(tds[-1].text)

                torrent = Torrent(id, title)
                torrent.url = self.format_url(url)
                torrent.size = size
                torrent.seeders = seeders
                torrent.leechers = leechers
                yield torrent
            else:
                debug('unknown attrib: %s' % tr.attrib)
def fc_redirect(self, url):
    """Follow the FranceConnect redirect and point BASEURL at the final host."""
    self.BASEURL = 'https://app.franceconnect.gouv.fr'
    self.location(url)
    self.page.redirect()
    final = urlparse(self.url)
    self.BASEURL = '%s://%s' % (final.scheme, final.netloc)
def getCurrentSubBank(self):
    """Derive the sub-bank URL prefix from the current URL.

    The account list and history urls depend on the sub bank of the user.
    """
    segments = urlparse(self.url).path.lstrip('/').split('/')
    first = segments[0]
    self.currentSubBank = '' if first == 'fr' else first + '/'
    if first in ['fr', 'mabanque']:
        self.is_new_website = True
def get_split_path(self):
    """Return the path of the current URL split into segments.

    A leading empty segment (from a double slash) is dropped.

    Fix: an empty URL path previously raised IndexError on `ret[0]`; an
    empty list is now returned instead.
    """
    segments = urlparse(self.url).path.split('/')[1:]
    if segments and not segments[0]:
        segments = segments[1:]
    return segments
def set_base_url(self, place):
    """Resolve `place` (or the default place) and set BASEURL to the host serving it."""
    if not place:
        place = self.default_place
    self.base.go(data={'query': place})
    resolved = urlparse(self.page.url)
    self.BASEURL = '%s://%s/' % (resolved.scheme, resolved.netloc)
def get_context_token(self):
    """Return the 'context_token' query parameter of the current URL, or None."""
    query = dict(parse_qsl(urlparse(self.url).query))
    return query.get('context_token')
def add_qs(url, **kwargs):
    """Return `url` with `kwargs` merged into its query string.

    Existing parameters keep their order; overridden keys keep their
    original position with the new value.
    """
    components = list(urlparse(url))
    query = OrderedDict(parse_qsl(components[4]))
    query.update(kwargs)
    components[4] = urlencode(query)
    return urlunparse(components)
def iter_torrents(self):
    # Parse the torrent listing table, tracking the current group row so
    # that grouped torrent rows can inherit its title.
    table = self.document.getroot().cssselect('table.torrent_table')
    if not table:
        table = self.document.getroot().cssselect('table#browse_torrent_table')
    if table:
        table = table[0]
        current_group = None
        for tr in table.findall('tr'):
            if tr.attrib.get('class', '') == 'colhead':
                # ignore
                continue
            if tr.attrib.get('class', '') == 'group':
                # group header row: build the group title from its links
                tds = tr.findall('td')
                current_group = u''
                div = tds[-6]
                if div.getchildren()[0].tag == 'div':
                    div = div.getchildren()[0]
                for a in div.findall('a'):
                    if not a.text:
                        continue
                    if current_group:
                        current_group += ' - '
                    current_group += a.text
            elif tr.attrib.get('class', '').startswith('group_torrent') or \
                    tr.attrib.get('class', '').startswith('torrent'):
                tds = tr.findall('td')
                title = current_group

                # locate the cell holding the torrent link
                if len(tds) == 7:
                    # Under a group
                    i = 0
                elif len(tds) in (8, 9):
                    # An alone torrent
                    i = len(tds) - 1
                    while i >= 0 and tds[i].find('a') is None:
                        i -= 1
                else:
                    # Useless title
                    continue

                if title:
                    title += u' (%s)' % tds[i].find('a').text
                else:
                    title = ' - '.join([a.text for a in tds[i].findall('a')])

                url = urlparse(tds[i].find('a').attrib['href'])
                params = parse_qs(url.query)
                if 'torrentid' in params:
                    id = '%s.%s' % (params['id'][0], params['torrentid'][0])
                else:
                    # NOTE(review): `params` still comes from the first link
                    # here; it presumably always contains 'id' in this branch
                    # — confirm against the site markup.
                    url = tds[i].find('span').find('a').attrib['href']
                    m = self.TORRENTID_REGEXP.match(url)
                    if not m:
                        continue
                    id = '%s.%s' % (params['id'][0], m.group(1))

                # size column position differs between row layouts
                try:
                    size, unit = tds[i + 3].text.split()
                except ValueError:
                    size, unit = tds[i + 2].text.split()
                size = get_bytes_size(float(size.replace(',', '')), unit)
                seeders = int(tds[-2].text)
                leechers = int(tds[-1].text)

                torrent = Torrent(id, title)
                torrent.url = self.format_url(url)
                torrent.size = size
                torrent.seeders = seeders
                torrent.leechers = leechers
                yield torrent
            else:
                debug('unknown attrib: %s' % tr.attrib)
def get_params(self, url):
    """Yield (path, params) for every card link that carries a sessionid."""
    parsed = urlparse(url)
    path = parsed.path
    params = parse_qs(parsed.query)
    for link in self.doc.xpath('//form[@name="FORM_LIB_CARTE"]//a[contains(@href, "sessionid")]'):
        href = Link('.')(link)
        params['sessionid'] = parse_qs(urlparse(href).query)['sessionid']
        yield path, params
def get_code(self):
    """Return the first 'code' query parameter of the current URL."""
    query = parse_qs(urlparse(self.url).query)
    return query['code'][0]
def get_gallery(self, _id):
    """Build a BaseGallery for `_id`, titled after the configured URL's host."""
    configured_url = self.config['url'].get()
    return BaseGallery(_id, title=urlparse(configured_url).netloc, url=configured_url)
def search_galleries(self, pattern, sortby=CapGallery.SEARCH_RELEVANCE):
    """Yield the current page as a gallery when `pattern` matches its URL or title."""
    needle = pattern.lower()
    url = self.url()
    if needle in url or needle in self.browser.get_title_icon()[0].lower():
        yield self.get_gallery(urlparse(url).netloc)