Python urlparse Examples, weboob.tools.compat.urlparse Python Examples

Example #1

0

Show file

File: account_pages.py Project: antibios/weboob

 def get_params(self, url):
     parsed = urlparse(url)
     base_url, params = parsed.path, parse_qs(parsed.query)
     for a in self.doc.xpath(
             '//form[@name="FORM_LIB_CARTE"]//a[contains(@href, "sessionid")]'
     ):
         params['sessionid'] = parse_qs(urlparse(
             Link('.')(a)).query)['sessionid']
         yield base_url, params

Example #2

0

Show file

            def obj__form(self):
                form_id = Attr('.//td/a', 'id', default=None)(self)
                if form_id:
                    id_contrat = re.search(r'^(.*?)-', form_id).group(1)
                    producteur = re.search(r'-(.*?)$', form_id).group(1)
                else:
                    if len(self.xpath(
                            './/td/a[has-class("clickPopupDetail")]')):
                        # making a form of this link sometimes makes the site return an empty response...
                        # the link is a link to some info, not full AV website
                        # it's probably an indication the account is restricted anyway, so avoid it
                        self.logger.debug(
                            "account %r is probably restricted, don't try its form",
                            Field('id')(self))
                        return None

                    # sometimes information are not in id but in href
                    url = Attr('.//td/a', 'href', default=None)(self)
                    parsed_url = urlparse(url)
                    params = parse_qs(parsed_url.query)

                    id_contrat = params['ID_CONTRAT'][0]
                    producteur = params['PRODUCTEUR'][0]

                form = self.page.get_form('//form[@id="formRoutage"]')
                form['ID_CONTRAT'] = id_contrat
                form['PRODUCTEUR'] = producteur
                return form

Example #3

0

Show file

 def filter(self, url):
     qs = parse_qs(urlparse(url).query)
     if not qs.get(self.querykey):
         return self.default_or_raise(ItemNotFound('Key %s not found' % self.querykey))
     if len(qs[self.querykey]) > 1:
         raise FilterError('More than one value for key %s' % self.querykey)
     return qs[self.querykey][0]

Example #4

0

Show file

File: pages.py Project: P4ncake/weboob

            def obj__form(self):
                form_id = Attr('.//td/a', 'id', default=None)(self)
                form_class = Attr('.//td/a', 'class', default=None)(self)
                if form_id:
                    if '-' in form_id:
                        id_contrat = re.search(r'^(.*?)-', form_id).group(1)
                        producteur = re.search(r'-(.*?)$', form_id).group(1)
                    else:
                        id_contrat = form_id
                        producteur = None
                else:
                    if len(self.xpath('.//td/a[has-class("clickPopupDetail")]')):
                        # making a form of this link sometimes makes the site return an empty response...
                        # the link is a link to some info, not full AV website
                        # it's probably an indication the account is restricted anyway, so avoid it
                        self.logger.debug("account %r is probably restricted, don't try its form", Field('id')(self))
                        return None

                    # sometimes information are not in id but in href
                    url = Attr('.//td/a', 'href', default=None)(self)
                    parsed_url = urlparse(url)
                    params = parse_qs(parsed_url.query)

                    id_contrat = params['ID_CONTRAT'][0]
                    producteur = params['PRODUCTEUR'][0]

                if 'redirect' in form_class:
                    form = self.page.get_form('//form[@id="formRedirectPart"]')
                else:
                    form = self.page.get_form('//form[@id="formRoutage"]')
                    form['PRODUCTEUR'] = producteur
                form['ID_CONTRAT'] = id_contrat
                return form

Example #5

0

Show file

File: browsers.py Project: MrFireFrog/weboob

    def save_response(self, response, warning=False, **kwargs):
        if self.responses_dirname is None:
            import tempfile
            self.responses_dirname = tempfile.mkdtemp(prefix='weboob_session_')
            print('Debug data will be saved in this directory: %s' %
                  self.responses_dirname,
                  file=sys.stderr)
        elif not os.path.isdir(self.responses_dirname):
            os.makedirs(self.responses_dirname)

        import mimetypes
        # get the content-type, remove optionnal charset part
        mimetype = response.headers.get('Content-Type', '').split(';')[0]
        # due to http://bugs.python.org/issue1043134
        if mimetype == 'text/plain':
            ext = '.txt'
        else:
            # try to get an extension (and avoid adding 'None')
            ext = mimetypes.guess_extension(mimetype, False) or ''

        path = re.sub(r'[^A-z0-9\.-_]+', '_',
                      urlparse(response.url).path.rpartition('/')[2])[-10:]
        if path.endswith(ext):
            ext = ''
        filename = '%02d-%d%s%s%s' % \
            (self.responses_count, response.status_code, '-' if path else '', path, ext)

        response_filepath = os.path.join(self.responses_dirname, filename)

        request = response.request
        with open(response_filepath + '-request.txt', 'w') as f:
            f.write('%s %s\n\n\n' % (request.method, request.url))
            for key, value in request.headers.items():
                f.write('%s: %s\n' % (key, value))
            if request.body is not None:  # separate '' from None
                f.write('\n\n\n%s' % request.body)
        with open(response_filepath + '-response.txt', 'w') as f:
            if hasattr(response.elapsed, 'total_seconds'):
                f.write('Time: %3.3fs\n' % response.elapsed.total_seconds())
            f.write('%s %s\n\n\n' % (response.status_code, response.reason))
            for key, value in response.headers.items():
                f.write('%s: %s\n' % (key, value))

        with open(response_filepath, 'wb') as f:
            f.write(response.content)

        match_filepath = os.path.join(self.responses_dirname,
                                      'url_response_match.txt')
        with open(match_filepath, 'a') as f:
            f.write('# %d %s %s\n' %
                    (response.status_code, response.reason,
                     response.headers.get('Content-Type', '')))
            f.write('%s\t%s\n' % (response.url, filename))
        self.responses_count += 1

        msg = u'Response saved to %s' % response_filepath
        if warning:
            self.logger.warning(msg)
        else:
            self.logger.info(msg)

Example #6

0

Show file

File: browser.py Project: P4ncake/weboob

    def do_login(self):
        self.login.go()

        if self.home.is_here():
            return

        self.page.login(self.username, self.password, self.lastname)

        if not self.home.is_here():
            raise BrowserIncorrectPassword()

        # after login we need to get some tokens to use bouygues api
        data = {
            'response_type': 'id_token token',
            'client_id': 'a360.bouyguestelecom.fr',
            'redirect_uri': 'https://www.bouyguestelecom.fr/mon-compte/'
        }
        self.location('https://oauth2.bouyguestelecom.fr/authorize', params=data)

        parsed_url = urlparse(self.response.url)
        fragment = parse_qs(parsed_url.fragment)

        if not fragment:
            query = parse_qs(parsed_url.query)
            if 'server_error' in query.get('error', []):
                raise BrowserUnavailable(query['error_description'][0])

        claims = jwt.get_unverified_claims(fragment['id_token'][0])
        self.headers = {'Authorization': 'Bearer %s' % fragment['access_token'][0]}
        self.id_user = claims['id_personne']

Example #7

0

Show file

File: browser.py Project: laurentb/weboob

 def update_linebourse_token(self):
     assert self.linebourse is not None, "linebourse browser should already exist"
     self.linebourse.session.cookies.update(self.session.cookies)
     # It is important to fetch the domain dynamically because
     # for caissedepargne the domain is 'www.caisse-epargne.offrebourse.com'
     # whereas for creditcooperatif it is 'www.offrebourse.com'
     domain = urlparse(self.url).netloc
     self.linebourse.session.headers['X-XSRF-TOKEN'] = self.session.cookies.get('XSRF-TOKEN', domain=domain)

Example #8

0

Show file

File: browsers.py Project: P4ncake/weboob

    def save_response(self, response, warning=False, **kwargs):
        if self.responses_dirname is None:
            import tempfile
            self.responses_dirname = tempfile.mkdtemp(prefix='weboob_session_')
            print('Debug data will be saved in this directory: %s' % self.responses_dirname, file=sys.stderr)
        elif not os.path.isdir(self.responses_dirname):
            os.makedirs(self.responses_dirname)

        import mimetypes
        # get the content-type, remove optionnal charset part
        mimetype = response.headers.get('Content-Type', '').split(';')[0]
        # due to http://bugs.python.org/issue1043134
        if mimetype == 'text/plain':
            ext = '.txt'
        else:
            # try to get an extension (and avoid adding 'None')
            ext = mimetypes.guess_extension(mimetype, False) or ''

        with self.responses_count_lock:
            counter = self.responses_count
            self.responses_count += 1

        path = re.sub(r'[^A-z0-9\.-_]+', '_', urlparse(response.url).path.rpartition('/')[2])[-10:]
        if path.endswith(ext):
            ext = ''
        filename = '%02d-%d%s%s%s' % \
            (counter, response.status_code, '-' if path else '', path, ext)

        response_filepath = os.path.join(self.responses_dirname, filename)

        request = response.request
        with open(response_filepath + '-request.txt', 'w') as f:
            f.write('%s %s\n\n\n' % (request.method, request.url))
            for key, value in request.headers.items():
                f.write('%s: %s\n' % (key, value))
            if request.body is not None:  # separate '' from None
                f.write('\n\n\n%s' % request.body)
        with open(response_filepath + '-response.txt', 'w') as f:
            if hasattr(response.elapsed, 'total_seconds'):
                f.write('Time: %3.3fs\n' % response.elapsed.total_seconds())
            f.write('%s %s\n\n\n' % (response.status_code, response.reason))
            for key, value in response.headers.items():
                f.write('%s: %s\n' % (key, value))

        with open(response_filepath, 'wb') as f:
            f.write(response.content)

        match_filepath = os.path.join(self.responses_dirname, 'url_response_match.txt')
        with open(match_filepath, 'a') as f:
            f.write('# %d %s %s\n' % (response.status_code, response.reason, response.headers.get('Content-Type', '')))
            f.write('%s\t%s\n' % (response.url, filename))

        msg = u'Response saved to %s' % response_filepath
        if warning:
            self.logger.warning(msg)
        else:
            self.logger.info(msg)

Example #9

0

Show file

 def go_post(self, url, data=None):
     # most of HSBC accounts links are actually handled by js code
     # which convert a GET query string to POST data.
     # not doing so often results in logout by the site
     q = dict(parse_qsl(urlparse(url).query))
     if data:
         q.update(data)
     url = url[:url.find('?')]
     self.location(url, data=q)

Example #10

0

Show file

File: browser.py Project: P4ncake/weboob

 def go_post(self, url, data=None):
     # most of HSBC accounts links are actually handled by js code
     # which convert a GET query string to POST data.
     # not doing so often results in logout by the site
     q = dict(parse_qsl(urlparse(url).query))
     if data:
         q.update(data)
     url = url[:url.find('?')]
     self.location(url, data=q)

Example #11

0

Show file

File: accounts_list.py Project: P4ncake/weboob

    def iter_transactions(self):
        url = self.get_part_url()
        if url is None:
            # There are no transactions in this kind of account
            return

        is_deferred_card = bool(self.doc.xpath(u'//div[contains(text(), "Différé")]'))
        has_summary = False

        if is_deferred_card:
            coming_debit_date = None
            # get coming debit date for deferred_card
            date_string = Regexp(CleanText(u'//option[contains(text(), "détail des factures à débiter le")]'),
                                r'(\d{2}/\d{2}/\d{4})',
                                default=NotAvailable)(self.doc)
            if date_string:
                coming_debit_date = parse_d(date_string)

        while True:
            d = XML(self.browser.open(url).content)
            el = d.xpath('//dataBody')
            if not el:
                return

            el = el[0]
            s = unicode(el.text).encode('iso-8859-1')
            doc = fromstring(s)

            for tr in self._iter_transactions(doc):
                if tr.type == Transaction.TYPE_CARD_SUMMARY:
                    has_summary = True
                if is_deferred_card and tr.type is Transaction.TYPE_CARD:
                    tr.type = Transaction.TYPE_DEFERRED_CARD
                    if not has_summary:
                        if coming_debit_date:
                            tr.date = coming_debit_date
                        tr._coming = True
                yield tr

            el = d.xpath('//dataHeader')[0]
            if int(el.find('suite').text) != 1:
                return

            url = urlparse(url)
            p = parse_qs(url.query)

            args = {}
            args['n10_nrowcolor'] = 0
            args['operationNumberPG'] = el.find('operationNumber').text
            args['operationTypePG'] = el.find('operationType').text
            args['pageNumberPG'] = el.find('pageNumber').text
            args['idecrit'] = el.find('idecrit').text or ''
            args['sign'] = p['sign'][0]
            args['src'] = p['src'][0]

            url = '%s?%s' % (url.path, urlencode(args))

Example #12

0

Show file

File: browser.py Project: dkremer-ledger/weboob

 def getCurrentSubBank(self):
     # the account list and history urls depend on the sub bank of the user
     paths = urlparse(self.url).path.lstrip('/').split('/')
     self.currentSubBank = paths[0] + "/" if paths[0] != "fr" else ""
     if self.currentSubBank and paths[0] == 'banqueprivee' and paths[1] == 'mabanque':
         self.currentSubBank = 'banqueprivee/mabanque/'
     if self.currentSubBank and paths[1] == "decouverte":
         self.currentSubBank += paths[1] + "/"
     if paths[0] in ["cmmabn", "fr", "mabanque", "banqueprivee"]:
         self.is_new_website = True

Example #13

0

Show file

File: browser.py Project: guix77/weboob

 def update_linebourse_token(self):
     assert self.linebourse is not None, "linebourse browser should already exist"
     self.linebourse.session.cookies.update(self.session.cookies)
     # It is important to fetch the domain dynamically because
     # for caissedepargne the domain is 'www.caisse-epargne.offrebourse.com'
     # whereas for creditcooperatif it is 'www.offrebourse.com'
     domain = urlparse(self.url).netloc
     self.linebourse.session.headers[
         'X-XSRF-TOKEN'] = self.session.cookies.get('XSRF-TOKEN',
                                                    domain=domain)

Example #14

0

Show file

File: browser.py Project: laurentb/weboob

 def getCurrentSubBank(self):
     # the account list and history urls depend on the sub bank of the user
     paths = urlparse(self.url).path.lstrip('/').split('/')
     self.currentSubBank = paths[0] + "/" if paths[0] != "fr" else ""
     if self.currentSubBank and paths[0] == 'banqueprivee' and paths[1] == 'mabanque':
         self.currentSubBank = 'banqueprivee/mabanque/'
     if self.currentSubBank and paths[1] == "decouverte":
         self.currentSubBank += paths[1] + "/"
     if paths[0] in ["fr", "mabanque", "banqueprivee"]:
         self.is_new_website = True

Example #15

0

Show file

    def do_login(self):
        if not self.username or not self.password:
            raise BrowserIncorrectPassword()

        # Re-set the BASEURL to the origin URL in case of logout
        self.BASEURL = self.ORIGIN_URL

        # From the home page, fetch the login url to go to login page
        login_url = self.home.go().get_login_url()
        assert login_url, "L'adresse URL %s n'est pas gérée actuellement." % self.ORIGIN_URL

        parsed_url = urlparse(login_url)
        self.BASEURL = '%s://%s' % (parsed_url.scheme, parsed_url.netloc)

        # Go to login page and POST the username
        login_data = {
            'CCPTE': self.username,
            'urlOrigine': self.ORIGIN_URL,
            'typeAuthentification': 'CLIC_ALLER',
            'situationTravail': 'BANCAIRE',
            'origine': 'vitrine',
            'matrice': 'true',
            'canal': 'WEB',
        }
        self.login.go(data=login_data)
        assert self.login.is_here()

        # POST the password and fetch the URL after login
        self.page.submit_password(self.username, self.password)
        url_after_login = self.page.get_accounts_url()

        # For some connections, the first session_value is contained in the URL
        # after login, so we must set it before going to the accounts page.
        m = re.search(r'sessionSAG=([^&]+)', url_after_login)
        if m:
            self.session_value = m.group(1)

        # In case of wrongpass, instead of a URL, the node will contain a message such as
        # 'Votre identification est incorrecte, veuillez ressaisir votre numéro de compte et votre code d'accès'
        if not url_after_login.startswith('https'):
            raise BrowserIncorrectPassword(url_after_login)

        # The session value is necessary for correct navigation.
        self.location(url_after_login)

        self.accounts.go()
        assert self.accounts.is_here()

        # No need to get perimeters in case of re-login
        if not self.perimeters:
            self.get_all_perimeters()

Example #16

0

Show file

File: browsers.py Project: laurentb/weboob

    def request_access_token(self, auth_uri):
        self.logger.info('requesting access token')

        if isinstance(auth_uri, dict):
            values = auth_uri
        else:
            values = dict(parse_qsl(urlparse(auth_uri).query))
        data = self.build_access_token_parameters(values)
        try:
            auth_response = self.do_token_request(data).json()
        except ClientError:
            raise BrowserIncorrectPassword()

        self.update_token(auth_response)

Example #17

0

Show file

    def do_login(self):
        params = {'response_type': 'code',
                  'client_id': '534890559860-r6gn7e3agcpiriehe63dkeus0tpl5i4i.apps.googleusercontent.com',
                  'redirect_uri': self.redirect_uri}

        queryString = "&".join([key+'='+value for key, value in params.items()])
        self.google_login.go(auth='o/oauth2/auth', params=queryString).login(self.username, self.password)

        if self.google_login.is_here():
            self.page.login(self.username, self.password)

        try:
            self.code = parse_qs(urlparse(self.url).query).get('code')[0]
        except:
            raise BrowserIncorrectPassword()

Example #18

0

Show file

File: browsers.py Project: P4ncake/weboob

    def get_referrer(self, oldurl, newurl):
        """
        Get the referrer to send when doing a request.
        If we should not send a referrer, it will return None.

        Reference: https://en.wikipedia.org/wiki/HTTP_referer

        The behavior can be controlled through the ALLOW_REFERRER attribute.
        True always allows the referers
        to be sent, False never, and None only if it is within
        the same domain.

        :param oldurl: Current absolute URL
        :type oldurl: str or None

        :param newurl: Target absolute URL
        :type newurl: str

        :rtype: str or None
        """
        if self.ALLOW_REFERRER is False:
            return
        if oldurl is None:
            return
        old = urlparse(oldurl)
        new = urlparse(newurl)
        # Do not leak secure URLs to insecure URLs
        if old.scheme == 'https' and new.scheme != 'https':
            return
        # Reloading the page. Usually no referrer.
        if oldurl == newurl:
            return
        # Domain-based privacy
        if self.ALLOW_REFERRER is None and old.netloc != new.netloc:
            return
        return oldurl

Example #19

0

Show file

    def get_referrer(self, oldurl, newurl):
        """
        Get the referrer to send when doing a request.
        If we should not send a referrer, it will return None.

        Reference: https://en.wikipedia.org/wiki/HTTP_referer

        The behavior can be controlled through the ALLOW_REFERRER attribute.
        True always allows the referers
        to be sent, False never, and None only if it is within
        the same domain.

        :param oldurl: Current absolute URL
        :type oldurl: str or None

        :param newurl: Target absolute URL
        :type newurl: str

        :rtype: str or None
        """
        if self.ALLOW_REFERRER is False:
            return
        if oldurl is None:
            return
        old = urlparse(oldurl)
        new = urlparse(newurl)
        # Do not leak secure URLs to insecure URLs
        if old.scheme == 'https' and new.scheme != 'https':
            return
        # Reloading the page. Usually no referrer.
        if oldurl == newurl:
            return
        # Domain-based privacy
        if self.ALLOW_REFERRER is None and old.netloc != new.netloc:
            return
        return oldurl

Example #20

0

Show file

File: google.py Project: laurentb/weboob

    def do_login(self):
        params = {'response_type': 'code',
                  'client_id': '534890559860-r6gn7e3agcpiriehe63dkeus0tpl5i4i.apps.googleusercontent.com',
                  'redirect_uri': self.redirect_uri}

        queryString = "&".join([key+'='+value for key, value in params.items()])
        self.google_login.go(auth='o/oauth2/auth', params=queryString).login(self.username, self.password)

        if self.google_login.is_here():
            self.page.login(self.username, self.password)

        try:
            self.code = parse_qs(urlparse(self.url).query).get('code')[0]
        except:
            raise BrowserIncorrectPassword()

Example #21

0

Show file

File: accounts_list.py Project: antibios/weboob

        def prepare_url(url, fields):
            components = urlparse(url)
            query_pairs = [(f, v) for (f, v) in parse_qsl(components.query)
                           if f not in fields]

            for (field, value) in fields.items():
                query_pairs.append((field, value))

            new_query_str = urlencode(query_pairs)

            new_components = (components.scheme, components.netloc,
                              components.path, components.params,
                              new_query_str, components.fragment)

            return urlunparse(new_components)

Example #22

0

Show file

    def request_access_token(self, auth_uri):
        self.logger.info('requesting access token')

        if isinstance(auth_uri, dict):
            values = auth_uri
        else:
            values = dict(parse_qsl(urlparse(auth_uri).query))
        self.handle_callback_error(values)
        data = self.build_access_token_parameters(values)
        try:
            auth_response = self.do_token_request(data).json()
        except ClientError:
            raise BrowserIncorrectPassword()

        self.update_token(auth_response)

Example #23

0

Show file

File: browser.py Project: laurentb/weboob

    def quit_market_website(self):
        parsed = urlparse(self.url)
        exit_url = '%s://%s/netfinca-titres/servlet/com.netfinca.frontcr.login.ContextTransferDisconnect' % (parsed.scheme, parsed.netloc)
        page = self.open(exit_url).page
        try:
            form = page.get_form(name='formulaire')
        except FormNotFound:
            msg = CleanText(u'//b[contains(text() , "Nous vous invitons à créer un mot de passe trading.")]')(self.page.doc)
            if msg:
                raise ActionNeeded(msg)
        else:
            # 'act' parameter allows page recognition, this parameter is ignored by
            # server
            self.location(form.url + '&act=Synthepargnes')

        self.update_sag()

Example #24

0

Show file

File: videoob.py Project: P4ncake/weboob

    def download(self, video, dest, default=None):
        if not video.url:
            print('Error: the direct URL is not available.', file=self.stderr)
            return 4

        def check_exec(executable):
            with open(os.devnull, 'w') as devnull:
                process = subprocess.Popen(['which', executable], stdout=devnull)
                if process.wait() != 0:
                    print('Please install "%s"' % executable, file=self.stderr)
                    return False
            return True

        dest = self.obj_to_filename(video, dest, default)

        if video.url.startswith('rtmp'):
            if not check_exec('rtmpdump'):
                return 1
            args = ('rtmpdump', '-e', '-r', video.url, '-o', dest)
        elif video.url.startswith('mms'):
            if not check_exec('mimms'):
                return 1
            args = ('mimms', '-r', video.url, dest)
        elif u'm3u8' == video.ext:
            _dest, _ = os.path.splitext(dest)
            dest = u'%s.%s' % (_dest, 'mp4')
            content = tuple()
            parsed_uri = urlparse(video.url)
            baseurl = '{uri.scheme}://{uri.netloc}'.format(uri=parsed_uri)
            for line in self.read_url(video.url):
                line = line.decode('utf-8')
                if not line.startswith('#'):
                    if not line.startswith('http'):
                        line = u'%s%s' % (baseurl, line)
                    content += (line,)

            args = ('wget', '-nv',) + content + ('-O', dest)
        else:
            if check_exec('wget'):
                args = ('wget', '-c', video.url, '-O', dest)
            elif check_exec('curl'):
                args = ('curl', '-C', '-', video.url, '-o', dest)
            else:
                return 1

        self.logger.debug(' '.join(args))
        os.spawnlp(os.P_WAIT, args[0], *args)

Example #25

0

Show file

File: videoob.py Project: linura/weboob

    def download(self, video, dest, default=None):
        if not video.url:
            print('Error: the direct URL is not available.', file=self.stderr)
            return 4

        def check_exec(executable):
            with open(os.devnull, 'w') as devnull:
                process = subprocess.Popen(['which', executable], stdout=devnull)
                if process.wait() != 0:
                    print('Please install "%s"' % executable, file=self.stderr)
                    return False
            return True

        dest = self.obj_to_filename(video, dest, default)

        if video.url.startswith('rtmp'):
            if not check_exec('rtmpdump'):
                return 1
            args = ('rtmpdump', '-e', '-r', video.url, '-o', dest)
        elif video.url.startswith('mms'):
            if not check_exec('mimms'):
                return 1
            args = ('mimms', '-r', video.url, dest)
        elif u'm3u8' == video.ext:
            _dest, _ = os.path.splitext(dest)
            dest = u'%s.%s' % (_dest, 'mp4')
            content = tuple()
            parsed_uri = urlparse(video.url)
            baseurl = '{uri.scheme}://{uri.netloc}'.format(uri=parsed_uri)
            for line in self.read_url(video.url):
                line = line.decode('utf-8')
                if not line.startswith('#'):
                    if not line.startswith('http'):
                        line = u'%s%s' % (baseurl, line)
                    content += (line,)

            args = ('wget', '-nv',) + content + ('-O', dest)
        else:
            if check_exec('wget'):
                args = ('wget', '-c', video.url, '-O', dest)
            elif check_exec('curl'):
                args = ('curl', '-C', '-', video.url, '-o', dest)
            else:
                return 1

        self.logger.debug(' '.join(args))
        os.spawnlp(os.P_WAIT, args[0], *args)

Example #26

0

Show file

File: browser.py Project: laurentb/weboob

    def moveto_market_website(self, account, home=False):
        response = self.open(account.url % self.sag).text
        self._sag = None
        # https://www.cabourse.credit-agricole.fr/netfinca-titres/servlet/com.netfinca.frontcr.navigation.AccueilBridge?TOKEN_ID=
        m = re.search('document.location="([^"]+)"', response)
        if m:
            url = m.group(1)
        else:
            self.logger.warning('Unable to go to market website')
            raise WebsiteNotSupported()

        self.open(url)
        if home:
            return 'https://www.cabourse.credit-agricole.fr/netfinca-titres/servlet/com.netfinca.frontcr.synthesis.HomeSynthesis'
        parsed = urlparse(url)
        url = '%s://%s/netfinca-titres/servlet/com.netfinca.frontcr.account.WalletVal?nump=%s:%s'
        return url % (parsed.scheme, parsed.netloc, account.id, self.code_caisse)

Example #27

0

Show file

File: accounts_list.py Project: antibios/weboob

    def iter_transactions(self):
        url = self.get_part_url()
        if url is None:
            # There are no transactions in this kind of account
            return

        is_deferred_card = bool(
            self.doc.xpath(u'//div[contains(text(), "Différé")]'))
        has_summary = False
        while True:
            d = XML(self.browser.open(url).content)
            el = d.xpath('//dataBody')
            if not el:
                return

            el = el[0]
            s = unicode(el.text).encode('iso-8859-1')
            doc = fromstring(s)

            for tr in self._iter_transactions(doc):
                if tr.type == Transaction.TYPE_CARD_SUMMARY:
                    has_summary = True
                if is_deferred_card and tr.type is Transaction.TYPE_CARD:
                    tr.type = Transaction.TYPE_DEFERRED_CARD
                    if not has_summary:
                        tr._coming = True
                yield tr

            el = d.xpath('//dataHeader')[0]
            if int(el.find('suite').text) != 1:
                return

            url = urlparse(url)
            p = parse_qs(url.query)

            args = {}
            args['n10_nrowcolor'] = 0
            args['operationNumberPG'] = el.find('operationNumber').text
            args['operationTypePG'] = el.find('operationType').text
            args['pageNumberPG'] = el.find('pageNumber').text
            args['idecrit'] = el.find('idecrit').text or ''
            args['sign'] = p['sign'][0]
            args['src'] = p['src'][0]

            url = '%s?%s' % (url.path, urlencode(args))

Example #28

0

Show file

File: browser.py Project: antibios/weboob

    def moveto_market_website(self, account, home=False):
        response = self.open(account.url % self.sag).text
        self._sag = None
        # https://www.cabourse.credit-agricole.fr/netfinca-titres/servlet/com.netfinca.frontcr.navigation.AccueilBridge?TOKEN_ID=
        m = re.search('document.location="([^"]+)"', response)
        if m:
            url = m.group(1)
        else:
            self.logger.warn('Unable to go to market website')
            raise WebsiteNotSupported()

        self.open(url)
        if home:
            return 'https://www.cabourse.credit-agricole.fr/netfinca-titres/servlet/com.netfinca.frontcr.synthesis.HomeSynthesis'
        parsed = urlparse(url)
        url = '%s://%s/netfinca-titres/servlet/com.netfinca.frontcr.account.WalletVal?nump=%s:%s'
        return url % (parsed.scheme, parsed.netloc, account.id,
                      self.code_caisse)

Example #29

0

Show file

    def do_login(self):
        self.login_page.go()
        self.page.login(self.username, self.password, self.lastname)

        # q is timestamp millisecond
        self.app_config.go(params={'q': int(time()*1000)})
        client_id = self.page.get_client_id()

        params = {
            'client_id': client_id,
            'response_type': 'id_token token',
            'redirect_uri': 'https://www.bouyguestelecom.fr/mon-compte/'
        }
        self.location('https://oauth2.bouyguestelecom.fr/authorize', params=params)
        fragments = dict(parse_qsl(urlparse(self.url).fragment))

        self.id_personne = jwt.get_unverified_claims(fragments['id_token'])['id_personne']
        authorization = 'Bearer ' + fragments['access_token']
        self.headers = {'Authorization': authorization}

Example #30

0

Show file

File: browser.py Project: antibios/weboob

    def quit_market_website(self):
        parsed = urlparse(self.url)
        exit_url = '%s://%s/netfinca-titres/servlet/com.netfinca.frontcr.login.ContextTransferDisconnect' % (
            parsed.scheme, parsed.netloc)
        page = self.open(exit_url).page
        try:
            form = page.get_form(name='formulaire')
        except FormNotFound:
            msg = CleanText(
                u'//b[contains(text() , "Nous vous invitons à créer un mot de passe trading.")]'
            )(self.page.doc)
            if msg:
                raise ActionNeeded(msg)
        else:
            # 'act' parameter allows page recognition, this parameter is ignored by
            # server
            self.location(form.url + '&act=Synthepargnes')

        self.update_sag()

Example #31

0

Show file

File: accounts_list.py Project: P4ncake/weboob

        def prepare_url(url, fields):
            components = urlparse(url)
            query_pairs = [(f, v) for (f, v) in parse_qsl(components.query) if f not in fields]

            for (field, value) in fields.items():
                query_pairs.append((field, value))

            new_query_str = urlencode(query_pairs)

            new_components = (
                components.scheme,
                components.netloc,
                components.path,
                components.params,
                new_query_str,
                components.fragment
            )

            return urlunparse(new_components)

Example #32

0

Show file

    def do_login(self):
        self.login_page.go()

        try:
            self.page.login(self.username, self.password, self.lastname)
        except ClientError as e:
            if e.response.status_code == 401:
                raise BrowserIncorrectPassword()
            raise

        if self.login_page.is_here():
            msg = self.page.get_error_message()
            raise BrowserIncorrectPassword(msg)

        if self.forgotten_password_page.is_here():
            # when too much attempt has been done in a short time, bouygues redirect us here,
            # but no message is available on this page
            raise BrowserIncorrectPassword()

        # q is timestamp millisecond
        self.app_config.go(params={'q': int(time() * 1000)})
        client_id = self.page.get_client_id()

        params = {
            'client_id': client_id,
            'response_type': 'id_token token',
            'redirect_uri': 'https://www.bouyguestelecom.fr/mon-compte/'
        }
        self.location('https://oauth2.bouyguestelecom.fr/authorize',
                      params=params)
        fragments = dict(parse_qsl(urlparse(self.url).fragment))

        self.id_personne = jwt.get_unverified_claims(
            fragments['id_token'])['id_personne']
        authorization = 'Bearer ' + fragments['access_token']
        self.headers = {'Authorization': authorization}

Example #33

0

Show file

 def build_authorization_uri(self):
     p = urlparse(self.AUTHORIZATION_URI)
     q = dict(parse_qsl(p.query))
     q.update(self.build_authorization_parameters())
     return p._replace(query=urlencode(q)).geturl()

Example #34

0

Show file

File: browser.py Project: antibios/weboob

    def do_login(self):
        """
        Attempt to log in.
        Note: this method does nothing if we are already logged in.
        """
        self.BASEURL = 'https://%s/' % self.first_domain
        self._sag = None

        if not self.home_page.is_here():
            self.home_page.go()

        if self.new_login:
            self.page.go_to_auth()
            parsed = urlparse(self.url)
            self.BASEURL = '%s://%s' % (parsed.scheme, parsed.netloc)
        else:
            # On the homepage, we get the URL of the auth service.
            url = self.page.get_post_url()
            if url is None:
                raise WebsiteNotSupported()

            # First, post account number to get the password prompt.
            data = {
                'CCPTE': self.username[:11].encode('iso8859-15'),
                'canal': 'WEB',
                'hauteur_ecran': 768,
                'largeur_ecran': 1024,
                'liberror': '',
                'matrice': 'true',
                'origine': 'vitrine',
                'situationTravail': 'BANCAIRE',
                'typeAuthentification': 'CLIC_ALLER',
                'urlOrigine': self.page.url,
                'vitrine': 0,
            }

            parsed = urlparse(url)
            self.BASEURL = '%s://%s' % (parsed.scheme, parsed.netloc)
            self.location(url, data=data)

        assert self.login_page.is_here()

        # Then, post the password.
        self.page.login(self.username, self.password)

        if self.new_login:
            url = self.page.get_accounts_url()
        else:
            # The result of POST is the destination URL.
            url = self.page.get_result_url()

        if not url.startswith('http'):
            raise BrowserIncorrectPassword(unescape(url, unicode_snob=True))

        self.location(url.replace('Synthese', 'Synthcomptes'))

        if self.login_error.is_here():
            raise BrowserIncorrectPassword()

        if self.page is None:
            raise WebsiteNotSupported()

        if not self.accounts.is_here():
            # Sometimes the home page is Releves.
            new_url = re.sub('act=([^&=]+)', 'act=Synthcomptes', self.page.url,
                             1)
            self.location(new_url)

        if not self.accounts.is_here():
            raise BrowserIncorrectPassword()

        if self.code_caisse is None:
            self.code_caisse = self.page.get_code_caisse()

        # Store the current url to go back when requesting accounts list.
        self.accounts_url = re.sub('sessionSAG=[^&]+', 'sessionSAG={0}',
                                   self.page.url)

        # we can deduce the URL to "savings" and "loan" accounts from the regular accounts one
        self.savings_url = re.sub('act=([^&=]+)', 'act=Synthepargnes',
                                  self.accounts_url, 1)
        self.loans_url = re.sub('act=([^&=]+)', 'act=Synthcredits',
                                self.accounts_url, 1)
        self.advisor_url = re.sub('act=([^&=]+)', 'act=Contact',
                                  self.accounts_url, 1)
        self.profile_url = re.sub('act=([^&=]+)', 'act=Coordonnees',
                                  self.accounts_url, 1)

        if self.page.check_perimeters() and not self.broken_perimeters:
            self.perimeter_url = re.sub('act=([^&=]+)', 'act=Perimetre',
                                        self.accounts_url, 1)
            self.chg_perimeter_url = '%s%s' % (re.sub(
                'act=([^&=]+)', 'act=ChgPerim', self.accounts_url,
                1), '&typeaction=ChgPerim')
            self.location(self.perimeter_url.format(self.sag))
            self.page.check_multiple_perimeters()

Example #35

0

Show file

File: browser.py Project: laurentb/weboob

    def do_login(self):
        """
        Attempt to log in.
        Note: this method does nothing if we are already logged in.
        """
        self.BASEURL = 'https://%s/' % self.first_domain
        self._sag = None

        if not self.home_page.is_here():
            self.home_page.go()

        if self.new_website.is_here():
            self.logger.warning('This connection uses the new API website')
            raise SiteSwitch('api')

        if self.new_login:
            self.page.go_to_auth()
            parsed = urlparse(self.url)
            self.BASEURL = '%s://%s' % (parsed.scheme, parsed.netloc)
        else:
            # On the homepage, we get the URL of the auth service.
            url = self.page.get_post_url()
            if url is None:
                raise WebsiteNotSupported()

            # First, post account number to get the password prompt.
            data = {'CCPTE':                self.username[:11].encode('iso8859-15'),
                    'canal':                'WEB',
                    'hauteur_ecran':        768,
                    'largeur_ecran':        1024,
                    'liberror':             '',
                    'matrice':              'true',
                    'origine':              'vitrine',
                    'situationTravail':     'BANCAIRE',
                    'typeAuthentification': 'CLIC_ALLER',
                    'urlOrigine':           self.page.url,
                    'vitrine':              0,
                }

            parsed = urlparse(url)
            self.BASEURL = '%s://%s' % (parsed.scheme, parsed.netloc)
            self.location(url, data=data)

        assert self.login_page.is_here()

        # Then, post the password.
        self.page.login(self.username, self.password)

        if self.new_login:
            url = self.page.get_accounts_url()
        else:
            # The result of POST is the destination URL.
            url = self.page.get_result_url()

        if not url.startswith('http'):
            raise BrowserIncorrectPassword(unescape(url, unicode_snob=True))

        self.location(url.replace('Synthese', 'Synthcomptes'))

        if self.login_error.is_here():
            raise BrowserIncorrectPassword()

        if self.page is None:
            raise WebsiteNotSupported()

        if not self.accounts.is_here():
            # Sometimes the home page is Releves.
            new_url  = re.sub('act=([^&=]+)', 'act=Synthcomptes', self.page.url, 1)
            self.location(new_url)

        if not self.accounts.is_here():
            raise BrowserIncorrectPassword()

        if self.code_caisse is None:
            self.code_caisse = self.page.get_code_caisse()

        # Store the current url to go back when requesting accounts list.
        self.accounts_url = re.sub('sessionSAG=[^&]+', 'sessionSAG={0}', self.page.url)

        # we can deduce the URL to "savings" and "loan" accounts from the regular accounts one
        self.savings_url  = re.sub('act=([^&=]+)', 'act=Synthepargnes', self.accounts_url, 1)
        self.loans_url  = re.sub('act=([^&=]+)', 'act=Synthcredits', self.accounts_url, 1)
        self.advisor_url  = re.sub('act=([^&=]+)', 'act=Contact', self.accounts_url, 1)
        self.profile_url  = re.sub('act=([^&=]+)', 'act=Coordonnees', self.accounts_url, 1)

        if self.page.check_perimeters() and not self.broken_perimeters:
            self.perimeter_url = re.sub('act=([^&=]+)', 'act=Perimetre', self.accounts_url, 1)
            self.chg_perimeter_url = '%s%s' % (re.sub('act=([^&=]+)', 'act=ChgPerim', self.accounts_url, 1), '&typeaction=ChgPerim')
            self.location(self.perimeter_url.format(self.sag))
            self.page.check_multiple_perimeters()

Example #36

0

Show file

File: module.py Project: P4ncake/weboob

 def search_galleries(self, pattern, sortby=CapGallery.SEARCH_RELEVANCE):
     pattern = pattern.lower()
     url = self.url()
     if pattern in url or pattern in self.browser.get_title().lower():
         yield self.get_gallery(urlparse(url).netloc)

Example #37

0

Show file

File: torrents.py Project: P4ncake/weboob

    def iter_torrents(self):
        table = self.document.getroot().cssselect('table.torrent_table')
        if not table:
            table = self.document.getroot().cssselect('table#browse_torrent_table')
        if table:
            table = table[0]
            current_group = None
            for tr in table.findall('tr'):
                if tr.attrib.get('class', '') == 'colhead':
                    # ignore
                    continue
                if tr.attrib.get('class', '') == 'group':
                    tds = tr.findall('td')
                    current_group = u''
                    div = tds[-6]
                    if div.getchildren()[0].tag == 'div':
                        div = div.getchildren()[0]
                    for a in div.findall('a'):
                        if not a.text:
                            continue
                        if current_group:
                            current_group += ' - '
                        current_group += a.text
                elif tr.attrib.get('class', '').startswith('group_torrent') or \
                        tr.attrib.get('class', '').startswith('torrent'):
                    tds = tr.findall('td')

                    title = current_group
                    if len(tds) == 7:
                        # Under a group
                        i = 0
                    elif len(tds) in (8, 9):
                        # An alone torrent
                        i = len(tds) - 1
                        while i >= 0 and tds[i].find('a') is None:
                            i -= 1
                    else:
                        # Useless title
                        continue

                    if title:
                        title += u' (%s)' % tds[i].find('a').text
                    else:
                        title = ' - '.join([a.text for a in tds[i].findall('a')])
                    url = urlparse(tds[i].find('a').attrib['href'])
                    params = parse_qs(url.query)
                    if 'torrentid' in params:
                        id = '%s.%s' % (params['id'][0], params['torrentid'][0])
                    else:
                        url = tds[i].find('span').find('a').attrib['href']
                        m = self.TORRENTID_REGEXP.match(url)
                        if not m:
                            continue
                        id = '%s.%s' % (params['id'][0], m.group(1))
                    try:
                        size, unit = tds[i + 3].text.split()
                    except ValueError:
                        size, unit = tds[i + 2].text.split()
                    size = get_bytes_size(float(size.replace(',', '')), unit)
                    seeders = int(tds[-2].text)
                    leechers = int(tds[-1].text)

                    torrent = Torrent(id, title)
                    torrent.url = self.format_url(url)
                    torrent.size = size
                    torrent.seeders = seeders
                    torrent.leechers = leechers
                    yield torrent
                else:
                    debug('unknown attrib: %s' % tr.attrib)

Example #38

0

Show file

File: browser.py Project: dkremer-ledger/weboob

 def fc_redirect(self, url):
     self.BASEURL = 'https://app.franceconnect.gouv.fr'
     self.location(url)
     self.page.redirect()
     parse_result = urlparse(self.url)
     self.BASEURL = parse_result.scheme + '://' + parse_result.netloc

Example #39

0

Show file

File: browser.py Project: P4ncake/weboob

 def getCurrentSubBank(self):
     # the account list and history urls depend on the sub bank of the user
     paths = urlparse(self.url).path.lstrip('/').split('/')
     self.currentSubBank = paths[0] + "/" if paths[0] != "fr" else ""
     if paths[0] in ["fr", "mabanque"]:
         self.is_new_website = True

Example #40

0

Show file

File: pages.py Project: linura/weboob

 def get_split_path(self):
     ret = urlparse(self.url).path.split('/')[1:]
     if not ret[0]:
         ret = ret[1:]
     return ret

Example #41

0

Show file

File: pages.py Project: P4ncake/weboob

 def get_split_path(self):
     ret = urlparse(self.url).path.split('/')[1:]
     if not ret[0]:
         ret = ret[1:]
     return ret

Example #42

0

Show file

File: browser.py Project: guix77/weboob

 def set_base_url(self, place):
     if not place:
         place = self.default_place
     self.base.go(data={'query': place})
     parsed_uri = urlparse(self.page.url)
     self.BASEURL = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)

Example #43

0

Show file

 def get_context_token(self):
     parameters = dict(parse_qsl(urlparse(self.url).query))
     return parameters.get('context_token', None)

Example #44

0

Show file

def add_qs(url, **kwargs):
    parts = list(urlparse(url))
    qs = OrderedDict(parse_qsl(parts[4]))
    qs.update(kwargs)
    parts[4] = urlencode(qs)
    return urlunparse(parts)

Example #45

0

Show file

File: torrents.py Project: linura/weboob

    def iter_torrents(self):
        table = self.document.getroot().cssselect('table.torrent_table')
        if not table:
            table = self.document.getroot().cssselect(
                'table#browse_torrent_table')
        if table:
            table = table[0]
            current_group = None
            for tr in table.findall('tr'):
                if tr.attrib.get('class', '') == 'colhead':
                    # ignore
                    continue
                if tr.attrib.get('class', '') == 'group':
                    tds = tr.findall('td')
                    current_group = u''
                    div = tds[-6]
                    if div.getchildren()[0].tag == 'div':
                        div = div.getchildren()[0]
                    for a in div.findall('a'):
                        if not a.text:
                            continue
                        if current_group:
                            current_group += ' - '
                        current_group += a.text
                elif tr.attrib.get('class', '').startswith('group_torrent') or \
                        tr.attrib.get('class', '').startswith('torrent'):
                    tds = tr.findall('td')

                    title = current_group
                    if len(tds) == 7:
                        # Under a group
                        i = 0
                    elif len(tds) in (8, 9):
                        # An alone torrent
                        i = len(tds) - 1
                        while i >= 0 and tds[i].find('a') is None:
                            i -= 1
                    else:
                        # Useless title
                        continue

                    if title:
                        title += u' (%s)' % tds[i].find('a').text
                    else:
                        title = ' - '.join(
                            [a.text for a in tds[i].findall('a')])
                    url = urlparse(tds[i].find('a').attrib['href'])
                    params = parse_qs(url.query)
                    if 'torrentid' in params:
                        id = '%s.%s' % (params['id'][0],
                                        params['torrentid'][0])
                    else:
                        url = tds[i].find('span').find('a').attrib['href']
                        m = self.TORRENTID_REGEXP.match(url)
                        if not m:
                            continue
                        id = '%s.%s' % (params['id'][0], m.group(1))
                    try:
                        size, unit = tds[i + 3].text.split()
                    except ValueError:
                        size, unit = tds[i + 2].text.split()
                    size = get_bytes_size(float(size.replace(',', '')), unit)
                    seeders = int(tds[-2].text)
                    leechers = int(tds[-1].text)

                    torrent = Torrent(id, title)
                    torrent.url = self.format_url(url)
                    torrent.size = size
                    torrent.seeders = seeders
                    torrent.leechers = leechers
                    yield torrent
                else:
                    debug('unknown attrib: %s' % tr.attrib)

Example #46

0

Show file

File: browser.py Project: linura/weboob

 def getCurrentSubBank(self):
     # the account list and history urls depend on the sub bank of the user
     paths = urlparse(self.url).path.lstrip('/').split('/')
     self.currentSubBank = paths[0] + "/" if paths[0] != "fr" else ""
     if paths[0] in ["fr", "mabanque"]:
         self.is_new_website = True

Example #47

0

Show file

File: account_pages.py Project: P4ncake/weboob

 def get_params(self, url):
     parsed = urlparse(url)
     base_url, params = parsed.path, parse_qs(parsed.query)
     for a in self.doc.xpath('//form[@name="FORM_LIB_CARTE"]//a[contains(@href, "sessionid")]'):
         params['sessionid'] = parse_qs(urlparse(Link('.')(a)).query)['sessionid']
         yield base_url, params

Example #48

0

Show file

File: browser.py Project: P4ncake/weboob

 def set_base_url(self, place):
     if not place:
         place = self.default_place
     self.base.go(data={'query': place})
     parsed_uri = urlparse(self.page.url)
     self.BASEURL = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)

Example #49

0

Show file

 def get_code(self):
     return parse_qs(urlparse(self.url).query)['code'][0]

Example #50

0

Show file

File: pages.py Project: laurentb/weboob

def add_qs(url, **kwargs):
    parts = list(urlparse(url))
    qs = OrderedDict(parse_qsl(parts[4]))
    qs.update(kwargs)
    parts[4] = urlencode(qs)
    return urlunparse(parts)

Example #51

0

Show file

File: module.py Project: linura/weboob

 def get_gallery(self, _id):
     url = self.config['url'].get()
     return BaseGallery(_id, title=urlparse(url).netloc, url=url)

Example #52

0

Show file

 def search_galleries(self, pattern, sortby=CapGallery.SEARCH_RELEVANCE):
     pattern = pattern.lower()
     url = self.url()
     if pattern in url or pattern in self.browser.get_title_icon()[0].lower(
     ):
         yield self.get_gallery(urlparse(url).netloc)

Example #53

0

Show file

File: browsers.py Project: laurentb/weboob

 def build_authorization_uri(self):
     p = urlparse(self.AUTHORIZATION_URI)
     q = dict(parse_qsl(p.query))
     q.update(self.build_authorization_parameters())
     return p._replace(query=urlencode(q)).geturl()