コード例 #1
0
 def get_authorize_url(self,
                       client_id,
                       audience=None,
                       state=None,
                       redirect_uri=None,
                       response_type='code',
                       scope='openid',
                       quote_via=quote_plus):
     """Build the ``/authorize`` URL on ``self.domain`` for the given client.

     Arguments left as ``None`` are omitted from the query string rather
     than being sent as the literal text ``None``.

     Pass ``quote_via=urllib.quote`` to encode spaces as "%20"; the default
     (``quote_plus``) encodes them as "+".

     Returns the complete ``https`` URL as a string.
     """
     candidates = (
         ('client_id', client_id),
         ('audience', audience),
         ('response_type', response_type),
         ('scope', scope),
         ('state', state),
         ('redirect_uri', redirect_uri),
     )
     # Keep the declared order, but skip every argument that was not supplied.
     params = [(name, value) for name, value in candidates
               if value is not None]

     # NOTE(review): `_ver` is defined outside this block and is compared as
     # a string; confirm it still orders correctly for two-digit minor
     # versions (the string '310' sorts below '34').
     if _ver > '34':
         query = urlencode(params, doseq=True, quote_via=quote_via)
     else:
         # Presumably the path for interpreters whose urlencode() has no
         # `quote_via` argument: each pair is quoted by hand.
         query = '&'.join(
             '{}={}'.format(quote_via(name, safe=''), quote_via(value, safe=''))
             for name, value in params)
     return urlunparse(
         ['https', self.domain, '/authorize', None, query, None])
コード例 #2
0
ファイル: request.py プロジェクト: retrotrader/a4kScrapers
    def get(self, url, headers=None, allow_redirects=True):
        """Probe the host of ``url`` with a HEAD request, then issue the GET.

        The HEAD probe (``self.head``) yields the URL the host resolves to
        and a status code. The GET is sent to that resolved scheme and host,
        combined with the path, params, query and fragment of the URL the
        caller asked for.

        Args:
            url: the URL to fetch.
            headers: optional request headers; a fresh empty dict is used
                when omitted.
            allow_redirects: forwarded to the underlying ``_get`` helper.

        Returns:
            Whatever ``self._request_core`` returns, or ``None`` when the
            HEAD probe gives no response or a status other than 200.
        """
        # A new dict per call: a shared ``{}`` default would carry any
        # mutation made downstream over into later calls.
        if headers is None:
            headers = {}

        parsed_url = urlparse(url)

        response = self.head(_get_domain(url))
        if response is None:
            return None

        (url, status_code) = response
        if status_code != 200:
            return None

        # Keep where the host resolved to, but what the caller asked for.
        resolved_url = urlparse(url)
        url = urlunparse((
            resolved_url.scheme,
            resolved_url.netloc,
            parsed_url.path,
            parsed_url.params,
            parsed_url.query,
            parsed_url.fragment,
        ))

        tools.log('GET: %s' % url, 'info')
        request = lambda x: _get(self._cfscrape, url, headers, self._timeout,
                                 allow_redirects, x)
        # The target URL is attached to the callable before it is handed to
        # _request_core.
        request.url = url

        return self._request_core(request)
コード例 #3
0
ファイル: gflareresponse.py プロジェクト: beb7/gflare-tk
 def get_robots_txt_url(self, url):
     """Return the robots.txt URL for the site that serves ``url``.

     The scheme, host and any explicit port of ``url`` are kept and the
     path is replaced with ``robots.txt``.
     """
     comps = parse_url(url)
     # `netloc` is the host plus any explicit port. Using `host` alone sent
     # sites on a non-default port to the wrong server.
     # NOTE(review): the page's query and fragment are still carried over
     # onto the robots.txt URL - confirm that is intended.
     url = requote_uri(
         urlunparse([
             comps.scheme, comps.netloc, 'robots.txt', None, comps.query,
             comps.fragment
         ]))
     return url
コード例 #4
0
  def get_url(self, path='/', websocket=False, remote=True,
              attach_api_key=True, userId=None, pass_uid=False, **query):
    '''Assemble the full url for an emby request.

    Parameters
    ----------
    path : str
      uri path (without domain and port) of the request; may contain the
      placeholders `{UserId}`, `{ApiKey}` and `{DeviceId}`
    websocket : bool, optional
      if true, `ws(s)` replaces `http(s)` as the scheme
    remote : bool, optional
      if true, the remote address is preferred when one is set
      (default True)
    attach_api_key : bool, optional
      if true and an api key is set, `api_key` and `deviceId` are added
      to the query (default True)
    userId : str, optional
      uid to use; falls back to `self.userid` when empty
    pass_uid : bool, optional
      if true, the uid is added to the query as `userId` (default False)
    query : karg dict
      additional parameters for the part of the url after the `?`

    Also See
    --------
      get :
      getJson :
      post :
      delete :

    Returns
    -------
    full url as a string, without a trailing `?` when the query is empty
    '''
    uid = userId or self.userid

    if attach_api_key and self.api_key:
      query['api_key'] = self.api_key
      query['deviceId'] = self.device_id
    if pass_uid:
      query['userId'] = uid

    base = (self.urlremote or self.url) if remote else self.url
    if websocket:
      scheme = {'http':'ws', 'https':'wss'}[base.scheme]
    else:
      scheme = base.scheme

    # The query slot holds a `{params}` placeholder so that a single
    # format() call fills both it and any placeholders inside `path`.
    template = urlunparse(
      (scheme, base.netloc + '/emby', path, '', '{params}', ''))
    full = template.format(
      UserId=uid,
      ApiKey=self.api_key,
      DeviceId=self.device_id,
      params=urlencode(query),
    )

    # An empty query leaves a dangling '?' behind; drop it.
    if full[-1] == '?':
      return full[:-1]
    return full
コード例 #5
0
ファイル: util.py プロジェクト: pagreene/grip
def process_url(url):
    """Reduce ``url`` to ``scheme://netloc[/path]``.

    Params, query and fragment are always discarded. A missing scheme
    becomes ``http``. When the URL has no network location but does have a
    path, the first path segment is used as the network location and the
    rest of the path is dropped.
    """
    parts = urlparse(url)

    scheme = "http" if parts.scheme == "" else parts.scheme
    netloc = parts.netloc
    path = parts.path

    if netloc == "" and path != "":
        # e.g. "example.com/foo": the host was parsed as part of the path.
        netloc, path = path.split("/")[0], ""

    return urlunparse((scheme, netloc, path, "", "", ""))
コード例 #6
0
ファイル: __init__.py プロジェクト: jack2015/TSmedia
    def solve_cf_challenge(self, resp, **original_kwargs):
        """Answer the Cloudflare challenge page in ``resp`` and replay the request.

        The hidden form fields are scraped from the page, the answer from
        ``self.solve_challenge`` is added, and everything is submitted to
        ``/cdn-cgi/l/chk_jschl`` on the same host after the required delay.
        The resulting redirect is then followed by hand with the original
        HTTP method.

        Args:
            resp: the response that contains the challenge page.
            **original_kwargs: keyword arguments of the original request;
                they are reused for the final request.

        Returns:
            The response of the request made to the redirect target.

        Raises:
            ValueError: if the challenge form fields cannot be extracted.
        """
        start_time = time.time()
        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = parsed_url.netloc
        submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)
        # Work on a copy so the caller's kwargs are reused untouched below.
        cloudflare_kwargs = copy.deepcopy(original_kwargs)
        headers = cloudflare_kwargs.setdefault("headers", {})
        headers["Referer"] = resp.url
        try:
            params = cloudflare_kwargs["params"] = OrderedDict(
                re.findall(r'name="(s|jschl_vc|pass)"(?: [^<>]*)? value="(.+?)"', body)
            )

            for k in ("jschl_vc", "pass"):
                if k not in params:
                    raise ValueError("%s is missing from challenge form" % k)
        except Exception as e:
            # Something is wrong with the page.
            # This may indicate Cloudflare has changed their anti-bot
            # technique. If you see this and are running the latest version,
            # please open a GitHub issue so I can update the code accordingly.
            #
            # The exception itself is formatted: ``e.message`` only exists on
            # Python 2, and on Python 3 it raised AttributeError here and hid
            # the real problem.
            raise ValueError(
                "Unable to parse Cloudflare anti-bot IUAM page: %s %s"
                % (e, BUG_REPORT)
            )

        # Solve the Javascript challenge
        answer, delay = self.solve_challenge(body, domain)
        params["jschl_answer"] = answer
        # Requests transforms any request into a GET after a redirect,
        # so the redirect has to be handled manually here to allow for
        # performing other types of requests even as the first request.
        method = resp.request.method
        cloudflare_kwargs["allow_redirects"] = False
        # Cloudflare requires a delay before solving the challenge
        time.sleep(max(delay - (time.time() - start_time), 0))
        # Send the challenge response and handle the redirect manually
        redirect = self.request(method, submit_url, **cloudflare_kwargs)
        redirect_location = urlparse(redirect.headers["Location"])
        if not redirect_location.netloc:
            # Relative redirect: resolve it against the challenged host.
            redirect_url = urlunparse(
                (
                    parsed_url.scheme,
                    domain,
                    redirect_location.path,
                    redirect_location.params,
                    redirect_location.query,
                    redirect_location.fragment,
                )
            )
            return self.request(method, redirect_url, **original_kwargs)
        return self.request(method, redirect.headers["Location"], **original_kwargs)
コード例 #7
0
ファイル: utils.py プロジェクト: phanaj/gentb-site
 def public_url(self):
     """
     Return the public url, often the same as the private url, but
     it can be replaced by a url:// wrapper which caches passwords
     onto the disk.

     When the url carries a password, its netloc and scheme are written as
     JSON to the cache file named after the md5 digest of the netloc, and
     the returned url uses the ``url`` scheme with that digest as netloc.
     """
     url = self.url
     if url.password:
         # Hash functions take bytes; encode a text netloc so this also
         # works where str is unicode (Python 3).
         netloc = url.netloc
         if not isinstance(netloc, bytes):
             netloc = netloc.encode('utf-8')
         digest = md5(netloc).hexdigest()
         with open(self._cache_file(digest), 'w') as fhl:
             fhl.write(json.dumps({'netloc': url.netloc, 'scheme': url.scheme}))
         url = url._replace(netloc=digest, scheme='url')
     return urlunparse(url)
コード例 #8
0
ファイル: cfscrape.py プロジェクト: N2Roar/roar-repository
 def solve_cf_challenge(self, resp, **original_kwargs):
     """Answer the Cloudflare challenge page in ``resp`` and replay the request.

     Increments ``self.tries``, scrapes the hidden form fields from the
     page, adds the answer computed by ``solve_challenge`` and submits them
     to ``/cdn-cgi/l/chk_jschl`` on the same host. The redirect that comes
     back is followed by hand with the original HTTP method.

     Args:
         resp: the response that contains the challenge page.
         **original_kwargs: keyword arguments of the original request;
             they are reused for the final request.

     Returns:
         The response of the request made to the redirect target.

     Raises:
         ValueError: if the challenge form fields cannot be extracted.
     """
     self.tries += 1
     start_time = time.time()
     body = resp.text
     parsed_url = urlparse(resp.url)
     domain = parsed_url.netloc
     submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme,
                                                   domain)
     # Work on a copy so the caller's kwargs are reused untouched below.
     cloudflare_kwargs = copy.deepcopy(original_kwargs)
     headers = cloudflare_kwargs.setdefault("headers", {})
     headers["Referer"] = resp.url
     try:
         params = cloudflare_kwargs["params"] = OrderedDict(
             re.findall(
                 r'name="(s|jschl_vc|pass)"(?: [^<>]*)? value="(.+?)"',
                 body))
         for k in ("jschl_vc", "pass"):
             if k not in params:
                 raise ValueError("%s is missing from challenge form" % k)
     except Exception as e:
         # Format the exception itself: ``e.message`` only exists on
         # Python 2, and on Python 3 it raised AttributeError here and hid
         # the real problem.
         raise ValueError(
             "Unable to parse Cloudflare anti-bot IUAM page: %s %s" %
             (e, BUG_REPORT))
     try:
         answer, delay = solve_challenge(body, domain)
     except Exception:
         # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
         # still propagate.
         # NOTE(review): raise_captcha_error() presumably raises; if it ever
         # returns, `answer` and `delay` below are unbound - confirm.
         self.raise_captcha_error()
     params["jschl_answer"] = answer
     method = resp.request.method
     # The redirect is followed by hand below, so keep requests from doing it.
     cloudflare_kwargs["allow_redirects"] = False
     if not self.delay:
         # No fixed delay configured: wait out the remainder of the delay
         # reported with the challenge.
         time.sleep(max(delay - (time.time() - start_time), 0))
     else:
         time.sleep(self.delay)
     redirect = self.request(method, submit_url, **cloudflare_kwargs)
     redirect_location = urlparse(redirect.headers["Location"])
     if not redirect_location.netloc:
         # Relative redirect: resolve it against the challenged host.
         redirect_url = urlunparse((
             parsed_url.scheme,
             domain,
             redirect_location.path,
             redirect_location.params,
             redirect_location.query,
             redirect_location.fragment,
         ))
         return self.request(method, redirect_url, **original_kwargs)
     return self.request(method, redirect.headers["Location"],
                         **original_kwargs)
コード例 #9
0
ファイル: gflareresponse.py プロジェクト: beb7/gflare-tk
    def sanitise_url(self, url: str, base_url='') -> str:
        """Cleans a given input URL and returns a RFC compliant URL as a string.

        Args:
            url: the URL to clean; bytes are decoded as UTF-8, anything else
                is converted with ``str()``.
            base_url: when non-empty, ``url`` is first resolved against it.

        Returns:
            The cleaned URL without its fragment, or ``None`` when the URL
            cannot be parsed or has no host.
        """

        if isinstance(url, bytes):
            url = url.decode('utf8')
        else:
            url = str(url)

        # Remove leading whitespaces from url
        url = url.lstrip()

        if base_url:
            url = urljoin(base_url, url)

        try:
            scheme, auth, host, port, path, query, fragment = parse_url(url)
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
            # still propagate; unparseable input is reported as None.
            return None

        # Without a host no network location can be built. Report it the
        # same way as unparseable input instead of failing on `'' + None`.
        if host is None:
            return None

        # Carefully reconstruct the network location
        netloc = auth or ''
        if netloc:
            netloc += '@'
        netloc += host
        # Only report on ports if they are used in a non-standard way
        if port and (scheme, port) not in (('http', 80), ('https', 443)):
            netloc += ':' + str(port)

        # Bare domains aren't valid URLs.
        if not path:
            path = '/'

        url = requote_uri(
            urlunparse([scheme, netloc, path, None, query, fragment]))

        # Search engines ignore hash fragments hence we remove them from URLs
        url = url.split('#')[0]

        return url
コード例 #10
0
ファイル: connection.py プロジェクト: tozka/pyloginsight
    def get_session(self, previousresponse, **kwargs):
        """Perform a session login and return a new session ID.

        The request behind ``previousresponse`` is copied, stripped of its
        Authorization header and re-targeted as a POST to the sessions
        endpoint on the same scheme and host, with the credentials as its
        JSON body. It is sent over the connection of ``previousresponse``.

        Args:
            previousresponse: a response whose request and connection are
                reused for the login.
            **kwargs: forwarded to ``connection.send``.

        Returns:
            The ``sessionId`` value from the server's JSON reply.

        Raises:
            Unauthorized: if credentials are missing or the reply contains
                no session ID.
            NotBootstrapped: if the server answers 503 with an error message
                containing "should be bootstrapped".
        """
        if self.username is None or self.password is None:
            raise Unauthorized("Cannot authenticate without username/password")
        logger.info("Attempting to authenticate as {0}".format(self.username))
        authdict = {
            "username": self.username,
            "password": self.password,
            "provider": self.provider
        }

        prep = previousresponse.request.copy()
        # Drop any stale credentials; pop() tolerates a missing header, so
        # one call replaces the previous delete-then-check-and-delete pair.
        prep.headers.pop('Authorization', None)

        prep.prepare_method("post")
        p = urlparse(previousresponse.request.url)
        prep.prepare_url(urlunparse(
            [p.scheme, p.netloc, APIV1 + "/sessions", None, None, None]),
                         params=None)

        logger.debug("Authenticating via url: {0}".format(prep.url))
        prep.prepare_body(data=None, files=None, json=authdict)
        authresponse = previousresponse.connection.send(
            prep, **kwargs)  # kwargs contains ssl _verify

        # Parse the body once. A reply that is not JSON is an authentication
        # failure, not a decode error leaking out of the error handling.
        try:
            payload = authresponse.json()
        except ValueError:
            payload = None

        if isinstance(payload, dict):
            if 'sessionId' in payload:
                return payload['sessionId']
            message = payload.get('errorMessage')
            if (authresponse.status_code == 503
                    and 'should be bootstrapped' in (message or '')):
                raise NotBootstrapped(message, authresponse)
        raise Unauthorized("Authentication failed", authresponse)
コード例 #11
0
    def request(self, *args, **kwargs):
        """Send a request with the instance defaults applied, retrying on failure.

        ``args[0]`` is the HTTP method and ``args[1]`` the URL. Instance
        defaults are filled in for ``proxies``, ``allow_redirects``,
        ``timeout`` and ``verify`` when the caller did not pass them. The
        ``Host`` header always carries the original host, while the request
        itself may be sent to ``dest_ip_addr`` or to a cached address.

        Returns:
            The result of the first attempt that succeeds.

        Raises:
            RequestException: on too many redirects, on a connection error
                while a proxy handler is set, or when no attempt is made.
            Exception: the last connection or timeout error once
                ``self.max_retries`` attempts have all failed.
        """
        method = args[0]
        protocol, host, port, path, query = self._normalize_url(args[1])

        kwargs.setdefault('proxies', self.proxies)
        kwargs.setdefault('allow_redirects', self.allow_redirects)
        kwargs.setdefault('timeout', self.timeout)
        kwargs.setdefault('verify', self.verify)

        # Copy so per-request headers never leak into the instance defaults.
        headers = copy.copy(self.headers)

        if 'headers' in kwargs:
            headers.update(kwargs['headers'])

        # NOTE(review): 'user_agent' and 'dest_ip_addr' stay in kwargs and
        # are forwarded to the parent request() - confirm it accepts them.
        if 'user_agent' in kwargs:
            headers.update({'User-Agent': kwargs['user_agent']})

        dst_host = host
        host_header = host

        if 'dest_ip_addr' in kwargs:
            dst_host = kwargs['dest_ip_addr']
        elif self.dest_ip_addr:
            dst_host = self.dest_ip_addr
        elif self.use_dns_cache:
            dst_host = self.get_host_cache(host)

        # Only spell out the port when it is not the scheme's default.
        if (protocol == 'http' and port != 80) or (protocol == 'https' and port != 443):
            dst_host += ':{0}'.format(port)
            host_header += ':{0}'.format(port)

        headers.update({'Host': host_header})

        url = urlunparse([protocol, dst_host, path, None, query, None])

        kwargs['headers'] = headers

        last_exception = None

        # A success returns straight away. Previously a success on the last
        # allowed attempt still tripped the "retries exhausted" check,
        # because the attempt counter was bumped even when leaving the loop.
        for _attempt in range(self.max_retries):
            try:
                result = super().request(method, url, **kwargs)
            except TooManyRedirects as e:
                raise RequestException('Too many redirects: {0}'.format(e))
            except ConnectionError as e:
                if self.proxy_handler is not None:
                    raise RequestException(
                        'Error with the proxy: {0}'.format(e))
                # Remember it so exhausting the retries re-raises a real
                # exception instead of None.
                last_exception = e
            except (ConnectTimeout,
                    ReadTimeout,
                    Timeout,
                    IncompleteRead,
                    socket.timeout) as ex:
                last_exception = ex
            else:
                time.sleep(self.delay)
                return result

        if last_exception is None:
            # No attempt ran at all (max_retries < 1): nothing to re-raise.
            raise RequestException(
                'No request attempt was made (max_retries={0})'.format(
                    self.max_retries))
        raise last_exception
コード例 #12
0
 def _key_from_url(url):
     """Reduce ``url`` to a lower-cased ``scheme://netloc`` key."""
     parts = urlparse(url)
     scheme = parts.scheme.lower()
     authority = parts.netloc.lower()
     # Path, params, query and fragment are all blanked out.
     return urlunparse((scheme, authority) + ('',) * 4)
コード例 #13
0
ファイル: __init__.py プロジェクト: Anorov/cloudflare-scrape
    def solve_cf_challenge(self, resp, **original_kwargs):
        """Answer the Cloudflare challenge page in ``resp`` and replay the request.

        The hidden form fields are scraped from the page, the answer from
        ``self.solve_challenge`` is added, and everything is submitted to
        ``/cdn-cgi/l/chk_jschl`` on the same host after the required delay.
        The resulting redirect is then followed by hand with the original
        HTTP method.

        Args:
            resp: the response that contains the challenge page.
            **original_kwargs: keyword arguments of the original request;
                they are reused for the final request.

        Returns:
            The response of the request made to the redirect target.

        Raises:
            ValueError: if the challenge form fields cannot be extracted.
        """
        start_time = time.time()

        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = parsed_url.netloc
        submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)

        # Work on a copy so the caller's kwargs are reused untouched below.
        cloudflare_kwargs = copy.deepcopy(original_kwargs)

        headers = cloudflare_kwargs.setdefault("headers", {})
        headers["Referer"] = resp.url

        try:
            params = cloudflare_kwargs["params"] = OrderedDict(
                re.findall(r'name="(s|jschl_vc|pass)"(?: [^<>]*)? value="(.+?)"', body)
            )

            for k in ("jschl_vc", "pass"):
                if k not in params:
                    raise ValueError("%s is missing from challenge form" % k)
        except Exception as e:
            # Something is wrong with the page.
            # This may indicate Cloudflare has changed their anti-bot
            # technique. If you see this and are running the latest version,
            # please open a GitHub issue so I can update the code accordingly.
            #
            # The exception itself is formatted: ``e.message`` only exists on
            # Python 2, and on Python 3 it raised AttributeError here and hid
            # the real problem.
            raise ValueError(
                "Unable to parse Cloudflare anti-bot IUAM page: %s %s"
                % (e, BUG_REPORT)
            )

        # Solve the Javascript challenge
        answer, delay = self.solve_challenge(body, domain)
        params["jschl_answer"] = answer

        # Requests transforms any request into a GET after a redirect,
        # so the redirect has to be handled manually here to allow for
        # performing other types of requests even as the first request.
        method = resp.request.method
        cloudflare_kwargs["allow_redirects"] = False

        # Cloudflare requires a delay before solving the challenge
        time.sleep(max(delay - (time.time() - start_time), 0))

        # Send the challenge response and handle the redirect manually
        redirect = self.request(method, submit_url, **cloudflare_kwargs)
        redirect_location = urlparse(redirect.headers["Location"])

        if not redirect_location.netloc:
            # Relative redirect: resolve it against the challenged host.
            redirect_url = urlunparse(
                (
                    parsed_url.scheme,
                    domain,
                    redirect_location.path,
                    redirect_location.params,
                    redirect_location.query,
                    redirect_location.fragment,
                )
            )
            return self.request(method, redirect_url, **original_kwargs)
        return self.request(method, redirect.headers["Location"], **original_kwargs)
コード例 #14
0
ファイル: utils.py プロジェクト: phanaj/gentb-site
 def __str__(self):
     """Render the private url (the one kept internally) as a string."""
     private_url = self.url
     return urlunparse(private_url)
コード例 #15
0
ファイル: utils.py プロジェクト: phanaj/gentb-site
 def file(self, filename):
     """Gets a managed url for a sub-file portion

     ``filename`` is joined onto the path of this url; every other part of
     the url is kept. The result is wrapped in a new ``ManagedUrl``.
     """
     # URL paths always use "/" separators, so join with posixpath rather
     # than the host platform's os.path (which uses "\\" on Windows).
     import posixpath
     path = posixpath.join(self.url.path, filename)
     return ManagedUrl(urlunparse(self.url._replace(path=path)))
コード例 #16
0
    def solve_cf_challenge(self, resp, **original_kwargs):
        """Answer the Cloudflare challenge form found in ``resp``.

        The ``challenge-form`` element is cut out of the page, its
        ``method`` and ``action`` attributes and its ``<input>`` fields are
        read with regular expressions, the answer from
        ``self.solve_challenge`` is added, and the form is submitted to the
        action URL on the same host. What happens next depends on the
        headers of the reply (see the branches at the end).

        Args:
            resp: the response that contains the challenge page.
            **original_kwargs: keyword arguments of the original request.

        Returns:
            The response of the follow-up request.

        Raises:
            ValueError: if collecting the form fields fails, or ``jschl_vc``
                / ``pass`` is missing from them.
            AttributeError: if the form, its ``method`` or its ``action``
                cannot be found - those lookups run outside the ``try``
                block, so ``.group`` is called on ``None``.
        """
        start_time = time.time()

        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = parsed_url.netloc
        challenge_form = re.search(
            r'\<form.*?id=\"challenge-form\".*?\/form\>', body,
            flags=re.S).group(0)  # find challenge form
        # NOTE(review): `method` is compared case-sensitively with 'POST' and
        # 'GET' below; any other spelling skips both branches - confirm the
        # page always uses upper case.
        method = re.search(r'method=\"(.*?)\"', challenge_form,
                           flags=re.S).group(1)
        # Remember the HTTP method of the first request seen; it is used for
        # the follow-up requests in the cookie and fallback branches below.
        if self.org_method is None:
            self.org_method = resp.request.method
        # Submit target: the form's action path without its query string.
        submit_url = "%s://%s%s" % (
            parsed_url.scheme, domain,
            re.search(r'action=\"(.*?)\"', challenge_form,
                      flags=re.S).group(1).split('?')[0])

        # Work on a copy so `original_kwargs` can be reused untouched later.
        cloudflare_kwargs = copy.deepcopy(original_kwargs)

        headers = cloudflare_kwargs.setdefault("headers", {})
        headers["Referer"] = resp.url

        try:
            cloudflare_kwargs["params"] = dict()
            cloudflare_kwargs["data"] = dict()
            # If the action URL carries a query string, its key=value pairs
            # become query parameters of the submission.
            if len(
                    re.search(r'action=\"(.*?)\"', challenge_form,
                              flags=re.S).group(1).split('?')) != 1:
                for param in re.search(
                        r'action=\"(.*?)\"', challenge_form,
                        flags=re.S).group(1).split('?')[1].split('&'):
                    cloudflare_kwargs["params"].update(
                        {param.split('=')[0]: param.split('=')[1]})

            # Every <input> except `jschl_answer` is copied by name/value into
            # the body (POST) or the query (GET).
            # NOTE(review): the pattern requires a non-hyphen character and a
            # space before `<input`; presumably this skips inputs inside HTML
            # comments - confirm.
            for input_ in re.findall(r'[^-] \<input.*?(?:\/>|\<\/input\>)',
                                     challenge_form,
                                     flags=re.S):
                if re.search(r'name=\"(.*?)\"', input_,
                             flags=re.S).group(1) != 'jschl_answer':
                    if method == 'POST':
                        cloudflare_kwargs["data"].update(
                            {
                                re.search(r'name=\"(.*?)\"',
                                          input_,
                                          flags=re.S).group(1):
                                re.search(r'value=\"(.*?)\"',
                                          input_,
                                          flags=re.S).group(1)
                            })
                    elif method == 'GET':
                        cloudflare_kwargs["params"].update(
                            {
                                re.search(r'name=\"(.*?)\"',
                                          input_,
                                          flags=re.S).group(1):
                                re.search(r'value=\"(.*?)\"',
                                          input_,
                                          flags=re.S).group(1)
                            })
            # Both `jschl_vc` and `pass` must have been collected.
            if method == 'POST':
                for k in ("jschl_vc", "pass"):
                    if k not in cloudflare_kwargs["data"]:
                        raise ValueError("%s is missing from challenge form" %
                                         k)
            elif method == 'GET':
                for k in ("jschl_vc", "pass"):
                    if k not in cloudflare_kwargs["params"]:
                        raise ValueError("%s is missing from challenge form" %
                                         k)

        except Exception as e:
            # Something is wrong with the page.
            # This may indicate Cloudflare has changed their anti-bot
            # technique. If you see this and are running the latest version,
            # please open a GitHub issue so I can update the code accordingly.
            raise ValueError(
                "Unable to parse Cloudflare anti-bot IUAM page: %s %s" %
                (e, BUG_REPORT))

        # Solve the Javascript challenge
        answer, delay = self.solve_challenge(body, domain)
        if method == 'POST':
            cloudflare_kwargs["data"]["jschl_answer"] = answer
        elif method == 'GET':
            cloudflare_kwargs["params"]["jschl_answer"] = answer

        # Requests transforms any request into a GET after a redirect,
        # so the redirect has to be handled manually here to allow for
        # performing other types of requests even as the first request.
        cloudflare_kwargs["allow_redirects"] = False

        # Cloudflare requires a delay before solving the challenge
        time.sleep(max(delay - (time.time() - start_time), 0))

        # Send the challenge response and handle the redirect manually
        redirect = self.request(method, submit_url, **cloudflare_kwargs)
        if "Location" in redirect.headers:
            # Redirect reply: follow it by hand with the form's method and
            # the original kwargs.
            redirect_location = urlparse(redirect.headers["Location"])

            if not redirect_location.netloc:
                # Relative redirect: resolve it against the challenged host.
                redirect_url = urlunparse((
                    parsed_url.scheme,
                    domain,
                    redirect_location.path,
                    redirect_location.params,
                    redirect_location.query,
                    redirect_location.fragment,
                ))
                return self.request(method, redirect_url, **original_kwargs)
            return self.request(method, redirect.headers["Location"],
                                **original_kwargs)
        elif "Set-Cookie" in redirect.headers:
            if 'cf_clearance' in redirect.headers['Set-Cookie']:
                # A clearance cookie was set: repeat the request to
                # submit_url with the remembered method and the new cookies.
                resp = self.request(self.org_method,
                                    submit_url,
                                    cookies=redirect.cookies)
                return resp
            else:
                # Cookies were set, but no clearance cookie: request
                # submit_url again with the original kwargs.
                return self.request(method, submit_url, **original_kwargs)
        else:
            # Neither a redirect nor cookies: resend the challenge kwargs to
            # submit_url with the remembered method.
            resp = self.request(self.org_method, submit_url,
                                **cloudflare_kwargs)
            return resp
コード例 #17
0
def prepare_url(self, url, params):
    """Normalise ``url``, merge ``params`` into its query and store it on ``self.url``.

    A URL that starts with the ``key_unquote`` marker has the marker removed
    and is stored without the final ``requote_uri`` pass. A URL that contains
    ``:`` but does not start with ``http`` is stored untouched.

    Raises:
        InvalidURL: if the URL cannot be parsed, has no host, or has an
            invalid host label.
        MissingSchema: if the URL has no scheme.
    """
    #: Accept objects that have string representations.
    #: str() cannot be called blindly, because on python 3.x it would
    #: include the bytestring indicator (b'') for bytes input.
    #: https://github.com/requests/requests/pull/2238
    url = url.decode('utf8') if isinstance(url, bytes) else str(url)

    # Leading whitespace is not part of the URL.
    url = url.lstrip()

    # The marker switches the final re-quoting off; strip it before parsing.
    need_quote = not url.startswith(key_unquote)
    if not need_quote:
        url = url.replace(key_unquote, "")

    # Don't do any URL preparation for non-HTTP schemes like `mailto`,
    # `data` etc to work around exceptions from `url_parse`, which
    # handles RFC 3986 only.
    if ':' in url and not url.lower().startswith('http'):
        self.url = url
        return

    # Support for unicode domain names and paths.
    try:
        scheme, auth, host, port, path, query, fragment = parse_url(url)
    except LocationParseError as e:
        raise InvalidURL(*e.args)

    if not scheme:
        raise MissingSchema(
            "Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?".format(
                to_native_string(url, 'utf8')))

    if not host:
        raise InvalidURL("Invalid URL %r: No host supplied" % url)

    # Hosts containing non-ASCII characters are IDNA encoded so users get the
    # correct IDNA behaviour automatically. Pure-ASCII hosts are used as they
    # are, provided they do not start with a wildcard (*).
    if unicode_is_ascii(host):
        if host.startswith(u'*'):
            raise InvalidURL('URL has an invalid label.')
    else:
        try:
            host = self._get_idna_encoded_host(host)
        except UnicodeError:
            raise InvalidURL('URL has an invalid label.')

    # Carefully reconstruct the network location: [auth@]host[:port]
    netloc = (auth + '@' if auth else '') + host
    if port:
        netloc += ':' + str(port)

    # Bare domains aren't valid URLs.
    path = path or '/'

    if isinstance(params, (str, bytes)):
        params = to_native_string(params)

    # Extra parameters are appended after any query the URL already had.
    enc_params = self._encode_params(params)
    if enc_params:
        query = '%s&%s' % (query, enc_params) if query else enc_params

    assembled = urlunparse([scheme, netloc, path, None, query, fragment])
    self.url = requote_uri(assembled) if need_quote else assembled
コード例 #18
0
ファイル: client.py プロジェクト: dtkav/requests-batch
 def _prepend_host(self, path):
     """Return ``path`` re-rooted on the scheme and host of the batch URL.

     Path, params, query and fragment come from ``path``; scheme and
     network location come from ``self._batch_url_parsed``.
     """
     base = self._batch_url_parsed
     rebased = urlparse(path)._replace(scheme=base.scheme,
                                       netloc=base.netloc)
     return urlunparse(rebased)