def headers(self):
    """Return the Request-Line followed by the request's header lines.

    The query string is assembled from whatever is already in the original
    request's URL plus its separately stored ``params``. A ``Host`` header
    is derived from the URL when the request does not carry one. Lines are
    joined with CRLF and the result is stripped of surrounding whitespace.
    """
    original = self._orig
    url = urlparse(original.url)

    # Requests doesn't make params part of ``request.url``, so the two
    # sources are stitched together here.
    query_parts = []
    if url.query:
        query_parts.append(url.query)
    if original.params:
        # noinspection PyUnresolvedReferences
        query_parts.append(type(original)._encode_params(original.params))
    qs = ('?' + '&'.join(query_parts)) if query_parts else ''

    request_line = '{method} {path}{query} HTTP/1.1'.format(
        method=original.method,
        path=url.path or '/',
        query=qs
    )

    header_map = dict(original.headers)
    if 'Host' not in header_map:
        header_map['Host'] = url.netloc

    lines = [request_line]
    lines.extend('%s: %s' % (name, value)
                 for name, value in header_map.items())
    return '\r\n'.join(lines).strip()
def authenticate_server(self, response):
    """
    Verify the server by feeding its Negotiate value to GSSAPI.

    Returns True when the client step succeeds; False when it raises a
    GSSError or reports a result below 1.
    """
    log.debug("authenticate_server(): Authenticate header: {0}".format(
        _negotiate_value(response)))

    # The stored context is looked up by host, port and calling thread.
    target = urlparse(response.url)
    context_key = "%s_%s_%s" % (target.hostname,
                                target.port,
                                threading.current_thread().ident)

    try:
        step_result = kerberos.authGSSClientStep(
            self.context[context_key], _negotiate_value(response))
    except kerberos.GSSError:
        log.exception("authenticate_server(): authGSSClientStep() failed:")
        return False

    if step_result < 1:
        log.error("authenticate_server(): authGSSClientStep() failed: "
                  "{0}".format(step_result))
        return False

    log.debug("authenticate_server(): returning {0}".format(response))
    return True
def get_connection(self, url, proxies=None, verify=None, cert=None):
    """Return the urllib3 connection to use for the given URL.

    Not meant to be called from user code; it is exposed for subclasses of
    :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

    :param url: The URL to connect to.
    :param proxies: (optional) A Requests-style dictionary of proxies used
        on this request, keyed by lower-cased URL scheme.
    :param verify: (optional) Forwarded to ``_update_poolmanager_ssl_kw``
        for URLs starting with ``https``.
    :param cert: (optional) Forwarded to ``_update_poolmanager_ssl_kw``
        for URLs starting with ``https``.
    """
    with self._pool_kw_lock:
        lowered = url.lower()
        if lowered.startswith('https'):
            self._update_poolmanager_ssl_kw(verify, cert)

        proxy = (proxies or {}).get(urlparse(lowered).scheme)
        if not proxy:
            # Only scheme should be lower case
            normalized = urlparse(url).geturl()
            return self.poolmanager.connection_from_url(normalized)

        proxy = prepend_scheme_if_needed(proxy, 'http')
        manager = self.proxy_manager_for(proxy)
        return manager.connection_from_url(url)
def refresh(self):
    """Fetch the remote catalog description and build its entries.

    Issues a GET to ``self.info_url`` using the configured HTTP arguments
    plus the auth headers, unpacks the msgpack response, stores its
    ``metadata`` on the instance and creates one ``RemoteCatalogEntry`` per
    item in ``sources``.

    :returns: ``(name, {}, entries, [])`` where *name* is the network
        location of ``self.observable`` with ``.`` and ``:`` replaced by
        ``_`` and *entries* maps source names to ``RemoteCatalogEntry``
        objects.
    :raises Exception: if the server answers with a status other than 200.
    """
    name = urlparse(self.observable).netloc.replace(
        '.', '_').replace(':', '_')

    # Merge the auth headers into a *copy* of the configured headers.
    # Updating the dict returned by ``self.http_args.get('headers')``
    # directly would write the auth headers into ``self.http_args`` itself,
    # which is also handed to every entry created below.
    headers = dict(self.http_args.get('headers', {}))
    headers.update(self.auth.get_headers())

    # build new http args with these headers
    http_args = self.http_args.copy()
    http_args['headers'] = headers

    response = requests.get(self.info_url, **http_args)
    if response.status_code != 200:
        raise Exception('%s: status code %d' % (response.url,
                                                response.status_code))

    info = msgpack.unpackb(response.content, encoding='utf-8')
    self.metadata = info['metadata']

    entries = {s['name']: RemoteCatalogEntry(url=self.source_url,
                                             getenv=self.getenv,
                                             getshell=self.getshell,
                                             auth=self.auth,
                                             http_args=self.http_args,
                                             **s)
               for s in info['sources']}

    return name, {}, entries, []
def authenticate_server(self, response):
    """
    Verify the server by feeding its Negotiate value to GSSAPI.

    The context is looked up by the response URL's hostname. Returns True
    on success; False when the step raises a GSSError or reports a result
    below 1.
    """
    log.debug("authenticate_server(): Authenticate header: {0}".format(
        _negotiate_value(response)))

    hostname = urlparse(response.url).hostname

    try:
        step_result = kerberos.authGSSClientStep(
            self.context[hostname], _negotiate_value(response))
    except kerberos.GSSError as error:
        log.error("authenticate_server(): authGSSClientStep() failed:")
        log.exception(error)
        return False

    if step_result < 1:
        log.error("authenticate_server(): authGSSClientStep() failed: "
                  "{0}".format(step_result))
        return False

    log.debug("authenticate_server(): returning {0}".format(response))
    return True
def parse_args(self, env, args=None, namespace=None):
    """Parse the command line and post-process the resulting namespace.

    Besides delegating to the base parser this: rejects redirected output
    on Windows, turns on tracebacks when debugging, redirects ``env.stdout``
    to ``--output``, runs the output/pretty/method/item processing steps,
    reads the body from stdin when it is not a TTY, prefixes a scheme to
    the URL when it has none and prompts for a missing password.

    :param env: environment object; it is stored on the parser and its
        ``stdout``/``stdout_isatty`` may be modified.
    :returns: the processed argument namespace.
    """
    self.env = env

    if env.is_windows and not env.stdout_isatty:
        self.error('Output redirection is not supported on Windows.'
                   ' Please use `--output FILE\' instead.')

    args = super(Parser, self).parse_args(args, namespace)

    if args.debug:
        args.traceback = True

    if args.output:
        env.stdout = args.output
        env.stdout_isatty = False

    self._process_output_options(args, env)
    self._process_pretty_options(args, env)
    self._guess_method(args, env)
    self._parse_items(args)

    if not env.stdin_isatty:
        self._body_from_file(args, env.stdin)

    has_scheme = args.url.startswith((HTTP, HTTPS))
    if not has_scheme:
        # The program name decides which scheme gets prepended.
        default_scheme = HTTPS if env.progname == 'https' else HTTP
        args.url = default_scheme + args.url

    if args.auth and not args.auth.has_password():
        # Stdin has already been read (if not a tty), so it is safe to
        # prompt.
        netloc = urlparse(args.url).netloc
        args.auth.prompt_password(netloc)

    return args
def from_request(request):
    """Build an `HTTPMessage` describing a `requests.models.Request`.

    A ``Host`` header is filled in from the URL when the request has none,
    and a dict body (form data) is encoded with the request class's
    ``_encode_params``.
    """
    url = urlparse(request.url)

    header_map = dict(request.headers)
    header_map.setdefault('Host', url.netloc)

    try:
        body = request.data
    except AttributeError:
        # requests < 0.12.1
        body = request._enc_data

    if isinstance(body, dict):
        # --form
        body = request.__class__._encode_params(body)

    request_line = '{method} {path} HTTP/1.1'.format(
        method=request.method,
        path=url.path or '/')
    header_text = '\n'.join(str('%s: %s') % (name, value)
                            for name, value in header_map.items())

    return HTTPMessage(
        line=request_line,
        headers=header_text,
        body=body,
        content_type=header_map.get('Content-Type')
    )
def get_response(name, request_kwargs, read_only=False):
    """Like `client.get_response`, but with the persistent parts of the
    named session applied to the request.

    The session is looked up by host (the ``Host`` header if given,
    otherwise the URL's network location without any ``user@`` part) and
    *name*. Unless the session already existed and *read_only* is true, the
    cookies from the exchange are stored back and the session is saved.
    """
    host_name = (request_kwargs['headers'].get('Host', None)
                 or urlparse(request_kwargs['url']).netloc.split('@')[-1])

    session = Session(Host(host_name), name)
    session.load()

    # Request headers are merged over the stored ones and the merged
    # mapping is what gets sent.
    session['headers'].update(request_kwargs.get('headers', {}))
    request_kwargs['headers'] = session['headers']

    auth = request_kwargs.get('auth', None)
    if auth:
        session.auth = auth
    elif session.auth:
        request_kwargs['auth'] = session.auth

    rsession = requests.Session(cookies=session.cookies)

    # Any exception raised by the request propagates unchanged; the session
    # is only touched after a successful exchange.
    response = rsession.request(**request_kwargs)

    # Existing sessions with `read_only=True` don't get updated.
    if session.is_new or not read_only:
        session.cookies = rsession.cookies
        session.save()
    return response
def generate_request_header(self, response):
    """
    Produce the ``Negotiate`` header value for the host of *response*.

    Returns None if creating the context or stepping it raises
    GSSException.
    """
    host = urlparse(response.url).hostname

    # The peer name has the form "<service>@<host>".
    peer_name = "{service}@{host}".format(service=self.service, host=host)

    if self.mutual_authentication in (REQUIRED, OPTIONAL):
        req_flags = (C_MUTUAL_FLAG,)
    else:
        req_flags = ()

    try:
        self.context[host] = InitContext(
            peer_name=peer_name, req_flags=req_flags, cred=self.cred)
    except GSSException:
        # TODO is it even possible for InitContext() to raise?
        log.exception("generate_request_header(): InitContext() failed:")
        return None

    try:
        token = self.context[host].step(_negotiate_value(response))
    except GSSException:
        log.exception("generate_request_header(): init_context.step() failed:")
        return None

    return "Negotiate {0}".format(token)
def send(self, request, **kwargs):
    """Send *request* over an ``httpslib.HTTPSConnection`` and wrap the reply.

    The connection uses the SSL context from ``self._make_context()`` and
    defaults to port 443 when the URL names none. ``**kwargs`` are accepted
    but not used here.

    :param request: object providing ``url``, ``method``, ``body`` and
        ``headers``.
    :returns: a ``Response`` populated with status code, case-insensitive
        headers, encoding, raw response, reason, URL, request and
        connection.
    :raises Exception: if the URL scheme is not ``https``.
    """
    url = urlparse(request.url)
    if url.scheme != 'https':
        raise Exception('Only HTTPS is supported!')

    # Build the full request target. Sending only ``url.path`` would drop
    # the query string (and any ``;params``) and would send an empty target
    # for URLs without a path.
    target = url.path or '/'
    if url.params:
        target += ';' + url.params
    if url.query:
        target += '?' + url.query

    ctx = self._make_context()
    conn = httpslib.HTTPSConnection(
        url.hostname, url.port or 443, ssl_context=ctx)
    conn.request(request.method, target, request.body, request.headers)
    resp = conn.getresponse()

    response = Response()

    # Fallback to None if there's no status_code, for whatever reason.
    response.status_code = getattr(resp, 'status', None)

    # Make headers case-insensitive.
    response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {}))

    # Set encoding.
    response.encoding = get_encoding_from_headers(response.headers)
    response.raw = resp
    response.reason = response.raw.reason

    if isinstance(request.url, bytes):
        response.url = request.url.decode('utf-8')
    else:
        response.url = request.url

    # Give the Response some context.
    response.request = request
    response.connection = self

    return response
def get_filename_from_url(url):
    """Return the last path segment of a URL, or None when it is empty.

    Query string and fragment are ignored.

    >>> from planet.api import utils
    >>> urls = [
    ...   'https://planet.com/',
    ...   'https://planet.com/path/to/',
    ...   'https://planet.com/path/to/example.tif',
    ...   'https://planet.com/path/to/example.tif?foo=f6f1&bar=baz',
    ...   'https://planet.com/path/to/example.tif?foo=f6f1&bar=baz#quux'
    ... ]
    >>> for url in urls:
    ...     print('{} -> {}'.format(url, utils.get_filename_from_url(url)))
    ...
    https://planet.com/ -> None
    https://planet.com/path/to/ -> None
    https://planet.com/path/to/example.tif -> example.tif
    https://planet.com/path/to/example.tif?foo=f6f1&bar=baz -> example.tif
    https://planet.com/path/to/example.tif?foo=f6f1&bar=baz#quux -> example.tif
    >>>

    :returns: a filename (i.e. ``basename``)
    :rtype: str or None
    """
    # Everything after the final slash; the whole path if there is none.
    basename = urlparse(url).path.rpartition('/')[2]
    if not basename:
        return None
    return basename
def authenticate_server(self, response):
    """
    Verify the server by feeding its Negotiate value to GSSAPI.

    When ``self.cbt_struct`` is set it is passed along as
    ``channel_bindings``. Returns True on success; False when the step
    raises a GSSError or reports a result below 1.
    """
    log.debug("authenticate_server(): Authenticate header: {0}".format(
        _negotiate_value(response)))

    hostname = urlparse(response.url).hostname

    try:
        step_kwargs = {}
        # If this is set pass along the struct to Kerberos
        if self.cbt_struct:
            step_kwargs['channel_bindings'] = self.cbt_struct
        step_result = kerberos.authGSSClientStep(
            self.context[hostname],
            _negotiate_value(response),
            **step_kwargs)
    except kerberos.GSSError:
        log.exception("authenticate_server(): authGSSClientStep() failed:")
        return False

    if step_result < 1:
        log.error("authenticate_server(): authGSSClientStep() failed: "
                  "{0}".format(step_result))
        return False

    log.debug("authenticate_server(): returning {0}".format(response))
    return True
def authenticate_user(self, response, **kwargs):
    """Retry the request with a gssapi/kerberos ``Authorization`` header.

    If generating the header raises ``KerberosExchangeError`` the original
    response is returned unchanged. Otherwise the request is re-sent on the
    same connection and the new response (with the original appended to its
    history) is returned.
    """
    hostname = urlparse(response.url).hostname

    try:
        auth_header = self.generate_request_header(response, hostname)
    except KerberosExchangeError:
        # GSS Failure, return existing response
        return response

    log.debug("authenticate_user(): Authorization header: {0}".format(
        auth_header))
    response.request.headers['Authorization'] = auth_header

    # Consume the content so we can reuse the connection for the next
    # request.
    response.content
    response.raw.release_conn()

    retried = response.connection.send(response.request, **kwargs)
    retried.history.append(response)

    log.debug("authenticate_user(): returning {0}".format(retried))
    return retried
def request(session, base_path, method, path, **kwargs):
    """Send a request through *session* and raise on a non-2xx status.

    :param requests.Session session: session used to send the request.
    :param str base_path: base URL that relative paths are joined with.
    :param str method: HTTP method for the request.
    :param str path: absolute URL (anything with a scheme) or a path to
        join with *base_path*.
    :param kwargs: (optional) Arguments that
        :meth:`requests.Session.request` takes. Boolean values in a
        ``params`` dict are JSON-encoded on a copy of that dict.
    :rtype: requests.Response
    """
    raw_params = kwargs.get('params')
    if isinstance(raw_params, dict):
        # Work on a copy so the caller's dict is left alone.
        params = raw_params.copy()
        for key, val in iteritems(params):
            # Handle titlecase booleans
            if isinstance(val, bool):
                params[key] = json.dumps(val)
        kwargs['params'] = params

    # Support absolute URLs; everything else is joined with the base path.
    is_absolute = compat.urlparse(path).scheme
    url = path if is_absolute else urljoin(base_path, path).strip('/')

    r = session.request(method, url, **kwargs)

    # Raise exception on a bad status code
    if not (200 <= r.status_code < 300):
        utils.raise_http_exception(r)

    return r
def get_tokens(cls, url, user_agent=None, **kwargs):
    """Fetch *url* with a fresh scraper and return its Cloudflare cookies.

    :param url: URL to request.
    :param user_agent: (optional) value for the scraper's ``User-Agent``.
    :param kwargs: forwarded to the scraper's ``get``.
    :returns: ``(cookies, user_agent)`` where *cookies* holds the
        ``__cfduid`` and ``cf_clearance`` values (empty string if absent)
        and *user_agent* is the scraper's ``User-Agent`` header.
    :raises ValueError: if no cookie domain matching the response host is
        found.
    """
    scraper = cls.create_scraper()
    if user_agent:
        scraper.headers["User-Agent"] = user_agent

    try:
        resp = scraper.get(url, **kwargs)
        resp.raise_for_status()
    except Exception:
        logging.error("'%s' returned an error. Could not collect tokens." % url)
        raise

    # First dotted cookie domain that occurs inside "." + host.
    dotted_host = "." + urlparse(resp.url).netloc
    cookie_domain = next(
        (d for d in scraper.cookies.list_domains()
         if d.startswith(".") and d in dotted_host),
        None,
    )
    if cookie_domain is None:
        raise ValueError(
            'Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM ("I\'m Under Attack Mode") enabled?'
        )

    cookies = {
        "__cfduid": scraper.cookies.get("__cfduid", "", domain=cookie_domain),
        "cf_clearance": scraper.cookies.get(
            "cf_clearance", "", domain=cookie_domain
        ),
    }
    return (cookies, scraper.headers["User-Agent"])
def parse_args(self, env, args=None, namespace=None):
    """Parse the command line and post-process the resulting namespace.

    Besides delegating to the base parser this: switches to form encoding
    when the config asks for it and ``--json`` is not given, turns on
    tracebacks when debugging, redirects ``env.stdout`` to ``--output``,
    runs the output/pretty/method/item processing steps, reads the body
    from stdin when it is not a TTY, prefixes a scheme to the URL when it
    has none and prompts for a missing password.

    :param env: environment object; it is stored on the parser and its
        ``stdout``/``stdout_isatty`` may be modified.
    :returns: the processed argument namespace.
    """
    self.env = env

    args = super(Parser, self).parse_args(args, namespace)

    if not args.json and env.config.implicit_content_type == 'form':
        args.form = True

    if args.debug:
        args.traceback = True

    if args.output:
        env.stdout = args.output
        env.stdout_isatty = False

    self._process_output_options(args, env)
    self._process_pretty_options(args, env)
    self._guess_method(args, env)
    self._parse_items(args)

    if not env.stdin_isatty:
        self._body_from_file(args, env.stdin)

    has_scheme = args.url.startswith((HTTP, HTTPS))
    if not has_scheme:
        # The program name decides which scheme gets prepended.
        default_scheme = HTTPS if env.progname == 'https' else HTTP
        args.url = default_scheme + args.url

    if args.auth and not args.auth.has_password():
        # Stdin has already been read (if not a tty), so it is safe to
        # prompt.
        netloc = urlparse(args.url).netloc
        args.auth.prompt_password(netloc)

    return args
def reply(self, url, special_reply_content=None):
    """
    Post a reply to the thread at *url*.

    :param url: thread URL; the values of its first two query parameters
        are used as the board id and the root id, in that order
    :param special_reply_content: text to post instead of a random pick
        from ``self._reply_contents``
    :return: the ``ok`` attribute of the response to the reply POST
    """
    assert self.logged, 'Did not login successfully!'
    assert self._reply_contents or special_reply_content, 'No reply text!'

    page = self.get(url)
    followup_value = self.pat_followup_value.search(page.text).group(1).strip(r'\"')

    # parse_qs is not used for the URL because, per the original author,
    # the case of the ``boardid`` key is not consistent across cc98 URLs;
    # the parameters are therefore read by position instead of by name.
    query_pairs = parse_qsl(urlparse(url).query)
    boardid = query_pairs[0][1]
    rootid = query_pairs[1][1]

    reply_query = urlencode((('method', 'fastreply'), ('BoardID', boardid)))
    reply_url = self.REPLY_BASE_URL + '?' + reply_query

    # The password is taken from the cookie header of the request that
    # fetched the thread page.
    cookies_password = parse_qs(page.request.headers['cookie']).get('password')[0]

    post_reply = special_reply_content or random.choice(self._reply_contents)

    post_form = {
        'followup': followup_value,
        'RootID': rootid,
        'star': '1',
        'UserName': self.username,
        'passwd': cookies_password,
        'Expression': 'face7.gif',
        'Content': post_reply,
        'signflag': 'yes',
    }
    self._reply_resp = self.post(reply_url, data=post_form)
    return self._reply_resp.ok
def get_spn(self, r):
    """Return ``self.spn``, deriving it from *r*'s hostname on first use.

    When ``self.spn`` is None it is set to ``"HTTP@<hostname>"`` using the
    hostname of ``r.url``.
    """
    if self.spn is not None:
        return self.spn

    hostname = urlparse(r.url).hostname
    spn = "HTTP@%s" % hostname
    self.spn = spn
    log.debug("calculated SPN as %s" % spn)
    return self.spn
def get_response(name, request_kwargs):
    """Send the request with the persistent parts of the named session.

    The session is looked up by host (the ``Host`` header if given,
    otherwise the URL's network location without any ``user@`` part) and
    *name*. After a successful exchange the cookies are stored back and the
    session is saved.
    """
    host_name = (request_kwargs['headers'].get('Host', None)
                 or urlparse(request_kwargs['url']).netloc.split('@')[-1])

    session = Session(Host(host_name), name)
    session.load()

    # Request headers are merged over the stored ones and the merged
    # mapping is what gets sent.
    session['headers'].update(request_kwargs.get('headers', {}))
    request_kwargs['headers'] = session['headers']

    auth = request_kwargs.get('auth', None)
    if auth:
        session.auth = auth
    elif session.auth:
        request_kwargs['auth'] = session.auth

    rsession = RSession(cookies=session.cookies)

    # Any exception raised by the request propagates unchanged; the session
    # is only saved after a successful exchange.
    response = rsession.request(**request_kwargs)

    session.cookies = rsession.cookies
    session.save()
    return response
def _get_auth(self):
    """Log in and extract the access token from the final redirect.

    Posts the credentials to ``self.url`` (with ``next`` set to the path
    and query of ``self.authentication_url``), follows the returned
    ``location`` once without auto-redirects and reads the token from the
    ``#access_token=`` part of the second ``location`` header.

    :returns: the token, which is also stored on ``self.auth``; None if any
        step raised, in which case the exception is passed to
        ``helpers.handle_requests_exception``.
    """
    target = urlparse(self.authentication_url)
    credentials = {
        'name': self.username,
        'password': self.password,
        'next': target.path + '?' + target.query
    }

    try:
        login = self.session.post(self.url, data=credentials,
                                  allow_redirects=False)
        login.raise_for_status()

        redirect = self.session.get(login.headers['location'],
                                    allow_redirects=False)
        redirect.raise_for_status()

        token_pattern = '{redirect_uri}#access_token=(.*)'.format(
            redirect_uri=re.escape(self.redirect_uri))
        match = re.search(token_pattern, redirect.headers['location'])
        # A missing match raises AttributeError here and is handled below.
        self.auth = match.group(1)
    except Exception as error:
        helpers.handle_requests_exception(error)
        self.auth = None

    return self.auth
def __init__(self, count, url, cls, session, params=None, etag=None,
             headers=None):
    """Set up the iterator state.

    :param count: number of items requested.
    :param url: URL used for the first GET.
    :param cls: class used to construct each returned item.
    :param session: passed on to ``GitHubCore.__init__``.
    :param params: (optional) query-string parameters; ``_remove_none`` is
        applied to them.
    :param etag: (optional) sent as ``If-None-Match`` when given.
    :param headers: (optional) headers for the GET request.
    """
    models.GitHubCore.__init__(self, {}, session)

    #: Original number of items requested
    self.original = count
    #: Number of items left in the iterator
    self.count = count

    #: URL the class used to make its first GET
    self.url = url
    #: Last URL that was requested
    self.last_url = None
    self._api = self.url
    self.path = urlparse(self.url).path

    #: Class for constructing an item to return
    self.cls = cls

    #: Parameters of the query string
    self.params = params or {}
    self._remove_none(self.params)

    # The ``etag`` argument is deliberately not stored here. This attribute
    # is meant to represent the ETag header returned by GitHub no matter
    # what; if it were not None it would not be set from the response.
    #: The ETag Header value returned by GitHub
    self.etag = None

    #: Headers generated for the GET request
    self.headers = headers or {}
    if etag:
        self.headers['If-None-Match'] = etag

    #: The last response seen
    self.last_response = None
    #: Last status code received
    self.last_status = 0
def generate_request_header(self, response):
    """
    Build the ``Negotiate`` authorization header value using kerberos.

    Runs authGSSClientInit, authGSSClientStep and authGSSClientResponse in
    turn and returns None as soon as one of them fails.
    """
    target = urlparse(response.url)
    host = target.hostname

    # Unique key into self.context: host, port and calling thread.
    context_key = "%s_%s_%s" % (target.hostname,
                                target.port,
                                threading.current_thread().ident)

    try:
        result, self.context[context_key] = kerberos.authGSSClientInit(
            "{0}@{1}".format(self.service, host))
    except kerberos.GSSError:
        log.exception("generate_request_header(): authGSSClientInit() failed:")
        return None

    if result < 1:
        log.error("generate_request_header(): authGSSClientInit() failed: "
                  "{0}".format(result))
        return None

    try:
        result = kerberos.authGSSClientStep(self.context[context_key],
                                            _negotiate_value(response))
    except kerberos.GSSError:
        log.exception("generate_request_header(): authGSSClientStep() failed:")
        return None

    # Unlike the init call, only negative results count as failure here.
    if result < 0:
        log.error("generate_request_header(): authGSSClientStep() failed: "
                  "{0}".format(result))
        return None

    try:
        token = kerberos.authGSSClientResponse(self.context[context_key])
    except kerberos.GSSError:
        log.exception("generate_request_header(): authGSSClientResponse() "
                      "failed:")
        return None

    return "Negotiate {0}".format(token)
def test_mixed_case_scheme_acceptable(self, httpbin, scheme):
    """A GET to httpbin's ``get`` URL succeeds with the given scheme prefix."""
    session = requests.Session()
    session.proxies = getproxies()

    # Rebuild the URL with the scheme under test in front.
    target = urlparse(httpbin('get'))
    url = scheme + target.netloc + target.path

    prepared = requests.Request('GET', url).prepare()
    response = session.send(prepared)
    assert response.status_code == 200, 'failed for scheme {0}'.format(scheme)
def get_connection(self, socket_path, proxies=None):
    """Return a ``UnixHTTPConnectionPool`` for *socket_path*.

    :raises ValueError: if *proxies* has a truthy entry for the lower-cased
        scheme of *socket_path*.
    """
    scheme = urlparse(socket_path.lower()).scheme
    if (proxies or {}).get(scheme):
        raise ValueError('%s does not support specifying proxies'
                         % self.__class__.__name__)

    return UnixHTTPConnectionPool(socket_path, self.timeout)
def join_remote_base_URL(self, urlpath):
    """Join *urlpath* with the configured PyBossa base URL.

    Returns an empty string when ``self.pybossa_url`` is None. When the
    base URL's network location is ``pybossa`` (case-insensitive),
    ``http://localhost:3002`` is used as the base instead.
    """
    base = self.pybossa_url
    if base is None:
        return ""

    # If the URL is reachable only from inside Docker, rewrite it for devs
    docker_only = urlparse(base).netloc.lower() == "pybossa"
    if docker_only:
        base = "http://localhost:3002"
    return urljoin(base, urlpath)
def parse(self, uri):
    """Split *uri* and return its scheme, netloc, path and fragment.

    :returns: dict with the keys ``scheme``, ``netloc``, ``path`` and
        ``fragment``.
    """
    components = urlparse(uri)
    wanted = ('scheme', 'netloc', 'path', 'fragment')
    return {field: getattr(components, field) for field in wanted}
def handle_captcha_challenge(self, resp, url):
    """Raise ``CloudflareCaptchaError`` for a captcha challenge on *url*.

    The message names the URL's network location and, when the linked
    OpenSSL is older than 1.1.1, adds an upgrade hint. This method always
    raises.
    """
    parts = [
        "Cloudflare captcha challenge presented for %s (cfscrape cannot solve captchas)"
        % urlparse(url).netloc
    ]
    if ssl.OPENSSL_VERSION_NUMBER < 0x10101000:
        parts.append(". Your OpenSSL version is lower than 1.1.1. Please upgrade your OpenSSL library and recompile Python.")

    raise CloudflareCaptchaError("".join(parts), response=resp)
def prepare_request(self, request):
    """Rewrite *request*'s URL through ``FOAUTH_TEMPLATE`` and attach auth.

    The template receives the original URL's network location as
    ``domain`` and its path as ``path``. The same request object is
    returned.
    """
    original = urlparse(request.url)

    # Rewrite the url to use foauth.org
    request.url = FOAUTH_TEMPLATE.format(domain=original.netloc,
                                         path=original.path)

    # Authenticate appropriately.
    request.prepare_auth(self.auth)
    return request
def test_mixed_case_scheme_acceptable(self):
    """A GET to httpbin's ``get`` URL succeeds for every scheme spelling."""
    session = requests.Session()
    session.proxies = getproxies()
    target = urlparse(httpbin("get"))

    schemes = ["http://", "HTTP://", "hTTp://", "HttP://",
               "https://", "HTTPS://", "hTTps://", "HttPs://"]
    for scheme in schemes:
        # Rebuild the URL with the scheme under test in front.
        url = scheme + target.netloc + target.path
        prepared = requests.Request("GET", url).prepare()
        response = session.send(prepared)
        assert response.status_code == 200, "failed for scheme {0}".format(scheme)
def _create_connection(self, url, proxies):
    """Open a websocket connection to *url*, through a proxy if configured.

    The proxy is looked up by the URL's lower-cased scheme first, then under
    ``"https"``, then under ``"http"``.
    """
    proxies = proxies or {}
    proxy = (proxies.get(urlparse(url.lower()).scheme)
             or proxies.get("https")
             or proxies.get("http"))

    if not proxy:
        return websocket.create_connection(url)

    return websocket.create_connection(
        url,
        proxy=self._proxy_from_url(proxy),
        proxy_header=self.proxy_headers(proxy),
    )
def get_host_and_path_from_url(self, request):
    '''Split a PreparedRequest's URL into scheme, host, port and path.

    This wraps the URL parsing in one place. One leading slash is removed
    from the path, and the port is 0 when the URL names none.

    :returns: tuple ``(scheme, host, port, path)``
    '''
    parts = urlparse(request.url)

    # If there is a slash on the front of the path, chuck it.
    path = parts.path[1:] if parts.path.startswith('/') else parts.path

    return (parts.scheme, parts.hostname, parts.port or 0, path)
def test_explicit_mech(self):
    """An explicit ``mech`` is passed through to the security context."""
    with patch.multiple("gssapi.Credentials", __new__=fake_creds), \
            patch.multiple("gssapi.SecurityContext", __init__=fake_init,
                           step=fake_resp):
        challenge = requests.Response()
        challenge.url = "http://www.example.org/"
        challenge.headers = {'www-authenticate': b64_negotiate_token}
        hostname = urlparse(challenge.url).hostname

        fake_mech = b'fake mech'
        auth = requests_gssapi.HTTPSPNEGOAuth(mech=fake_mech)
        auth.generate_request_header(challenge, hostname)

        # NOTE(review): the expected name literal looks redacted
        # ("*****@*****.**") -- confirm the intended service name.
        fake_init.assert_called_with(
            name=gssapi_sname("*****@*****.**"),
            usage="initiate",
            flags=gssflags,
            creds=None,
            mech=b'fake mech')
        fake_resp.assert_called_with(b"token")
def test_generate_request_header_step_error(self):
    """A failing context step surfaces as ``SPNEGOExchangeError``."""
    with patch.multiple("gssapi.SecurityContext", __init__=fake_init,
                        step=fail_resp):
        challenge = requests.Response()
        challenge.url = "http://www.example.org/"
        challenge.headers = {'www-authenticate': b64_negotiate_token}
        hostname = urlparse(challenge.url).hostname

        auth = requests_gssapi.HTTPKerberosAuth()
        self.assertRaises(requests_gssapi.exceptions.SPNEGOExchangeError,
                          auth.generate_request_header, challenge, hostname)

        # NOTE(review): the expected name literal looks redacted
        # ("*****@*****.**"), and this test uses ``gssapi_name`` where
        # sibling tests use ``gssapi_sname`` -- confirm both are intended.
        fake_init.assert_called_with(
            name=gssapi_name("*****@*****.**"),
            usage="initiate",
            flags=gssflags,
            creds=None,
            mech=None)
        fail_resp.assert_called_with(b"token")
def test_target_name(self):
    """An explicit ``target_name`` is used as the security context name."""
    with patch.multiple("gssapi.SecurityContext", __init__=fake_init,
                        step=fake_resp):
        challenge = requests.Response()
        challenge.url = "http://www.example.org/"
        challenge.headers = {'www-authenticate': b64_negotiate_token}
        hostname = urlparse(challenge.url).hostname

        # NOTE(review): both name literals below look redacted
        # ("*****@*****.**") -- confirm the intended values.
        auth = requests_gssapi.HTTPSPNEGOAuth(
            target_name="*****@*****.**")
        auth.generate_request_header(challenge, hostname)

        fake_init.assert_called_with(
            name=gssapi_sname("*****@*****.**"),
            usage="initiate",
            flags=gssflags,
            creds=None,
            mech=None)
        fake_resp.assert_called_with(b"token")
def test_generate_request_header(self):
    """The generated header equals the expected negotiate response."""
    with patch.multiple("gssapi.SecurityContext", __init__=fake_init,
                        step=fake_resp):
        challenge = requests.Response()
        challenge.url = "http://www.example.org/"
        challenge.headers = {'www-authenticate': b64_negotiate_token}
        hostname = urlparse(challenge.url).hostname

        auth = requests_gssapi.HTTPKerberosAuth()
        header = auth.generate_request_header(challenge, hostname)
        self.assertEqual(header, b64_negotiate_response)

        # NOTE(review): the expected name literal looks redacted
        # ("*****@*****.**") -- confirm the intended service name.
        fake_init.assert_called_with(
            name=gssapi_sname("*****@*****.**"),
            creds=None,
            mech=None,
            flags=gssflags,
            usage="initiate")
        fake_resp.assert_called_with(b"token")
def test_realm_override(self):
    """``hostname_override`` is reflected in the authGSSClientInit call."""
    with patch.multiple(kerberos_module_name,
                        authGSSClientInit=clientInit_complete,
                        authGSSClientResponse=clientResponse,
                        authGSSClientStep=clientStep_continue):
        challenge = requests.Response()
        challenge.url = "http://www.example.org/"
        challenge.headers = {'www-authenticate': 'negotiate token'}
        hostname = urlparse(challenge.url).hostname

        auth = requests_kerberos.HTTPKerberosAuth(
            hostname_override="otherhost.otherdomain.org")
        auth.generate_request_header(challenge, hostname)

        expected_flags = (kerberos.GSS_C_MUTUAL_FLAG |
                          kerberos.GSS_C_SEQUENCE_FLAG)
        # NOTE(review): the expected name literal looks redacted
        # ("*****@*****.**") -- confirm the intended service name.
        clientInit_complete.assert_called_with(
            "*****@*****.**",
            gssflags=expected_flags,
            mech_oid=kerberos.GSS_MECH_OID_KRB5,
            principal=None)
def generate_request_header(self, response):
    """
    Build the ``Negotiate`` authorization header value using kerberos.

    Runs authGSSClientInit, authGSSClientStep and authGSSClientResponse in
    turn and returns None as soon as one of them fails. The context is
    stored under the response URL's hostname.
    """
    hostname = urlparse(response.url).hostname

    try:
        result, self.context[hostname] = kerberos.authGSSClientInit(
            "{0}@{1}".format(self.service, hostname))
    except kerberos.GSSError as error:
        log.error("generate_request_header(): authGSSClientInit() failed:")
        log.exception(error)
        return None

    if result < 1:
        log.error("generate_request_header(): authGSSClientInit() failed: "
                  "{0}".format(result))
        return None

    try:
        result = kerberos.authGSSClientStep(self.context[hostname],
                                            _negotiate_value(response))
    except kerberos.GSSError as error:
        log.error("generate_request_header(): authGSSClientStep() failed:")
        log.exception(error)
        return None

    # Unlike the init call, only negative results count as failure here.
    if result < 0:
        log.error("generate_request_header(): authGSSClientStep() failed: "
                  "{0}".format(result))
        return None

    try:
        token = kerberos.authGSSClientResponse(self.context[hostname])
    except kerberos.GSSError as error:
        log.error("generate_request_header(): authGSSClientResponse() "
                  "failed:")
        log.exception(error)
        return None

    return "Negotiate {0}".format(token)
def write_chunk(jvm, chunk_number, filename, storage, file_data, conf):
    """
    Writes a single chunk in HDFS. Chunks are provided by the interface and
    are blocks of data (binary).

    The chunk is written to ``<tmp>/<filename>.part<chunk_number:09d>``
    where ``<tmp>`` comes from ``get_tmp_path``. Afterwards the entries in
    the temporary directory are counted so the caller can check whether all
    parts are present.

    :param jvm: JVM gateway used to reach the Java/Hadoop classes.
    :param chunk_number: index of this chunk, used in the part file name.
    :param filename: name of the file the chunk belongs to.
    :param storage: object whose ``url`` attribute locates the storage.
    :param file_data: binary content of the chunk.
    :param conf: configuration passed to ``FileSystem.get``.
    :returns: tuple ``(file_data, hdfs, str_uri, tmp_path, counter)`` where
        *counter* is the number of entries listed in the temporary path.
    """
    # Drop a single trailing slash from the storage URL.
    storage_url = storage.url if storage.url[-1] != '/' \
        else storage.url[:-1]
    parsed = req_compat.urlparse(storage_url)

    if parsed.scheme == 'file':
        str_uri = '{proto}://{path}'.format(
            proto=parsed.scheme, path=parsed.path)
    elif parsed.port is None:
        # No explicit port in the URL: leave it out instead of rendering
        # the literal text "None" into the URI.
        str_uri = '{proto}://{host}'.format(
            proto=parsed.scheme, host=parsed.hostname)
    else:
        str_uri = '{proto}://{host}:{port}'.format(
            proto=parsed.scheme, host=parsed.hostname, port=parsed.port)

    uri = jvm.java.net.URI(str_uri)
    hdfs = jvm.org.apache.hadoop.fs.FileSystem.get(uri, conf)
    log.info('================== %s', uri)

    tmp_path = get_tmp_path(jvm, hdfs, parsed, filename)
    chunk_filename = "{tmp}/{file}.part{part:09d}".format(
        tmp=tmp_path.toString(), file=filename, part=chunk_number)
    chunk_path = jvm.org.apache.hadoop.fs.Path(chunk_filename)

    output_stream = hdfs.create(chunk_path)
    try:
        block = bytearray2(file_data)
        output_stream.write(block, 0, len(block))
    finally:
        # Close the stream even if the write fails so it is not leaked.
        output_stream.close()

    # Checks if all file's parts are present
    list_iter = hdfs.listFiles(tmp_path, False)
    counter = 0
    while list_iter.hasNext():
        counter += 1
        list_iter.next()

    return file_data, hdfs, str_uri, tmp_path, counter
def preprocess_media_tags(element):
    """Adjust list, iframe and tweet elements in place.

    Only ``html.HtmlElement`` instances are touched:

    * ``ol``/``ul``: the element's text is cleared.
    * ``li``: the element's tail is cleared.
    * ``iframe``: a source matching ``youtube_re`` or ``vimeo_re`` is
      rewritten to an ``/embed/...`` URL and the element is wrapped in a
      figure unless it already has a ``figure`` ancestor; any other iframe
      has its tag dropped.
    * ``blockquote`` with class ``twitter-tweet``: for a link matching
      ``twitter_re`` an ``/embed/twitter`` iframe is inserted before the
      blockquote, wrapped in a figure, and the blockquote is removed.
    """
    if isinstance(element, html.HtmlElement):
        if element.tag in ['ol', 'ul']:
            # ignore any spaces between <ul> and <li>
            element.text = ''
        elif element.tag == 'li':
            # ignore spaces after </li>
            element.tail = ''
        elif element.tag == 'iframe':
            # NOTE(review): ``src`` may be absent, in which case
            # ``element.get`` presumably returns None and ``re.match``
            # would raise -- confirm against callers.
            iframe_src = element.get('src')
            youtube = re.match(youtube_re, iframe_src)
            vimeo = re.match(vimeo_re, iframe_src)
            if youtube or vimeo:
                element.text = ''  # ignore any legacy text
                if youtube:
                    # The video id is whatever remains of the path once
                    # '/embed/' is removed.
                    yt_id = urlparse(iframe_src).path.replace('/embed/', '')
                    element.set(
                        'src', '/embed/youtube?url=' +
                        quote_plus('https://www.youtube.com/watch?v=' + yt_id))
                elif vimeo:
                    # Group 2 of vimeo_re is used as the video id.
                    element.set(
                        'src', '/embed/vimeo?url=' +
                        quote_plus('https://vimeo.com/' + vimeo.group(2)))
                # Wrap only when not already inside a <figure>.
                if not len(element.xpath('./ancestor::figure')):
                    _wrap_figure(element)
            else:
                element.drop_tag()
        elif element.tag == 'blockquote' and element.get(
                'class') == 'twitter-tweet':
            twitter_links = element.xpath('.//a')
            for tw_link in twitter_links:
                if twitter_re.match(tw_link.get('href')):
                    # Build the replacement iframe and insert it before the
                    # blockquote.
                    twitter_frame = html.HtmlElement()
                    twitter_frame.tag = 'iframe'
                    twitter_frame.set(
                        'src', '/embed/twitter?url=' +
                        quote_plus(tw_link.get('href')))
                    element.addprevious(twitter_frame)
                    _wrap_figure(twitter_frame)
                    element.drop_tree()
def test_generate_request_header(self):
    """The generated header is ``Negotiate GSSRESPONSE`` and every kerberos
    client call receives the expected arguments."""
    with patch.multiple(kerberos_module_name,
                        authGSSClientInit=clientInit_complete,
                        authGSSClientResponse=clientResponse,
                        authGSSClientStep=clientStep_continue):
        challenge = requests.Response()
        challenge.url = "http://www.example.org/"
        challenge.headers = {'www-authenticate': 'negotiate token'}
        hostname = urlparse(challenge.url).hostname

        auth = requests_kerberos.HTTPKerberosAuth()
        header = auth.generate_request_header(challenge, hostname)
        self.assertEqual(header, "Negotiate GSSRESPONSE")

        expected_flags = (kerberos.GSS_C_MUTUAL_FLAG |
                          kerberos.GSS_C_SEQUENCE_FLAG)
        # NOTE(review): the expected name literal looks redacted
        # ("*****@*****.**") -- confirm the intended service name.
        clientInit_complete.assert_called_with(
            "*****@*****.**",
            gssflags=expected_flags,
            principal=None)
        clientStep_continue.assert_called_with("CTX", "token")
        clientResponse.assert_called_with("CTX")
def test_principal_override(self):
    """An explicit ``principal`` is used to acquire the credentials that
    are then handed to the security context."""
    with patch.multiple("gssapi.Credentials", __new__=fake_creds), \
            patch.multiple("gssapi.SecurityContext", __init__=fake_init,
                           step=fake_resp):
        challenge = requests.Response()
        challenge.url = "http://www.example.org/"
        challenge.headers = {'www-authenticate': b64_negotiate_token}
        hostname = urlparse(challenge.url).hostname

        auth = requests_gssapi.HTTPKerberosAuth(principal="user@REALM")
        auth.generate_request_header(challenge, hostname)

        fake_creds.assert_called_with(
            gssapi.creds.Credentials,
            usage="initiate",
            name=gssapi_uname("user@REALM", ))
        # NOTE(review): the expected name literal looks redacted
        # ("*****@*****.**") -- confirm the intended service name.
        fake_init.assert_called_with(
            name=gssapi_sname("*****@*****.**"),
            usage="initiate",
            flags=gssflags,
            creds=b"fake creds",
            mech=None)
def send(self, stream=False, timeout=None, verify=True, cert=None,
         proxies=None):
    """Start sending ``self.request`` by opening the TCP connection.

    :param stream: stored on ``self.stream_body``.
    :param timeout: split by ``parse_timeout`` into a connect timeout and
        ``self.read_timeout``.
    :param verify: forwarded to ``self._get_ssl_options``.
    :param cert: forwarded to ``self._get_ssl_options``.
    :param proxies: passed to ``select_proxy`` together with the request
        URL.

    ``self._on_connect`` is given to ``tcp_client.connect`` as the
    callback; this method itself returns nothing.
    """
    request = self.request
    connect_timeout, self.read_timeout = parse_timeout(timeout)
    self.stream_body = stream

    # set connect timeout
    with stack_context.ExceptionStackContext(self._handle_exception):
        if connect_timeout:
            # Schedule self._on_timeout('while connecting') after the
            # connect timeout elapses.
            self._timeout = self.io_loop.call_later(
                connect_timeout,
                stack_context.wrap(
                    functools.partial(self._on_timeout, 'while connecting')))

        # set proxy related info
        proxy = select_proxy(request.url, proxies)
        self.headers = request.headers.copy()
        if proxy:
            proxy = prepend_scheme_if_needed(proxy, 'http')
            parsed = urlparse(proxy)
            # NOTE(review): ``host`` is bound to the whole proxy URL here
            # (not ``parsed.hostname``) and is later passed as
            # ``source_ip`` -- confirm this is intended.
            scheme, host, port = parsed.scheme, proxy, parsed.port
            # Default port by proxy scheme when the URL names none.
            port = port or (443 if scheme == 'https' else 80)
            # With a proxy the start line carries the full request URL.
            self.start_line = RequestStartLine(request.method,
                                               request.url, '')
            self.headers.update(get_proxy_headers(proxy))
        else:
            host, port = None, None
            self.start_line = request.start_line

        self.tcp_client.connect(request.host, request.port,
                                af=request.af,
                                ssl_options=self._get_ssl_options(
                                    request, verify, cert),
                                max_buffer_size=self.max_buffer_size,
                                source_ip=host, source_port=port,
                                callback=self._on_connect)
def make_request(self, url, method=u'GET', headers=None, body_string=None,
                 sign_request=True, not_found_ok=False, **kwargs):
    """Send a (by default signed) request and return status and payload.

    :param url: full URL to request.
    :param method: HTTP method.
    :param headers: (optional) extra headers.
    :param body_string: (optional) request body.
    :param sign_request: when true, headers and body come from
        ``self.build_vinyldns_request``; otherwise they are sent as given.
    :param not_found_ok: when true, ``self.session_not_found_ok`` is used
        instead of ``self.session``.
    :param kwargs: ``status`` (an expected status code or an iterable of
        acceptable codes) and ``retries`` are consumed here; everything
        else is forwarded to the signing helper and the session.
    :returns: ``(status_code, body)`` where *body* is the decoded JSON, or
        the response text when the body is not valid JSON.
    """
    # pull out status or None
    status_code = kwargs.pop(u'status', None)

    # remove retries arg if provided
    kwargs.pop(u'retries', None)

    path = urlparse(url).path

    # The query string has to be parsed so it can be handed to
    # build_vinyldns_request and be included in the AWS signing.
    query = parse_qs(urlsplit(url).query)

    if query:
        # parse_qs returns a list for ALL params, even single-valued ones;
        # flatten every param that has exactly one value.
        query = dict((k, v if len(v) > 1 else v[0])
                     for k, v in query.items())

    if sign_request:
        signed_headers, signed_body = self.build_vinyldns_request(
            method, path, body_string, query,
            with_headers=headers or {}, **kwargs)
    else:
        signed_headers = headers or {}
        signed_body = body_string

    session = self.session_not_found_ok if not_found_ok else self.session
    response = session.request(method, url, data=signed_body,
                               headers=signed_headers, **kwargs)

    if status_code is not None:
        # ``collections.Iterable`` was removed in Python 3.10; the ABC
        # lives in ``collections.abc``.
        try:
            from collections.abc import Iterable
        except ImportError:  # Python 2
            from collections import Iterable

        if isinstance(status_code, Iterable):
            assert_that(response.status_code, is_in(status_code))
        else:
            assert_that(response.status_code, is_(status_code))

    try:
        return response.status_code, response.json()
    except ValueError:
        # Body is not valid JSON; fall back to the raw text.
        return response.status_code, response.text
def send(self, request, **kwargs):
    """Serve *request* from a ``pkg_resources.resource_stream``.

    The URL's network location is interpreted as the package name and its
    path as the resource path. A missing resource produces a not-found
    response whose body is the error message.

    :raises ValueError: for methods other than GET/HEAD, or when the URL
        has no network location.
    """
    # Only GET and HEAD make sense for a read-only resource.
    if request.method not in ('GET', 'HEAD'):
        raise ValueError('Invalid request method {}'.format(
            request.method))

    url_parts = urlparse(request.url)

    # Interpret host name as package name
    pkg_name = url_parts.netloc
    if not pkg_name:
        raise ValueError(
            'pkg_resource: hostname interpreted as package name')

    resp = Response()

    # Open the resource stream and translate a missing file into an HTTP
    # response. The path is passed on exactly as parsed.
    try:
        resp.raw = pkg_resources.resource_stream(pkg_name, url_parts.path)
        resp.raw.release_conn = resp.raw.close
    except FileNotFoundError as error:
        resp.status_code = codes.not_found

        # The error message may be localized, so it is encoded with the
        # preferred locale encoding and wrapped in a file-like object.
        payload = str(error).encode(locale.getpreferredencoding(False))
        resp.raw = io.BytesIO(payload)
        resp.headers['Content-Length'] = len(payload)

        # Add release_conn to the BytesIO object
        resp.raw.release_conn = resp.raw.close
    else:
        resp.status_code = codes.ok

    return resp
def finish_message(self, message, request, get_footer_url=(30, 3), **kwargs):
    """Handle the initial response.

    This hook identifies the URL in the footer of the initial response, makes
    a second request (polling as indicated by *get_footer_url*), and returns
    whatever that request returns. If the footer holds no URL, *message* is
    returned unchanged.

    Parameters
    ----------
    message
        Initial message; ``message.footer.text`` (if present) is scanned for
        the first entry that parses as a URL with a scheme.
    request
        Object whose ``get(url=..., tofile=...)`` performs the second request.
    get_footer_url : (int, int)
        Tuple of the form (`seconds`, `attempts`), controlling the interval
        between attempts to retrieve the data from the URL, and the maximum
        number of attempts to make.
    **kwargs
        Accepted but not used by this method.

    Raises
    ------
    RuntimeError
        If no attempt succeeds. The last ``requests.HTTPError``, if any, is
        attached as the cause.
    """
    # Check the message footer for a text element that is a valid URL
    url = None
    for text in getattr(message.footer, 'text', []):
        candidate = str(text)
        if urlparse(candidate).scheme:
            url = candidate
            break

    if not url:
        return message

    wait_seconds, attempts = get_footer_url
    last_error = None

    # Create a temporary file to store the response body
    with NamedTemporaryFile(prefix='pandasdmx-') as ntf:
        # Make a limited number of attempts to retrieve the file
        for _ in range(attempts):
            sleep(wait_seconds)
            try:
                # This succeeds once the file exists; the response is stored
                # to ntf.name
                return request.get(url=url, tofile=ntf.name)
            except requests.HTTPError as exc:
                # Not available yet: remember the failure and poll again
                # instead of aborting after the first attempt.
                last_error = exc

    raise RuntimeError('Maximum attempts exceeded') from last_error
def test_generate_request_header_step_error(self):
    """A failing client step makes ``generate_request_header`` raise ``KerberosExchangeError``."""
    patched = dict(
        authGSSClientInit=clientInit_complete,
        authGSSClientResponse=clientResponse,
        authGSSClientStep=clientStep_error,
    )
    with patch.multiple(kerberos_module_name, **patched):
        response = requests.Response()
        response.url = "http://www.example.org/"
        response.headers = {'www-authenticate': 'negotiate token'}
        host = urlparse(response.url).hostname

        auth = requests_kerberos.HTTPKerberosAuth()
        with self.assertRaises(requests_kerberos.exceptions.KerberosExchangeError):
            auth.generate_request_header(response, host)

        # The context was initialised and stepped, but no response token was ever requested.
        expected_flags = kerberos.GSS_C_MUTUAL_FLAG | kerberos.GSS_C_SEQUENCE_FLAG
        clientInit_complete.assert_called_with(
            "*****@*****.**", gssflags=expected_flags, principal=None)
        clientStep_error.assert_called_with("CTX", "token")
        self.assertFalse(clientResponse.called)
def get_host_and_path_from_url(self, request):
    """Split a request's URL into ``(host, port, path)``.

    This is a separate method to wrap some of urlparse's quirks.

    :param request: Object with a ``url`` attribute (e.g. a PreparedRequest).
    :return: Tuple ``(host, port, path)`` where ``host`` is the parsed hostname, ``port`` is the
        explicit port or ``0`` when the URL has none, and ``path`` is the URL path with a single
        leading slash removed, followed by ``?query`` when a query string is present.
    """
    parsed = urlparse(request.url)

    # If there is a slash on the front of the path, drop it (only the first one).
    path = parsed.path
    if path.startswith('/'):
        path = path[1:]

    if parsed.query:
        path = "{}?{}".format(path, parsed.query)

    return (parsed.hostname, parsed.port or 0, path)
def _establish_kerberos(self, url, stream=False, timeout=None, verify=True, cert=None, proxies=None):
    """Send an empty, Kerberos-authorized POST to *url* and record the resulting crypto context.

    A ``MSKerberosCrypt`` is created for the URL's hostname (retrying with ``service="HTTP"``
    if the first attempt raises ``GSSError``). Its token is sent in the ``Authorization``
    header; if the reply's ``www-authenticate`` header is a ``Kerberos`` challenge, the context
    is stepped with it and stored in ``HTTPMSKerberosAdapter.krb_dict`` under *url*.

    :param url: Target URL.
    :param stream, timeout, verify, cert, proxies: Passed positionally to ``HTTPAdapter.send``.
    :return: The response to the POST, with its content read and the response closed.
    """
    hostname = urlparse(url).hostname
    try:
        crypt = MSKerberosCrypt(hostname)
    except GSSError:
        crypt = MSKerberosCrypt(hostname, service="HTTP")

    headers = {
        'Authorization': "Kerberos " + crypt.get_token(),
        "Content-Type": "application/soap+xml;charset=UTF-8",
        "Connection": 'Keep-Alive',
    }

    prepared = requests.PreparedRequest()
    prepared.prepare_method("POST")
    prepared.prepare_url(url, None)
    prepared.prepare_headers(headers)
    prepared.prepare_body("", None, None)
    prepared.prepare_auth(HTTPMSKerberosAuth(), url)

    response = requests.adapters.HTTPAdapter.send(
        self, prepared, stream, timeout, verify, cert, proxies)

    challenge = response.headers['www-authenticate']
    kind, _, details = challenge.strip().partition(" ")
    if kind.lower() == "kerberos":
        crypt.step(details.strip())
        HTTPMSKerberosAdapter.krb_dict[url] = crypt

    # Touch .content so the body is read before the response is closed.
    response.content
    response.close()
    return response
def rebuild_proxies(self, prepared_request, proxies):
    """Return a copy of *proxies* and refresh the request's ``Proxy-Authorization`` header.

    Any existing ``Proxy-Authorization`` header is removed from the prepared request. If the
    proxy configured for the request URL's scheme carries both a username and a password, a
    new Basic ``Proxy-Authorization`` header is set from them.

    NOTE: the environment-based lookup (``trust_env`` / ``get_environ_proxies``) is disabled
    in this version, so no proxies are added from environment variables and NO_PROXY does not
    strip any; the bypass check is still evaluated but its result is unused.

    :param prepared_request: Request whose ``url`` and ``headers`` are used; ``headers`` is
        modified in place.
    :param proxies: Proxy mapping, or ``None`` for no proxies.
    :rtype: dict
    """
    if proxies is None:
        proxies = {}

    request_headers = prepared_request.headers
    target_url = prepared_request.url
    scheme = urlparse(target_url).scheme
    new_proxies = proxies.copy()

    # Evaluated as in the original flow; the result is currently not consulted.
    should_bypass_proxies(target_url, no_proxy=proxies.get('no_proxy'))

    if 'Proxy-Authorization' in request_headers:
        del request_headers['Proxy-Authorization']

    try:
        username, password = get_auth_from_url(new_proxies[scheme])
    except KeyError:
        username = password = None

    if username and password:
        request_headers['Proxy-Authorization'] = _basic_auth_str(username, password)

    return new_proxies
def test_principal_override(self):
    """An explicit principal is forwarded to ``authGSSClientInit`` (unless SSPI rejects it)."""
    patched = dict(
        authGSSClientInit=clientInit_complete,
        authGSSClientResponse=clientResponse,
        authGSSClientStep=clientStep_continue,
    )
    with patch.multiple(kerberos_module_name, **patched):
        response = requests.Response()
        response.url = "http://www.example.org/"
        response.headers = {'www-authenticate': 'negotiate token'}
        host = urlparse(response.url).hostname

        auth = requests_kerberos.HTTPKerberosAuth(principal="user@REALM")
        try:
            auth.generate_request_header(response, host)
            expected_flags = kerberos.GSS_C_MUTUAL_FLAG | kerberos.GSS_C_SEQUENCE_FLAG
            clientInit_complete.assert_called_with(
                "*****@*****.**", gssflags=expected_flags, principal="user@REALM")
        except NotImplementedError:
            # principal is not supported with kerberos-sspi; anywhere else this is a failure.
            if not auth._using_kerberos_sspi:
                raise
def test_kerberos_sspi_reject_principal(self):
    """With kerberos-sspi flagged, a principal is rejected and ``None`` is accepted."""
    patched = dict(
        authGSSClientInit=clientInit_complete,
        authGSSClientResponse=clientResponse,
        authGSSClientStep=clientStep_continue,
    )
    with patch.multiple(kerberos_module_name, **patched):
        response = requests.Response()
        response.url = "http://www.example.org/"
        host = urlparse(response.url).hostname

        # An explicit principal must be refused when the SSPI flag is set.
        auth_with_principal = requests_kerberos.HTTPKerberosAuth(principal="user@REALM")
        auth_with_principal._using_kerberos_sspi = True
        with self.assertRaises(NotImplementedError):
            auth_with_principal.generate_request_header(response, host)

        # Without a principal the header is generated and no ``principal`` kwarg is passed on.
        auth_without_principal = requests_kerberos.HTTPKerberosAuth(principal=None)
        auth_without_principal._using_kerberos_sspi = True
        auth_without_principal.generate_request_header(response, host)

        expected_flags = kerberos.GSS_C_MUTUAL_FLAG | kerberos.GSS_C_SEQUENCE_FLAG
        clientInit_complete.assert_called_with("*****@*****.**", gssflags=expected_flags)
def insert_spoofed_https_csrf_headers(headers, base_url):
    """
    Adds the HTTP headers that help to work around Django's CSRF protection, which shouldn't
    apply outside of the browser context.

    When ``base_url`` uses HTTPS, ``Host`` is set to the URL's network location and ``Referer``
    to ``base_url`` itself; for any other scheme ``headers`` is left untouched.

    :param headers: a dictionary into which headers will be inserted, if needed (modified in place)
    :param base_url: the base URL of the Django application being contacted
    """
    # Django checks 'Host' and 'Referer' on secure connections as part of its cross-site request
    # forgery defence. That threat model is malicious website code running in a browser and
    # riding on the user's credentials; it does not apply to a standalone Python REST API
    # client, so spoofing the two headers is acceptable here.
    # References:
    # https://docs.djangoproject.com/en/dev/ref/csrf/#how-it-works
    # http://security.stackexchange.com/questions/96114/why-is-referer-checking-needed-for-django
    # http://mathieu.fenniak.net/is-your-web-api-susceptible-to-a-csrf-exploit/
    parts = urlparse(base_url)
    if parts.scheme != 'https':
        return
    headers['Host'] = parts.netloc
    headers['Referer'] = base_url  # the header name really is spelled "Referer"
def preprocess_media_tags(element):
    """Normalise list/caption whitespace and rewrite media embeds on a single HTML element.

    Elements that are not ``html.HtmlElement`` instances are returned untouched. Otherwise:

    * ``figcaption``: child tags are dropped (``drop_tag``) so only text content remains.
    * ``ol`` / ``ul``: leading text is cleared; ``li``: tail text is cleared.
    * ``iframe`` with a ``src`` matching ``youtube_re`` or ``vimeo_re``: ``src`` is rewritten to
      the local ``/embed/...`` endpoint and the element is wrapped via ``_wrap_tag(..., 'figure')``.
    * ``blockquote.twitter-tweet``: for each contained link whose ``href`` matches ``twitter_re``,
      a new iframe pointing at ``/embed/twitter`` is built and wrapped in a figure (the last
      matching link wins).

    :param element: Element to process.
    :return: The (possibly replaced) element.
    """
    if not isinstance(element, html.HtmlElement):
        return element

    tag = element.tag

    if tag == 'figcaption':
        # figcaption may have only text content
        for child in element.findall('*'):
            child.drop_tag()

    elif tag in ('ol', 'ul'):
        # ignore any spaces between <ul> and <li>
        element.text = ''

    elif tag == 'li':
        # ignore spaces after </li>
        element.tail = ''

    elif tag == 'iframe' and element.get('src'):
        iframe_src = element.get('src')
        youtube = youtube_re.match(iframe_src)
        vimeo = vimeo_re.match(iframe_src)

        if youtube:
            yt_id = urlparse(iframe_src).path.replace('/embed/', '')
            element.set(
                'src',
                '/embed/youtube?url=' + quote_plus('https://www.youtube.com/watch?v=' + yt_id))
        elif vimeo:
            element.set(
                'src',
                '/embed/vimeo?url=' + quote_plus('https://vimeo.com/' + vimeo.group(2)))

        if youtube or vimeo:
            element = _wrap_tag(element, 'figure')

    elif tag == 'blockquote' and element.get('class') == 'twitter-tweet':
        for tw_link in element.cssselect('a'):
            href = tw_link.get('href')
            # Anchors without an href yield None, which the regex cannot match against.
            if href and twitter_re.match(href):
                twitter_frame = html.HtmlElement()
                twitter_frame.tag = 'iframe'
                twitter_frame.set('src', '/embed/twitter?url=' + quote_plus(href))
                element = _wrap_tag(twitter_frame, 'figure')

    return element
def __init__(self, count, url, cls, session, params=None, etag=None, headers=None):
    """Set up iteration state for *count* items fetched from *url*.

    :param count: Number of items requested.
    :param url: URL used for the first GET.
    :param cls: Class used to construct each returned item.
    :param session: Session handed to ``GitHubCore.__init__``.
    :param params: Optional query-string parameters; passed through ``self._remove_none``.
    :param etag: Optional ETag; when truthy it is sent as ``If-None-Match``.
    :param headers: Optional headers for the GET request.
    """
    GitHubCore.__init__(self, {}, session)

    #: Original number of items requested
    self.original = count
    #: Number of items left in the iterator
    self.count = count
    #: URL the class used to make its first GET
    self.url = url
    #: Last URL that was requested
    self.last_url = None
    self._api = self.url
    #: Class for constructing an item to return
    self.cls = cls

    #: Parameters of the query string
    self.params = params or {}
    self._remove_none(self.params)

    # Deliberately not taken from the ``etag`` argument: this attribute should reflect the
    # ETag header GitHub returns. Were it pre-populated, it would not be set from the
    # response, which is not what we want.
    #: The ETag Header value returned by GitHub
    self.etag = None

    #: Headers generated for the GET request
    self.headers = headers or {}
    #: The last response seen
    self.last_response = None
    #: Last status code received
    self.last_status = 0

    if etag:
        self.headers['If-None-Match'] = etag

    self.path = urlparse(self.url).path
def __call__(self, request):
    """Attach Kerberos handling to *request* and return it.

    When ``force_preemptive`` is set and authentication has not completed, an ``Authorization``
    header is generated up front for the request's host. ``self.handle_response`` is always
    registered as a ``response`` hook, and the current position of the request body is
    remembered in ``self.pos`` (``None`` when the body has no ``tell``).
    """
    wants_preemptive = self.force_preemptive and not self.auth_done
    if wants_preemptive:
        # Add the Authorization header before we receive a 401 by the 401 handler.
        hostname = urlparse(request.url).hostname
        auth_header = self.generate_request_header(None, hostname, is_preemptive=True)
        log.debug("HTTPKerberosAuth: Preemptive Authorization header: {0}".format(auth_header))
        request.headers['Authorization'] = auth_header

    request.register_hook('response', self.handle_response)

    try:
        position = request.body.tell()
    except AttributeError:
        # When this auth object is reused and the previous body was file-like, ``pos`` would
        # still hold that body's position; reset it explicitly.
        position = None
    self.pos = position

    return request
def authenticate_server(self, response):
    """
    Uses GSSAPI to authenticate the server.

    The security context stored for the response's hostname is stepped with the negotiate
    value taken from the response.

    Returns True on success, False if stepping raises a ``GSSError``.
    """
    log.debug("authenticate_server(): Authenticate header: {0}".format(
        _negotiate_value(response)))

    hostname = urlparse(response.url).hostname

    try:
        # If the handshake isn't complete here, nothing we can do
        self.context[hostname].step(_negotiate_value(response))
    except gssapi.exceptions.GSSError as error:
        log.exception("authenticate_server(): context stepping failed:")
        log.exception(error.gen_message())
        return False
    else:
        log.debug("authenticate_server(): returning {0}".format(response))
        return True
def __make_request(self, url, method=u'GET', headers=None, body_string=None, **kwargs):
    """Sign and send a request, returning the result of ``self.__check_response``.

    :param url: Full URL; its path and parsed query string are passed to the signer.
    :param method: HTTP method name.
    :param headers: Optional headers handed to the signer as ``with_headers``.
    :param body_string: Optional request body.
    :param kwargs: ``retries`` is discarded; the rest is forwarded to the signer and to
        ``session.request``.
    """
    # The retries argument is accepted for callers' convenience but not used here.
    kwargs.pop(u'retries', None)

    request_path = urlparse(url).path

    # The query string has to reach the signer so it is covered by the request signature.
    query = parse_qs(urlsplit(url).query)
    if query:
        # parse_qs wraps every value in a list; unwrap the single-valued parameters.
        query = {name: (values if len(values) > 1 else values[0])
                 for name, values in iteritems(query)}

    signed_headers, signed_body = self.__build_vinyldns_request(
        method, request_path, body_string, query, with_headers=headers or {}, **kwargs)

    response = self.session.request(
        method, url, data=signed_body, headers=signed_headers, **kwargs)
    return self.__check_response(response)
def get_string_to_sign(self, request, headers, created_timestamp, expires_timestamp): sts = [] for header in headers: if header == "(request-target)": path_url = requests.models.RequestEncodingMixin.path_url.fget(request) sts.append("{}: {} {}".format(header, request.method.lower(), path_url)) elif header == "(created)": sts.append("{}: {}".format(header, created_timestamp)) elif header == "(expires)": assert (expires_timestamp is not None), \ 'You should provide the "expires_in" argument when using the (expires) header' sts.append("{}: {}".format(header, int(expires_timestamp))) else: if header.lower() == "host": url = urlparse(request.url) value = request.headers.get("host", url.hostname) if url.scheme == "http" and url.port not in [None, 80] or url.scheme == "https" \ and url.port not in [443, None]: value = "{}:{}".format(value, url.port) else: value = request.headers[header] sts.append("{k}: {v}".format(k=header.lower(), v=value)) return "\n".join(sts).encode()
def get_session(self, previousresponse, **kwargs):
    """Perform a session login and return a new session ID.

    The login request is a copy of ``previousresponse.request`` with its ``Authorization``
    header removed, retargeted as a POST to ``APIV1 + "/sessions"`` on the same scheme and
    host, carrying the credentials as a JSON body. It is sent through
    ``previousresponse.connection``.

    :param previousresponse: Response whose request and connection are reused for the login.
    :param kwargs: Forwarded to ``connection.send`` (the original note says this carries the
        SSL verify setting).
    :return: The ``sessionId`` value from the JSON reply.
    :raises Unauthorized: If no username/password is configured, or the reply carries no
        session ID.
    :raises NotBootstrapped: If the reply is a 503 whose ``errorMessage`` contains
        ``'should be bootstrapped'``.
    """
    if self.username is None or self.password is None:
        raise Unauthorized("Cannot authenticate without username/password")

    logger.info("Attempting to authenticate as {0}".format(self.username))
    authdict = {
        "username": self.username,
        "password": self.password,
        "provider": self.provider
    }

    prep = previousresponse.request.copy()
    # The login must not carry the credentials of the failed request.
    if 'Authorization' in prep.headers:
        del prep.headers['Authorization']
    prep.prepare_method("post")

    p = urlparse(previousresponse.request.url)
    prep.prepare_url(
        urlunparse([p.scheme, p.netloc, APIV1 + "/sessions", None, None, None]),
        params=None)
    logger.debug("Authenticating via url: {0}".format(prep.url))
    prep.prepare_body(data=None, files=None, json=authdict)

    authresponse = previousresponse.connection.send(prep, **kwargs)

    # Decode the body once; a non-JSON body is treated like a reply without a session ID
    # instead of raising again while the failure is being classified.
    try:
        payload = authresponse.json()
    except ValueError:
        payload = None

    if isinstance(payload, dict):
        if 'sessionId' in payload:
            return payload['sessionId']
        error_message = payload.get('errorMessage')
        if authresponse.status_code == 503 and 'should be bootstrapped' in (error_message or ''):
            raise NotBootstrapped(error_message, authresponse)

    raise Unauthorized("Authentication failed", authresponse)
def handle_401(self, response, **kwargs):
    """Answer a 401 response with Negotiate authentication, if the server offers it.

    A security context is looked up (or created via ``self.get_context``) per hostname in
    ``self.contexts``. While the server keeps answering 401, the context is incomplete and a
    token is available, the original request is copied, given an
    ``Authorization: Negotiate <token>`` header and re-sent on the same connection; each reply's
    challenge is fed back into the context.

    :param response: The 401 response being handled.
    :param kwargs: Forwarded to ``connection.send`` for every retry.
    :return: The last response received, or the original response when negotiate is not
        offered or no token can be produced.
    """
    def encode_token(token):
        # Base64-encode a raw token as text; None when there is nothing to send.
        if not token:
            return None
        encoded = base64.b64encode(token)
        # b64encode yields bytes on Python 3, which cannot be concatenated to the str prefix.
        if not isinstance(encoded, str):
            encoded = encoded.decode('ascii')
        return encoded

    logger.debug("Starting to handle 401 error")
    logger.debug(response.headers)

    challenges = self.get_challenges(response)
    logger.debug("auth_methods={0}".format(challenges))
    if 'negotiate' not in challenges:
        logger.debug("Giving up on negotiate auth")
        return response

    host = urlparse(response.url).hostname
    logger.debug("host={0}".format(host))

    if host in self.contexts:
        ctx = self.contexts[host]
    else:
        ctx = self.contexts[host] = self.get_context(host)
    logger.debug("ctx={0}".format(ctx))

    in_token = base64.b64decode(challenges['negotiate']) if challenges['negotiate'] else None
    out_token = encode_token(ctx.step(in_token))

    while response.status_code == 401 and not ctx.complete and out_token:
        # Drain the body and hand the connection back before re-sending on it.
        response.content
        response.raw.release_conn()

        new_request = response.request.copy()
        new_request.headers['Authorization'] = 'Negotiate ' + out_token
        new_response = response.connection.send(new_request, **kwargs)
        new_response.history.append(response)
        new_response.request = new_request
        response = new_response

        challenges = self.get_challenges(response)
        if 'negotiate' not in challenges or not challenges['negotiate']:
            # The server sent no further token; nothing left to feed into the context.
            break
        out_token = encode_token(ctx.step(base64.b64decode(challenges['negotiate'])))

    return response