def open_local_file(self, url):
    """Use local file."""
    import mimetypes, mimetools, email.Utils
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    host, file = splithost(url)
    localname = url2pathname(file)
    try:
        stats = os.stat(localname)
    except OSError as e:
        raise IOError(e.errno, e.strerror, e.filename)
    size = stats.st_size
    modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
    mtype = mimetypes.guess_type(url)[0]
    headers = mimetools.Message(StringIO(
        'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
        (mtype or 'text/plain', size, modified)))
    if not host:
        urlfile = file
        if file[:1] == '/':
            urlfile = 'file://' + file
        return addinfourl(open(localname, 'rb'), headers, urlfile)
    host, port = splitport(host)
    if (not port
            and socket.gethostbyname(host) in (localhost(), thishost())):
        urlfile = file
        if file[:1] == '/':
            urlfile = 'file://' + file
        return addinfourl(open(localname, 'rb'), headers, urlfile)
    raise IOError('local file error', 'not on local host')

def http_open(self, req):
    url = req.get_full_url()
    try:
        content = self.urls[url]
    except KeyError:
        resp = addinfourl(StringIO(""), None, url)
        resp.code = 404
        resp.msg = "Not Found"
    else:
        resp = addinfourl(StringIO(content), None, url)
        resp.code = 200
        resp.msg = "OK"
    return resp

def http_open(self, req):
    fract = urlparse(req.get_full_url())
    if self.url_target == fract.netloc:
        date = "-".join(tuple(filter(None, fract.path.split('/')))[-3:])
        if date in data:
            resp = request.addinfourl(
                BytesIO(str(data[date]["djia"]).encode('utf-8')),
                "msg", req.get_full_url())
            resp.code = 200
            resp.msg = "OK"
        else:
            resp = request.addinfourl(
                BytesIO("error\ndata not available yet".encode('utf-8')),
                "msg", req.get_full_url())
            resp.code = 404
            resp.msg = "Not Found"
    else:
        raise NotImplementedError
    return resp

def decode_url(self, url):
    target, _, headers, _ = self.translate_all(url)
    headers_fp = open(headers, 'rb')
    # The first two bytes of the headers file hold the HTTP status
    # code as a big-endian short; the raw header block follows.
    code, = struct.unpack('>h', headers_fp.read(2))

    def make_headers(fp):
        return HTTPMessage(fp) if PY2 else parse_headers(fp)

    return addinfourl(open(target, 'rb'), make_headers(headers_fp), url, code)

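Judging from the two-byte read above, the headers file stores the HTTP status code as a big-endian short followed by the raw header block. A minimal sketch of the matching writer under that assumption (encode_response is a hypothetical name, not part of the original API):

import struct

def encode_response(path, code, header_bytes):
    # Hypothetical counterpart to decode_url() above: a big-endian
    # short status code, then the raw header block that
    # HTTPMessage/parse_headers will consume.
    with open(path, 'wb') as fp:
        fp.write(struct.pack('>h', code))
        fp.write(header_bytes)

# encode_response('cached.headers', 200,
#                 b'Content-Type: text/html\r\nContent-Length: 11\r\n\r\n')
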
def http_response(self, req, resp):
    old_resp = resp
    # gzip (response bodies are bytes, so wrap them in BytesIO)
    if resp.headers.get("content-encoding") == "gzip":
        gz = GzipFile(fileobj=BytesIO(resp.read()), mode="r")
        resp = request.addinfourl(gz, old_resp.headers, old_resp.url,
                                  old_resp.code)
        resp.msg = old_resp.msg
    # deflate
    if resp.headers.get("content-encoding") == "deflate":
        gz = BytesIO(deflate(resp.read()))
        resp = request.addinfourl(gz, old_resp.headers, old_resp.url,
                                  old_resp.code)
        resp.msg = old_resp.msg
    return resp

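A processor like this only takes effect once it is chained into an opener, and the request side should advertise the encodings the response side can undo. A self-contained sketch, assuming Python 3 (ContentEncodingProcessor is an illustrative name, and the raw-deflate fallback mirrors the handler further below):

import io
import zlib
from gzip import GzipFile
from urllib import request

class ContentEncodingProcessor(request.BaseHandler):
    """Illustrative processor: transparently undoes gzip/deflate bodies."""

    def http_request(self, req):
        # Advertise the encodings the response side knows how to undo.
        req.add_header("Accept-Encoding", "gzip, deflate")
        return req

    def http_response(self, req, resp):
        enc = resp.headers.get("content-encoding")
        if enc == "gzip":
            body = GzipFile(fileobj=io.BytesIO(resp.read()), mode="r")
        elif enc == "deflate":
            data = resp.read()
            try:
                body = io.BytesIO(zlib.decompress(data))
            except zlib.error:  # raw deflate stream (no RFC 1950 wrapper)
                body = io.BytesIO(zlib.decompress(data, -zlib.MAX_WBITS))
        else:
            return resp
        new_resp = request.addinfourl(body, resp.headers, resp.url, resp.code)
        new_resp.msg = resp.msg
        return new_resp

    https_request = http_request
    https_response = http_response

opener = request.build_opener(ContentEncodingProcessor())
# with opener.open("https://example.com/") as resp:
#     print(resp.read()[:80])
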
def decode_url(self, url):
    target, _, headers, _ = self.translate_all(url)
    headers_fp = open(headers, 'rb')  # binary: the status code is struct-packed
    code, = struct.unpack('>h', headers_fp.read(2))

    def make_headers(fp):
        return HTTPMessage(fp) if Compatibility.PY2 else parse_headers(fp)

    return addinfourl(open(target, 'rb'), make_headers(headers_fp), url, code)

def http_response(self, req, response):
    decompressed = None
    if response.headers.get("content-encoding") == "bzip2":
        import bz2
        decompressed = io.BytesIO(bz2.decompress(response.read()))
    elif response.headers.get("content-encoding") == "gzip":
        from gzip import GzipFile
        decompressed = GzipFile(fileobj=io.BytesIO(response.read()), mode="r")
    elif response.headers.get("content-encoding") == "deflate":
        import zlib
        # Read the body once; a second read() would return b''.
        body = response.read()
        try:
            decompressed = io.BytesIO(zlib.decompress(body))
        except zlib.error:
            # Raw deflate stream: the server ignored RFC 1950.
            decompressed = io.BytesIO(zlib.decompress(body, -zlib.MAX_WBITS))
    if decompressed:
        old_response = response
        response = urllib_request.addinfourl(decompressed,
                                             old_response.headers,
                                             old_response.url,
                                             old_response.code)
        response.msg = old_response.msg
    return response

def https_open(self, req):
    host = req.get_host()
    if not host:
        raise M2Crypto.m2urllib2.URLError(
            'no host given: ' + req.get_full_url())

    # Our change: Check to see if we're using a proxy.
    # Then create an appropriate ssl-aware connection.
    full_url = req.get_full_url()
    target_host = urlparse(full_url)[1]

    if target_host != host:
        h = myProxyHTTPSConnection(host=host, appname=self.appname,
                                   ssl_context=self.ctx)
        # M2Crypto.ProxyHTTPSConnection.putrequest expects a fullurl
        selector = full_url
    else:
        h = myHTTPSConnection(host=host, appname=self.appname,
                              ssl_context=self.ctx)
        selector = req.get_selector()
    # End our change

    h.set_debuglevel(self._debuglevel)
    if self.saved_session:
        h.set_session(self.saved_session)

    headers = dict(req.headers)
    headers.update(req.unredirected_hdrs)
    # We want to make an HTTP/1.1 request, but the addinfourl
    # class isn't prepared to deal with a persistent connection.
    # It will try to read all remaining data from the socket,
    # which will block while the server waits for the next request.
    # So make sure the connection gets closed after the (only)
    # request.
    headers["Connection"] = "close"
    try:
        h.request(req.get_method(), selector, req.data, headers)
        s = h.get_session()
        if s:
            self.saved_session = s
        r = h.getresponse()
    except socket.error as err:
        # XXX what error?
        err.filename = full_url
        raise M2Crypto.m2urllib2.URLError(err)

    # Pick apart the HTTPResponse object to get the addinfourl
    # object initialized properly.
    # Wrap the HTTPResponse object in socket's file object adapter
    # for Windows.  That adapter calls recv(), so delegate recv()
    # to read().  This weird wrapping allows the returned object to
    # have readline() and readlines() methods.
    # XXX It might be better to extract the read buffering code
    # out of socket._fileobject() and into a base class.
    r.recv = r.read
    fp = socket._fileobject(r)

    resp = addinfourl(fp, r.msg, req.get_full_url())
    resp.code = r.status
    resp.msg = r.reason
    return resp

def open(self, url, conn_timeout=None):
    if conn_timeout == 0:
        raise urllib_error.URLError(
            'Could not reach %s within deadline.' % url)
    if url.startswith('http'):
        self.opened.set()
    if self.error:
        raise urllib_error.HTTPError(url, self.error, None, None,
                                     Compatibility.BytesIO(b'glhglhg'))
    # addinfourl takes (fp, headers, url, code) in that order.
    return urllib_request.addinfourl(Compatibility.BytesIO(self.rv),
                                     None, url, self.code)

def mock_startit_response(request):
    mock_url = 'https://startit.rs/poslovi/pretraga/python/'
    if request.get_full_url() == mock_url:
        # addinfourl: https://archive.is/LpjxV
        response = addinfourl(read_mock_page(), 'mock header',
                              request.get_full_url())
        response.code = 200
        response.msg = 'OK'
        return response

def s3_open(self, req):
    # The implementation was inspired mainly by the code behind
    # urllib.request.FileHandler.file_open().
    #
    # recipe copied from:
    # http://code.activestate.com/recipes/578957-urllib-handler-for-amazon-s3-buckets/
    # converted to boto3
    if version_info[0] < 3:
        bucket_name = req.get_host()
        key_name = url2pathname(req.get_selector())[1:]
    else:
        bucket_name = req.host
        key_name = url2pathname(req.selector)[1:]

    if not bucket_name or not key_name:
        raise URLError('url must be in the format s3://<bucket>/<key>')

    s3 = boto3.resource('s3')
    key = s3.Object(bucket_name, key_name)

    client = boto3.client('s3')
    obj = client.get_object(Bucket=bucket_name, Key=key_name)
    filelike = _FileLikeKey(obj['Body'])

    origurl = 's3://{}/{}'.format(bucket_name, key_name)
    if key is None:
        raise URLError('no such resource: {}'.format(origurl))

    headers = [
        ('Content-type', key.content_type),
        ('Content-encoding', key.content_encoding),
        ('Content-language', key.content_language),
        ('Content-length', key.content_length),
        ('Etag', key.e_tag),
        ('Last-modified', key.last_modified),
    ]
    # Use a distinct loop variable so the S3 object `key` isn't shadowed.
    headers = email.message_from_string(
        '\n'.join('{}: {}'.format(name, value)
                  for name, value in headers if value is not None))
    return addinfourl(filelike, headers, origurl)

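To route s3:// URLs through a handler like this, it must be registered with an opener; build_opener() dispatches on the URL scheme, so a method named s3_open receives s3:// requests. A trimmed-down sketch under that assumption (S3Handler and the bucket/key names are illustrative):

import email
import boto3
from urllib import request
from urllib.error import URLError

class S3Handler(request.BaseHandler):
    """Illustrative handler: build_opener() routes s3:// URLs here
    because the method is named s3_open."""

    def s3_open(self, req):
        bucket, key = req.host, req.selector.lstrip('/')
        if not bucket or not key:
            raise URLError('url must be in the format s3://<bucket>/<key>')
        obj = boto3.client('s3').get_object(Bucket=bucket, Key=key)
        headers = email.message_from_string(
            'Content-length: {}'.format(obj['ContentLength']))
        return request.addinfourl(obj['Body'], headers,
                                  's3://{}/{}'.format(bucket, key))

opener = request.build_opener(S3Handler())
request.install_opener(opener)
# with request.urlopen('s3://my-bucket/some/key') as resp:
#     print(resp.read()[:64])
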
def open_gopher(self, url):
    """Use Gopher protocol."""
    if not isinstance(url, str):
        raise IOError(
            'gopher error',
            'proxy support for gopher protocol currently not implemented')
    from eventlib.green import gopherlib
    host, selector = splithost(url)
    if not host:
        raise IOError('gopher error', 'no host given')
    host = unquote(host)
    type, selector = splitgophertype(selector)
    selector, query = splitquery(selector)
    selector = unquote(selector)
    if query:
        query = unquote(query)
        fp = gopherlib.send_query(selector, query, host)
    else:
        fp = gopherlib.send_selector(selector, host)
    return addinfourl(fp, noheaders(), "gopher:" + url)

def s3_open(self, req):
    # The implementation was inspired mainly by the code behind
    # urllib.request.FileHandler.file_open().
    bucket_name = req.host
    key_name = url2pathname(req.selector)[1:]

    if not bucket_name or not key_name:
        raise URLError('url must be in the format s3://<bucket>/<key>')

    try:
        conn = self._conn
    except AttributeError:
        conn = self._conn = boto.s3.connection.S3Connection()

    bucket = conn.get_bucket(bucket_name, validate=False)
    key = bucket.get_key(key_name)

    origurl = 's3://{}/{}'.format(bucket_name, key_name)
    if key is None:
        raise URLError('no such resource: {}'.format(origurl))

    headers = [
        ('Content-type', key.content_type),
        ('Content-encoding', key.content_encoding),
        ('Content-language', key.content_language),
        ('Content-length', key.size),
        ('Etag', key.etag),
        ('Last-modified', key.last_modified),
    ]
    # Use a distinct loop variable so the boto key object isn't shadowed.
    headers = email.message_from_string(
        '\n'.join('{}: {}'.format(name, value)
                  for name, value in headers if value is not None))
    return addinfourl(_FileLikeKey(key), headers, origurl)

def ftp_open(self, req):
    host = req.get_host()
    if not host:
        raise IOError('ftp error', 'no host given')
    host, port = splitport(host)
    if port is None:
        port = ftplib.FTP_PORT
    else:
        port = int(port)

    # username/password handling
    user, host = splituser(host)
    if user:
        user, passwd = splitpasswd(user)
    else:
        passwd = None
    host = unquote(host)
    user = unquote(user or '')
    passwd = unquote(passwd or '')

    try:
        host = socket.gethostbyname(host)
    except socket.error as msg:
        raise URLError(msg)
    path, attrs = splitattr(req.get_selector())
    dirs = path.split('/')
    dirs = list(map(unquote, dirs))
    dirs, file = dirs[:-1], dirs[-1]
    if dirs and not dirs[0]:
        dirs = dirs[1:]
    try:
        fw = self.connect_ftp(user, passwd, host, port, dirs)
        type = file and 'I' or 'D'
        for attr in attrs:
            attr, value = splitvalue(attr)
            if attr.lower() == 'type' and \
                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
                type = value.upper()

        # -- range support modifications start here
        rest = None
        range_tup = range_header_to_tuple(req.headers.get('Range', None))
        assert range_tup != ()
        if range_tup:
            (fb, lb) = range_tup
            if fb > 0:
                rest = fb
        # -- range support modifications end here

        fp, retrlen = fw.retrfile(file, type, rest)

        # -- range support modifications start here
        if range_tup:
            (fb, lb) = range_tup
            if lb == '':
                if retrlen is None or retrlen == 0:
                    raise RangeError(
                        9, 'Requested Range Not Satisfiable due to '
                           'unobtainable file length.')
                lb = retrlen
                retrlen = lb - fb
                if retrlen < 0:
                    # beginning of range is larger than file
                    raise RangeError(9, 'Requested Range Not Satisfiable')
            else:
                retrlen = lb - fb
            fp = RangeableFileObject(fp, (0, retrlen))
        # -- range support modifications end here

        headers = ""
        mtype = mimetypes.guess_type(req.get_full_url())[0]
        if mtype:
            headers += "Content-Type: %s\n" % mtype
        if retrlen is not None and retrlen >= 0:
            headers += "Content-Length: %d\n" % retrlen
        sf = StringIO(headers)
        headers = mimetools.Message(sf)
        return addinfourl(fp, headers, req.get_full_url())
    except ftplib.all_errors as msg:
        raise IOError('ftp error', msg).with_traceback(sys.exc_info()[2])

def http_open(self, req):
    resp = addinfourl(StringIO('test'), '', req.get_full_url(), 200)
    resp.msg = 'OK'
    return resp

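Stubs like this are usually wired into a private opener inside a test so no real network traffic happens. A minimal self-contained sketch, assuming Python 3 (FakeHTTPHandler is an illustrative name):

from io import BytesIO
from urllib import request
from urllib.response import addinfourl

class FakeHTTPHandler(request.HTTPHandler):
    """Illustrative stub: answers every http:// request with a canned body."""

    def http_open(self, req):
        resp = addinfourl(BytesIO(b'test'), {}, req.get_full_url(), 200)
        resp.msg = 'OK'
        return resp

# Passing a subclass of HTTPHandler replaces the default handler.
opener = request.build_opener(FakeHTTPHandler())
with opener.open('http://example.invalid/') as resp:
    assert resp.read() == b'test' and resp.getcode() == 200
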
def http_error_303(self, req, fp, code, msg, headers):
    return addinfourl(fp, headers, req.get_full_url(), code)

def https_open(self, req):
    # https://docs.python.org/3.3/library/urllib.request.html#urllib.request.Request.get_host
    try:  # up to python-3.2
        host = req.get_host()
    except AttributeError:  # from python-3.3
        host = req.host
    if not host:
        raise M2Crypto.m2urllib2.URLError('no host given')

    # Our change: Check to see if we're using a proxy.
    # Then create an appropriate ssl-aware connection.
    full_url = req.get_full_url()
    target_host = urlparse(full_url)[1]

    if target_host != host:
        request_uri = urldefrag(full_url)[0]
        h = httpslib.ProxyHTTPSConnection(host=host, ssl_context=self.ctx)
    else:
        try:  # up to python-3.2
            request_uri = req.get_selector()
        except AttributeError:  # from python-3.3
            request_uri = req.selector
        h = httpslib.HTTPSConnection(host=host, ssl_context=self.ctx)
    # End our change

    h.set_debuglevel(self._debuglevel)

    headers = dict(req.headers)
    headers.update(req.unredirected_hdrs)
    # We want to make an HTTP/1.1 request, but the addinfourl
    # class isn't prepared to deal with a persistent connection.
    # It will try to read all remaining data from the socket,
    # which will block while the server waits for the next request.
    # So make sure the connection gets closed after the (only)
    # request.
    headers["Connection"] = "close"
    try:
        h.request(req.get_method(), request_uri, req.data, headers)
        r = h.getresponse()
    except socket.error as err:
        # XXX what error?
        raise M2Crypto.m2urllib2.URLError(err)

    # Pick apart the HTTPResponse object to get the addinfourl
    # object initialized properly.
    # Wrap the HTTPResponse object in socket's file object adapter
    # for Windows.  That adapter calls recv(), so delegate recv()
    # to read().  This weird wrapping allows the returned object to
    # have readline() and readlines() methods.
    r.recv = r.read

    if sys.version_info < (3, 0):
        fp = socket._fileobject(r, close=True)
    else:
        r._decref_socketios = lambda: None
        r.ssl = h.sock.ssl
        r._timeout = -1.0
        # hack to bypass python3 bug with 0 buffer size and
        # http/client.py readinto method for response class
        if r.length is not None and r.length == 0:
            r.readinto = lambda b: 0
        r.recv_into = r.readinto
        fp = socket.SocketIO(r, 'rb')

    resp = addinfourl(fp, r.msg, req.get_full_url())
    resp.code = r.status
    resp.msg = r.reason
    return resp

def http_error_303(self, req, fp, code, msg, headers):
    infourl = addinfourl(fp, headers, req.get_full_url())
    infourl.status = code
    infourl.code = code
    return infourl

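Returning an addinfourl from http_error_303 short-circuits the redirect machinery, so the caller sees the 303 response itself instead of the redirect target. A sketch of wiring that up, assuming Python 3 (DontFollow303 is an illustrative name):

from urllib import request
from urllib.response import addinfourl

class DontFollow303(request.HTTPRedirectHandler):
    """Illustrative handler: surface 303 responses instead of following them."""

    def http_error_303(self, req, fp, code, msg, headers):
        infourl = addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        infourl.code = code
        return infourl

opener = request.build_opener(DontFollow303())
# resp = opener.open('http://example.com/redirects-with-303')
# print(resp.code, resp.headers.get('Location'))
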
def http_error_404(self, req, fp, code, msg, headers):
    infourl = _urllib_request.addinfourl(fp, headers, req.get_full_url())
    infourl.status = code
    infourl.code = code
    return infourl

def http_error_206(self, req, fp, code, msg, hdrs):
    # 206 Partial Content Response
    r = addinfourl(fp, hdrs, req.get_full_url())
    r.code = code
    r.msg = msg
    return r

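Handlers like this pair with a Range request header; in early urllib2 releases any status other than 200 was routed through the error chain, so a 206 Partial Content reply had to be rescued explicitly. A legacy Python 2 sketch under that assumption (HTTPRangeHandler and the URL are illustrative):

import urllib2  # legacy Python 2 idiom, matching the handler above

class HTTPRangeHandler(urllib2.BaseHandler):
    """Illustrative: rescue 206 replies, fail loudly on 416."""

    def http_error_206(self, req, fp, code, msg, hdrs):
        # 206 Partial Content: the range was honored, not an error.
        r = urllib2.addinfourl(fp, hdrs, req.get_full_url())
        r.code = code
        r.msg = msg
        return r

    def http_error_416(self, req, fp, code, msg, hdrs):
        # 416 Requested Range Not Satisfiable
        raise urllib2.URLError('requested range not satisfiable')

opener = urllib2.build_opener(HTTPRangeHandler())
req = urllib2.Request('http://example.com/big.bin')
req.add_header('Range', 'bytes=0-1023')  # first KiB only
# partial = opener.open(req).read()
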
def open_ftp(self, url):
    """Use FTP protocol."""
    if not isinstance(url, str):
        raise IOError(
            'ftp error',
            'proxy support for ftp protocol currently not implemented')
    import mimetypes, mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    host, path = splithost(url)
    if not host:
        raise IOError('ftp error', 'no host given')
    host, port = splitport(host)
    user, host = splituser(host)
    if user:
        user, passwd = splitpasswd(user)
    else:
        passwd = None
    host = unquote(host)
    user = unquote(user or '')
    passwd = unquote(passwd or '')
    host = socket.gethostbyname(host)
    if not port:
        from eventlib.green import ftplib
        port = ftplib.FTP_PORT
    else:
        port = int(port)
    path, attrs = splitattr(path)
    path = unquote(path)
    dirs = path.split('/')
    dirs, file = dirs[:-1], dirs[-1]
    if dirs and not dirs[0]:
        dirs = dirs[1:]
    if dirs and not dirs[0]:
        dirs[0] = '/'
    key = user, host, port, '/'.join(dirs)
    # XXX thread unsafe!
    if len(self.ftpcache) > MAXFTPCACHE:
        # Prune the cache, rather arbitrarily
        for k in list(self.ftpcache.keys()):
            if k != key:
                v = self.ftpcache[k]
                del self.ftpcache[k]
                v.close()
    try:
        if key not in self.ftpcache:
            self.ftpcache[key] = ftpwrapper(user, passwd, host, port, dirs)
        if not file:
            type = 'D'
        else:
            type = 'I'
        for attr in attrs:
            attr, value = splitvalue(attr)
            if attr.lower() == 'type' and \
                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
                type = value.upper()
        (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
        mtype = mimetypes.guess_type("ftp:" + url)[0]
        headers = ""
        if mtype:
            headers += "Content-Type: %s\n" % mtype
        if retrlen is not None and retrlen >= 0:
            headers += "Content-Length: %d\n" % retrlen
        headers = mimetools.Message(StringIO(headers))
        return addinfourl(fp, headers, "ftp:" + url)
    except ftperrors() as msg:
        raise IOError('ftp error', msg).with_traceback(sys.exc_info()[2])

def addinfourl_wrapper(stream, headers, url, code):
    # Newer Pythons let addinfourl carry the status code directly;
    # older ones need it attached after construction.
    if hasattr(compat_urllib_request.addinfourl, 'getcode'):
        return compat_urllib_request.addinfourl(stream, headers, url, code)
    ret = compat_urllib_request.addinfourl(stream, headers, url)
    ret.code = code
    return ret

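The wrapper exists because addinfourl only gained a code constructor argument (and getcode()) in later Python versions; before that the attribute had to be attached after construction. A usage sketch, assuming addinfourl_wrapper and a compat_urllib_request alias are in scope as above:

import io

try:  # Python 3
    import urllib.request as compat_urllib_request
except ImportError:  # Python 2
    import urllib2 as compat_urllib_request

# Wrap a canned body; behaves the same with either addinfourl signature.
resp = addinfourl_wrapper(io.BytesIO(b'payload'),
                          {'Content-Type': 'text/plain'},
                          'http://example.invalid/', 200)
assert resp.getcode() == 200
assert resp.read() == b'payload'
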
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handling -- don't raise an exception."""
    return addinfourl(fp, headers, "http:" + url)

def open_http(self, url, data=None):
    """Use HTTP protocol."""
    from eventlib.green import httplib
    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = unquote(host)
        realhost = host
    else:
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if proxy_bypass(realhost):
                host = realhost
        #print "proxy via http:", host, selector
    if not host:
        raise IOError('http error', 'no host given')
    if proxy_passwd:
        import base64
        proxy_auth = base64.b64encode(proxy_passwd).strip()
    else:
        proxy_auth = None
    if user_passwd:
        import base64
        auth = base64.b64encode(user_passwd).strip()
    else:
        auth = None
    h = httplib.HTTP(host)
    if data is not None:
        h.putrequest('POST', selector)
        h.putheader('Content-Type', 'application/x-www-form-urlencoded')
        h.putheader('Content-Length', '%d' % len(data))
    else:
        h.putrequest('GET', selector)
    if proxy_auth:
        h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth:
        h.putheader('Authorization', 'Basic %s' % auth)
    if realhost:
        h.putheader('Host', realhost)
    for args in self.addheaders:
        h.putheader(*args)
    h.endheaders()
    if data is not None:
        h.send(data)
    errcode, errmsg, headers = h.getreply()
    if errcode == -1:
        # something went wrong with the HTTP status line
        raise IOError('http protocol error', 0, 'got a bad status line', None)
    fp = h.getfile()
    if errcode == 200:
        return addinfourl(fp, headers, "http:" + url)
    else:
        if data is None:
            return self.http_error(url, fp, errcode, errmsg, headers)
        else:
            return self.http_error(url, fp, errcode, errmsg, headers, data)