def _get(self, relpath, offsets, tail_amount=0):
    """See HttpTransport._get

    Issue a GET request for ``relpath``, adding a ``Range`` header when
    ranges are requested and a range header can be built.

    :param relpath: Path of the file, relative to this transport.
    :param offsets: Passed unchanged to ``_attempted_range_header``; a false
        value (together with a false ``tail_amount``) means no range is
        requested.
    :param tail_amount: Passed unchanged to ``_attempted_range_header``.
    :return: ``(code, data)`` where ``code`` is the HTTP status code and
        ``data`` is the result of ``handle_response``.
    :raises errors.NoSuchFile: if the server answers 404.
    :raises errors.InvalidHttpRange: if the server answers 400 or 416.
    """
    abspath = self._remote_path(relpath)
    headers = {}
    accepted_errors = [200, 404]
    # Bound up front so the 400/416 branch below can always report it,
    # whether or not a Range header was actually sent.
    range_header = None
    if offsets or tail_amount:
        range_header = self._attempted_range_header(offsets, tail_amount)
        if range_header is not None:
            # Only a ranged request accepts partial content (206) or a
            # range-related failure (400, 416) as a response code.
            accepted_errors.extend([206, 400, 416])
            headers = {'Range': 'bytes=' + range_header}

    request = Request('GET', abspath, None, headers,
                      accepted_errors=accepted_errors)
    response = self._perform(request)

    code = response.code
    if code == 404:  # not found
        raise errors.NoSuchFile(abspath)
    elif code in (400, 416):
        # We don't know which, but one of the ranges we specified was
        # wrong.
        raise errors.InvalidHttpRange(abspath, range_header,
                                      'Server return code %d' % code)

    data = handle_response(abspath, code, response.info(), response)
    return code, data
def _get_ranged(self, relpath, offsets, tail_amount):
    """Request only part of the file at relpath.

    Falls back to ``_get_full`` when ``_attempted_range_header`` returns
    None.

    :return: ``(code, file)`` where ``file`` is built by
        ``response.handle_response`` (or whatever ``_get_full`` returns on
        the fallback path).
    :raises errors.NoSuchFile: on a 404 status.
    :raises errors.InvalidHttpRange: on a 400 or 416 status.
    """
    handle = self._get_curl()
    abspath, body, raw_headers = self._setup_get_request(handle, relpath)

    ranges = self._attempted_range_header(offsets, tail_amount)
    if ranges is None:
        # Forget ranges, the server can't handle them
        return self._get_full(relpath)

    extra_headers = ['Range: bytes=%s' % ranges]
    self._curl_perform(handle, raw_headers, extra_headers)
    body.seek(0)

    status = handle.getinfo(pycurl.HTTP_CODE)
    if status == 404:  # not found
        raise errors.NoSuchFile(abspath)
    if status in (400, 416):
        # We don't know which, but one of the ranges we specified was
        # wrong.
        reason = 'Server return code %d' % handle.getinfo(pycurl.HTTP_CODE)
        raise errors.InvalidHttpRange(abspath, ranges, reason)

    parsed = self._parse_headers(raw_headers)
    return status, response.handle_response(abspath, status, parsed, body)
def _get_ranged(self, relpath, offsets, tail_amount):
    """Perform a ranged GET for relpath.

    When no range header can be produced the whole file is fetched through
    ``_get_full`` instead.

    :return: ``(code, file)``; ``file`` comes from
        ``response.handle_response`` on the ranged path.
    :raises errors.NoSuchFile: when the status code is 404.
    :raises errors.InvalidHttpRange: when the status code is 400 or 416.
    """
    curl_handle = self._get_curl()
    abspath, payload, header_buf = self._setup_get_request(curl_handle,
                                                           relpath)
    wanted = self._attempted_range_header(offsets, tail_amount)
    if wanted is None:
        # Forget ranges, the server can't handle them
        return self._get_full(relpath)

    self._curl_perform(curl_handle, header_buf,
                       ['Range: bytes=%s' % wanted])
    payload.seek(0)
    http_code = curl_handle.getinfo(pycurl.HTTP_CODE)

    if http_code == 404:  # not found
        raise errors.NoSuchFile(abspath)
    if http_code in (400, 416):
        # We don't know which, but one of the ranges we specified was
        # wrong.
        detail = ('Server return code %d'
                  % curl_handle.getinfo(pycurl.HTTP_CODE))
        raise errors.InvalidHttpRange(abspath, wanted, detail)

    message = self._parse_headers(header_buf)
    return http_code, response.handle_response(abspath, http_code, message,
                                               payload)
def _post(self, body_bytes):
    """POST body_bytes to the '.bzr/smart' path of this transport.

    :return: ``(code, data)`` where ``code`` is the response status and
        ``data`` is the result of ``handle_response``.
    """
    smart_url = self._remote_path('.bzr/smart')
    # 403 is listed in accepted_errors so that send_http_smart_request can
    # handle it; otherwise a 403 causes an unhandled TransportError.
    request = Request('POST', smart_url, body_bytes,
                      accepted_errors=[200, 403])
    reply = self._perform(request)
    status = reply.code
    return status, handle_response(smart_url, status, reply.info(), reply)
def _post(self, body_bytes):
    """Send body_bytes as a POST request to '.bzr/smart'.

    :return: A ``(code, data)`` pair: the response status code and the
        object returned by ``handle_response``.
    """
    target = self._remote_path('.bzr/smart')
    # Accepting 403 here lets send_http_smart_request deal with it; without
    # that a 403 causes an unhandled TransportError.
    accepted = [200, 403]
    answer = self._perform(
        Request('POST', target, body_bytes, accepted_errors=accepted))
    code = answer.code
    data = handle_response(target, code, answer.info(), answer)
    return code, data
def _post(self, body_bytes):
    """POST body_bytes to '.bzr/smart' using the shared Curl object.

    :return: ``(code, file)`` where ``file`` is built by
        ``response.handle_response`` from the received headers and body.
    """
    body_source = StringIO(body_bytes)
    handle = self._get_curl()
    # Other places that use the Curl object (returned by _get_curl) for GET
    # requests explicitly set HTTPGET, so it should be safe to re-use the
    # same object for both GETs and POSTs.
    handle.setopt(pycurl.POST, 1)
    handle.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
    handle.setopt(pycurl.READFUNCTION, body_source.read)
    abspath, received, raw_headers = self._setup_request(handle,
                                                         '.bzr/smart')
    # Overriding the Expect: header makes pycurl send the POST body
    # immediately.
    self._curl_perform(handle, raw_headers, ['Expect: '])
    received.seek(0)
    status = handle.getinfo(pycurl.HTTP_CODE)
    parsed = self._parse_headers(raw_headers)
    return status, response.handle_response(abspath, status, parsed,
                                            received)
def test_full_text_no_content_length(self):
    # The body read back from handle_response must equal the body of the
    # _full_text_response_no_content_length fixture.
    status, header_text, expected = _full_text_response_no_content_length
    message = self._build_HTTPMessage(header_text)
    result = response.handle_response('http://foo', status, message,
                                      StringIO(expected))
    self.assertEqual(expected, result.read())
def test_full_text_no_content_type(self):
    # We should not require Content-Type for a full response
    status, header_text, expected = _full_text_response_no_content_type
    message = self._build_HTTPMessage(header_text)
    result = response.handle_response('http://foo', status, message,
                                      StringIO(expected))
    self.assertEqual(expected, result.read())
def get_response(self, a_response):
    """Process a supplied response, and return the result.

    :param a_response: A ``(code, raw_headers, body)`` triple.
    :return: Whatever ``response.handle_response`` builds for it.
    """
    status, header_text, payload = a_response
    message = self._build_HTTPMessage(header_text)
    body_file = StringIO(payload)
    return response.handle_response('http://foo', status, message,
                                    body_file)
class PyCurlTransport(HttpTransportBase): """http client transport using pycurl PyCurl is a Python binding to the C "curl" multiprotocol client. This transport can be significantly faster than the builtin Python client. Advantages include: DNS caching. """ def __init__(self, base, _from_transport=None): super(PyCurlTransport, self).__init__(base, 'pycurl', _from_transport=_from_transport) if self._unqualified_scheme == 'https': # Check availability of https into pycurl supported # protocols supported = pycurl.version_info()[8] if 'https' not in supported: raise errors.DependencyNotPresent('pycurl', 'no https support') self.cabundle = ca_bundle.get_ca_path() def _get_curl(self): connection = self._get_connection() if connection is None: # First connection ever. There is no credentials for pycurl, either # the password was embedded in the URL or it's not needed. The # connection for pycurl is just the Curl object, it will not # connect to the http server until the first request (which had # just called us). connection = pycurl.Curl() # First request, initialize credentials. auth = self._create_auth() # Proxy handling is out of reach, so we punt self._set_connection(connection, auth) return connection def disconnect(self): connection = self._get_connection() if connection is not None: connection.close() def has(self, relpath): """See Transport.has()""" # We set NO BODY=0 in _get_full, so it should be safe # to re-use the non-range curl object curl = self._get_curl() abspath = self._remote_path(relpath) curl.setopt(pycurl.URL, abspath) self._set_curl_options(curl) curl.setopt(pycurl.HTTPGET, 1) # don't want the body - ie just do a HEAD request # This means "NO BODY" not 'nobody' curl.setopt(pycurl.NOBODY, 1) # But we need headers to handle redirections header = StringIO() curl.setopt(pycurl.HEADERFUNCTION, header.write) # In some erroneous cases, pycurl will emit text on # stdout if we don't catch it (see InvalidStatus tests # for one such occurrence). 
blackhole = StringIO() curl.setopt(pycurl.WRITEFUNCTION, blackhole.write) self._curl_perform(curl, header) code = curl.getinfo(pycurl.HTTP_CODE) if code == 404: # not found return False elif code == 200: # "ok" return True else: self._raise_curl_http_error(curl) def _get(self, relpath, offsets, tail_amount=0): # This just switches based on the type of request if offsets is not None or tail_amount not in (0, None): return self._get_ranged(relpath, offsets, tail_amount=tail_amount) else: return self._get_full(relpath) def _setup_get_request(self, curl, relpath): # Make sure we do a GET request. versions > 7.14.1 also set the # NO BODY flag, but we'll do it ourselves in case it is an older # pycurl version curl.setopt(pycurl.NOBODY, 0) curl.setopt(pycurl.HTTPGET, 1) return self._setup_request(curl, relpath) def _setup_request(self, curl, relpath): """Do the common setup stuff for making a request :param curl: The curl object to place the request on :param relpath: The relative path that we want to get :return: (abspath, data, header) abspath: full url data: file that will be filled with the body header: file that will be filled with the headers """ abspath = self._remote_path(relpath) curl.setopt(pycurl.URL, abspath) self._set_curl_options(curl) data = StringIO() header = StringIO() curl.setopt(pycurl.WRITEFUNCTION, data.write) curl.setopt(pycurl.HEADERFUNCTION, header.write) return abspath, data, header def _get_full(self, relpath): """Make a request for the entire file""" curl = self._get_curl() abspath, data, header = self._setup_get_request(curl, relpath) self._curl_perform(curl, header) code = curl.getinfo(pycurl.HTTP_CODE) data.seek(0) if code == 404: raise errors.NoSuchFile(abspath) if code != 200: self._raise_curl_http_error( curl, 'expected 200 or 404 for full response.') return code, data # The parent class use 0 to minimize the requests, but since we can't # exploit the results as soon as they are received (pycurl limitation) we'd # better issue more 
requests and provide a more responsive UI incurring # more latency costs. # If you modify this, think about modifying the comment in http/__init__.py # too. _get_max_size = 4 * 1024 * 1024 def _get_ranged(self, relpath, offsets, tail_amount): """Make a request for just part of the file.""" curl = self._get_curl() abspath, data, header = self._setup_get_request(curl, relpath) range_header = self._attempted_range_header(offsets, tail_amount) if range_header is None: # Forget ranges, the server can't handle them return self._get_full(relpath) self._curl_perform(curl, header, ['Range: bytes=%s' % range_header]) data.seek(0) code = curl.getinfo(pycurl.HTTP_CODE) if code == 404: # not found raise errors.NoSuchFile(abspath) elif code in (400, 416): # We don't know which, but one of the ranges we specified was # wrong. raise errors.InvalidHttpRange(abspath, range_header, 'Server return code %d' % curl.getinfo(pycurl.HTTP_CODE)) msg = self._parse_headers(header) return code, response.handle_response(abspath, code, msg, data) def _parse_headers(self, status_and_headers): """Transform the headers provided by curl into an HTTPMessage""" status_and_headers.seek(0) # Ignore status line status_and_headers.readline() msg = httplib.HTTPMessage(status_and_headers) return msg def _post(self, body_bytes): curl = self._get_curl() abspath, data, header = self._setup_request(curl, '.bzr/smart') curl.setopt(pycurl.POST, 1) fake_file = StringIO(body_bytes) curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes)) curl.setopt(pycurl.READFUNCTION, fake_file.read) # We override the Expect: header so that pycurl will send the POST # body immediately. try: self._curl_perform(curl, header, ['Expect: ', 'Content-Type: application/octet-stream']) except pycurl.error, e: if e[0] == CURLE_SEND_ERROR: # When talking to an HTTP/1.0 server, getting a 400+ error code # triggers a bug in some combinations of curl/kernel in rare # occurrences. 
Basically, the server closes the connection # after sending the error but the client (having received and # parsed the response) still try to send the request body (see # bug #225020 and its upstream associated bug). Since the # error code and the headers are known to be available, we just # swallow the exception, leaving the upper levels handle the # 400+ error. trace.mutter('got pycurl error in POST: %s, %s, %s, url: %s ', e[0], e[1], e, abspath) else: # Re-raise otherwise raise data.seek(0) code = curl.getinfo(pycurl.HTTP_CODE) msg = self._parse_headers(header) return code, response.handle_response(abspath, code, msg, data)