def _discover_references(self, service, url):
    """Fetch ``info/refs`` below *url* and parse the advertised refs.

    Unless ``self.dumb`` is exactly ``False``, the ``?service=`` query and
    a matching Content-Type header are added to the request.  ``self.dumb``
    is then updated from the response: any content type that does not start
    with ``application/x-git-`` marks the server as dumb.

    :param service: Service name as bytes (decoded as ASCII for the URL).
    :param url: Base repository URL; must end with "/".
    :return: ``read_pkt_refs(proto)`` for a smart response, otherwise
        ``(read_info_refs(resp), set())``.
    :raise GitProtocolError: if a smart response does not start with
        exactly one ``# service=<service>`` packet.
    """
    assert url[-1] == "/"
    refs_url = urlparse.urljoin(url, "info/refs")
    request_headers = {}
    if self.dumb is not False:
        service_name = service.decode('ascii')
        refs_url += "?service=%s" % service_name
        request_headers["Content-Type"] = (
            "application/x-%s-request" % service_name)
    resp = self._http_request(refs_url, request_headers)
    # Prefer gettype(); fall back to get_content_type() when the message
    # object does not provide it.
    try:
        content_type = resp.info().gettype()
    except AttributeError:
        content_type = resp.info().get_content_type()
    try:
        self.dumb = not content_type.startswith("application/x-git-")
        if self.dumb:
            return read_info_refs(resp), set()
        proto = Protocol(resp.read, None)
        # The first packet sequence must hold exactly the service line.
        try:
            [first_pkt] = list(proto.read_pkt_seq())
        except ValueError:
            raise GitProtocolError(
                "unexpected number of packets received")
        if first_pkt.rstrip(b'\n') != (b'# service=' + service):
            raise GitProtocolError(
                "unexpected first line %r from smart server" % first_pkt)
        return read_pkt_refs(proto)
    finally:
        # The response is closed whichever branch was taken.
        resp.close()
def _setHead(self, target_url, target_ref):
    """Set HEAD on a remote repository.

    This relies on the turnip-set-symbolic-ref extension: a pkt-line
    request naming the new HEAD target is POSTed to the service URL and
    the single reply packet is checked.

    :param target_url: Repository URL the service name is joined onto.
    :param target_ref: Ref name HEAD should point at.
    :raise GitProtocolError: if the request fails, the response has an
        unexpected Content-Type, or the reply is anything but ``ACK HEAD``.
    """
    service = "turnip-set-symbolic-ref"
    url = urljoin(target_url, service)
    request_headers = {"Content-Type": "application/x-%s-request" % service}
    request_body = pkt_line("HEAD %s" % target_ref) + pkt_line(None)
    try:
        response = urlfetch(
            url, method="POST", headers=request_headers, data=request_body)
        response.raise_for_status()
    except Exception as e:
        # Any transport/HTTP failure is reported as a protocol error.
        raise GitProtocolError(str(e))

    expected_type = "application/x-%s-result" % service
    content_type = response.headers.get("Content-Type")
    if content_type != expected_type:
        raise GitProtocolError(
            "Invalid Content-Type from server: %s" % content_type)

    reply = Protocol(io.BytesIO(response.content).read, None).read_pkt_line()
    if reply is None:
        raise GitProtocolError("Unexpected flush-pkt from server")
    if reply.rstrip(b"\n") == b"ACK HEAD":
        return
    if reply.startswith(b"ERR "):
        raise GitProtocolError(
            reply[len(b"ERR "):].rstrip(b"\n").decode("UTF-8"))
    raise GitProtocolError("Unexpected packet %r from server" % reply)
def _split_proto_line(line, allowed):
    """Split a line read from the wire.

    :param line: The line read from the wire.
    :param allowed: An iterable of command names that should be allowed.
        Command names not listed below as possible return values will be
        ignored.  If None, any commands from the possible return values are
        allowed.
    :return: a tuple having one of the following forms:
        (COMMAND_WANT, obj_id)
        (COMMAND_HAVE, obj_id)
        (COMMAND_SHALLOW, obj_id)
        (COMMAND_UNSHALLOW, obj_id)
        (COMMAND_DEEPEN, depth) with depth converted to int
        (COMMAND_DONE, None)
        (None, None)  (for a flush-pkt)

    :raise UnexpectedCommandError: if the command is not in *allowed*.
    :raise GitProtocolError: if the object id is not a valid hex sha, the
        deepen depth is not an integer, or the line matches none of the
        forms above.
    """
    if not line:
        fields = [None]
    else:
        fields = line.rstrip(b'\n').split(b' ', 1)
    command = fields[0]
    if allowed is not None and command not in allowed:
        raise UnexpectedCommandError(command)
    if len(fields) == 1 and command in (COMMAND_DONE, None):
        return (command, None)
    elif len(fields) == 2:
        if command in (COMMAND_WANT, COMMAND_HAVE, COMMAND_SHALLOW,
                       COMMAND_UNSHALLOW):
            if not valid_hexsha(fields[1]):
                raise GitProtocolError("Invalid sha")
            return tuple(fields)
        elif command == COMMAND_DEEPEN:
            # The depth comes straight from the peer; a non-numeric value
            # must surface as a protocol error, not a bare ValueError.
            try:
                depth = int(fields[1])
            except ValueError:
                raise GitProtocolError(
                    'Received invalid line from client: %r' % line)
            return command, depth
    raise GitProtocolError('Received invalid line from client: %r' % line)
def read_pkt_line(self):
    """Read one pkt-line from the remote git process.

    A pending readahead buffer (see unread_pkt_line) is consumed first and
    cleared; otherwise data comes from ``self.read``.

    Returns:
      The packet payload without the 4-byte length prefix, or None for a
      flush-pkt ('0000').

    Raises:
      HangupException: if nothing could be read for the length prefix.
      GitProtocolError: on a socket error, or when the payload length does
        not match the length prefix.
    """
    if self._readahead is not None:
        read = self._readahead.read
        self._readahead = None
    else:
        read = self.read

    try:
        header = read(4)
        if not header:
            raise HangupException()
        size = int(header, 16)
        is_flush = size == 0
        if self.report_activity:
            # A flush-pkt only consists of the 4 prefix bytes.
            self.report_activity(4 if is_flush else size, "read")
        if is_flush:
            return None
        payload = read(size - 4)
    except socket.error as e:
        raise GitProtocolError(e)

    received = len(payload) + 4
    if received != size:
        raise GitProtocolError(
            "Length of pkt read %04x does not match length prefix %04x"
            % (received, size))
    return payload
def set_client_capabilities(self, caps):
    """Validate and record the capabilities requested by the client.

    :param caps: Capabilities the client asked for.
    :raise GitProtocolError: if the client asks for a capability that is
        neither innocuous nor advertised, or omits a required one.
    """
    advertised = set(self.innocuous_capabilities()).union(
        self.capabilities())
    for requested in caps:
        if requested not in advertised:
            raise GitProtocolError(
                'Client asked for capability %s that was not advertised.'
                % requested)
    for required in self.required_capabilities():
        if required not in caps:
            raise GitProtocolError(
                'Client does not support required capability %s.'
                % required)
    self._client_capabilities = set(caps)
    logger.info('Client capabilities: %s', caps)
def set_client_capabilities(self, caps: Iterable[bytes]) -> None:
    """Validate and record the capabilities requested by the client.

    Capabilities starting with ``CAPABILITY_AGENT + b'='`` are accepted
    without being checked against the advertised set.

    :param caps: Capabilities the client asked for.
    :raise GitProtocolError: if the client asks for a capability that is
        neither innocuous nor advertised, or omits a required one.
    """
    agent_prefix = CAPABILITY_AGENT + b'='
    advertised = set(self.innocuous_capabilities()).union(
        self.capabilities())
    for requested in caps:
        if (not requested.startswith(agent_prefix)
                and requested not in advertised):
            raise GitProtocolError(
                'Client asked for capability %r that was not advertised.'
                % requested)
    for required in self.required_capabilities():
        if required not in caps:
            raise GitProtocolError(
                'Client does not support required capability %r.'
                % required)
    self._client_capabilities = set(caps)
    logger.info('Client capabilities: %s', caps)
def _smart_request(self, service, url, data):
    """Send *data* to the smart HTTP endpoint for *service*.

    :param service: Service name; joined onto *url* and used to build the
        request and expected response content types.
    :param url: Base repository URL; must end with "/".
    :param data: Request body handed to ``urllib2.Request``.
    :return: The response object returned by ``self._perform``.
    :raise NotGitRepository: if the server answers 404.
    :raise GitProtocolError: on any other non-200 status, or when the
        response content type is not ``application/x-<service>-result``.
    """
    assert url[-1] == "/"
    request = urllib2.Request(
        urlparse.urljoin(url, service),
        headers={"Content-Type": "application/x-%s-request" % service},
        data=data)
    resp = self._perform(request)

    status = resp.getcode()
    if status == 404:
        raise NotGitRepository()
    if status != 200:
        raise GitProtocolError(
            "Invalid HTTP response from server: %d" % status)

    content_type = resp.info().gettype()
    if content_type != ("application/x-%s-result" % service):
        raise GitProtocolError(
            "Invalid content-type from server: %s" % content_type)
    return resp
def _http_request(self, url, headers=None, data=None,
                  allow_compression=False):
    """Perform HTTP request.

    :param url: Request URL.
    :param headers: Optional mapping of custom request headers.  It is
        copied, so the caller's mapping is never modified.
    :param data: Request data.  A GET is issued when this is None,
        otherwise a POST.
    :param allow_compression: Allow GZipped communication.
    :return: Tuple (`response`, `read`), where `response` wraps the
        transport response and exposes `status`, `content_type` and
        `redirect_location`, and `read` is a consumable read method for
        the (decompressed, if gzip-encoded) response data.
    :raise NotGitRepository: if the server answers 404.
    :raise GitProtocolError: on any other non-200 status.
    """
    # ``headers`` defaults to None; subscripting it directly would raise
    # TypeError, and writing into a caller-supplied dict would leak the
    # defaults below back to the caller.
    req_headers = dict(headers) if headers is not None else {}
    if is_github_url(url):
        req_headers['User-agent'] = user_agent_for_github()
    req_headers["Pragma"] = "no-cache"
    if allow_compression:
        req_headers["Accept-Encoding"] = "gzip"
    else:
        req_headers["Accept-Encoding"] = "identity"

    response = self.transport.request(
        ('GET' if data is None else 'POST'), url, body=data,
        headers=req_headers, retries=8)

    if response.status == 404:
        raise NotGitRepository()
    elif response.status != 200:
        raise GitProtocolError("unexpected http resp %d for %s" %
                               (response.status, url))

    # TODO: Optimization available by adding `preload_content=False` to the
    # request and just passing the `read` method on instead of going via
    # `BytesIO`, if we can guarantee that the entire response is consumed
    # before issuing the next to still allow for connection reuse from the
    # pool.
    if response.getheader("Content-Encoding") == "gzip":
        read = gzip.GzipFile(fileobj=BytesIO(response.read())).read
    else:
        read = response.read

    class WrapResponse(object):
        """Thin view of the transport response handed back to callers."""

        def __init__(self, response):
            self._response = response
            self.status = response.status
            self.content_type = response.getheader("Content-Type")
            self.redirect_location = response._actual.geturl()

        def readlines(self):
            return self._response.readlines()

        def close(self):
            # Intentionally a no-op.
            pass

    return WrapResponse(response), read
def _connect(self, cmd, path):
    """
    Override connection establishment in SSHGitClient class so that pubkey
    is used.

    :param cmd: Git command to run remotely; resolved through
        ``self._get_cmd_path``.
    :param path: Repository path passed, single-quoted, to the remote
        command.
    :return: Tuple of (Protocol wrapping the ssh subprocess, can_read
        callable of the subprocess wrapper).
    :raise GitProtocolError: if ``self.pubkey`` does not exist or is not
        readable.
    """
    # FIXME: This has no way to deal with passphrases..
    # FIXME: can we rely on ssh being in PATH here ?
    # NOTE(review): StrictHostKeyChecking=no disables host key
    # verification - confirm this is intended.
    args = ['ssh', '-x', '-oStrictHostKeyChecking=no']
    if not (os.path.exists(self.pubkey)
            and os.access(self.pubkey, os.R_OK)):
        raise GitProtocolError(
            "Public key file is missing or inaccessible")
    args.extend(['-i', self.pubkey])
    if self.port is not None:
        args.extend(['-p', str(self.port)])
    if self.username is not None:
        host = '{0}@{1}'.format(self.username, self.host)
    else:
        host = self.host
    args.append(host)
    # The remote side hands this string to a shell.  Escape embedded
    # single quotes so a path cannot break out of the quoting and inject
    # extra commands; paths without quotes are sent exactly as before.
    quoted_path = path.replace("'", "'\\''")
    args.append("{0} '{1}'".format(self._get_cmd_path(cmd), quoted_path))
    proc = subprocess.Popen(args,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    con = SubprocessWrapper(proc)
    logging.info("Connected to repo %s:%s via ssh, cmd: %s",
                 self.host, self.port if self.port else 22, cmd)
    return (Protocol(con.read, con.write,
                     report_activity=self._report_activity),
            con.can_read)
def _getHead(self, repository, remote_name):
    """Get HEAD from a configured remote in a local repository.

    The returned ref name will be adjusted in such a way that it can be
    passed to `_setHead` (e.g. refs/remotes/origin/master ->
    refs/heads/master).

    :param repository: Path of the local repository; used as the working
        directory of every git invocation.
    :param remote_name: Name of the configured remote.
    :return: The HEAD target rewritten into the ``refs/heads/`` namespace.
    :raise GitProtocolError: if the remote HEAD does not point below
        ``refs/remotes/<remote_name>/``.
    """
    # This is a bit weird, but set-head will bail out if the target
    # doesn't exist in the correct remotes namespace.  git 2.8.0 has
    # "git ls-remote --symref <repository> HEAD" which would involve
    # less juggling.
    self._runGit(
        "fetch", "-q", ".", "refs/heads/*:refs/remotes/%s/*" % remote_name,
        cwd=repository)
    self._runGit(
        "remote", "set-head", remote_name, "--auto", cwd=repository)
    ref_prefix = "refs/remotes/%s/" % remote_name
    target_ref = subprocess.check_output(
        ["git", "symbolic-ref", ref_prefix + "HEAD"],
        cwd=repository, universal_newlines=True).rstrip("\n")
    if not target_ref.startswith(ref_prefix):
        raise GitProtocolError(
            "'git remote set-head %s --auto' did not leave remote HEAD "
            "under %s" % (remote_name, ref_prefix))
    real_target_ref = "refs/heads/" + target_ref[len(ref_prefix):]
    # Ensure the result is a valid ref name, just in case.  Run in the
    # repository directory like the other commands (previously the literal
    # string "repository" was passed as cwd).
    self._runGit("check-ref-format", real_target_ref, cwd=repository)
    return real_target_ref
def archive(self, path, committish, write_data, progress=None,
            write_error=None):
    """Request an archive of *committish* via the upload-archive service.

    :param path: Repository path handed to ``self._connect``.
    :param committish: Sent to the server as the single ``argument``.
    :param write_data: Handler registered for side-band channel 1.
    :param progress: Handler registered for side-band channel 2.
    :param write_error: Handler registered for side-band channel 3.
    :return: None; also returns early when the server answers NACK.
    :raise GitProtocolError: if the server answers with an ``ERR`` packet.
    :raise AssertionError: on any other unexpected reply, or when the
        acknowledgement is not followed by a flush-pkt.
    """
    proto, _can_read = self._connect(b'upload-archive', path)
    with proto:
        proto.write_pkt_line("argument %s" % committish)
        proto.write_pkt_line(None)

        reply = proto.read_pkt_line()
        if reply == "NACK\n":
            return
        if reply != "ACK\n":
            if reply.startswith("ERR "):
                raise GitProtocolError(reply[4:].rstrip("\n"))
            raise AssertionError("invalid response %r" % reply)

        # The acknowledgement must be terminated by a flush-pkt.
        if proto.read_pkt_line() is not None:
            raise AssertionError("expected pkt tail")

        channel_handlers = {1: write_data, 2: progress, 3: write_error}
        self._read_side_band64k_data(proto, channel_handlers)
def _split_proto_line(line, allowed):
    """Split a line read from the wire.

    :param line: The line read from the wire.
    :param allowed: An iterable of command names that should be allowed.
        Command names not listed below as possible return values will be
        ignored.  If None, any commands from the possible return values are
        allowed.
    :return: a tuple having one of the following forms:
        ('want', obj_id)
        ('have', obj_id)
        ('done', None)
        (None, None)  (for a flush-pkt)

    :raise UnexpectedCommandError: if the command is not in *allowed*.
    :raise GitProtocolError: if the object id is rejected by hex_to_sha
        or the line matches none of the forms above.
    """
    if not line:
        fields = [None]
    else:
        fields = line.rstrip('\n').split(' ', 1)
    command = fields[0]
    if allowed is not None and command not in allowed:
        raise UnexpectedCommandError(command)
    try:
        if len(fields) == 1 and command in ('done', None):
            return (command, None)
        elif len(fields) == 2 and command in ('want', 'have'):
            # Called only for validation; the hex form is what is returned.
            hex_to_sha(fields[1])
            return tuple(fields)
    except (TypeError, AssertionError) as e:
        raise GitProtocolError(e)
    # Nothing matched: fail loudly instead of implicitly returning None,
    # which callers would then try to unpack as a 2-tuple.
    raise GitProtocolError('Received invalid line from client: %r' % line)
def handle(self):
    """Serve an upload-archive request.

    Reads ``argument <value>`` packets up to the flush-pkt, interprets
    ``--prefix <p>`` and ``--format <f>`` pairs, resolves any other
    argument as a ref in ``self.repo.refs``, acknowledges the request and
    streams the output of ``tar_stream`` over the side-band data channel.

    :raise GitProtocolError: if a packet does not start with ``argument``.
    """
    arguments = []
    for pkt in self.proto.read_pkt_seq():
        key, value = pkt.split(b' ', 1)
        if key != b'argument':
            raise GitProtocolError('unknown command %s' % key)
        arguments.append(value.rstrip(b'\n'))

    prefix = b''
    archive_format = 'tar'
    store = self.repo.object_store
    pos = 0
    while pos < len(arguments):
        current = arguments[pos]
        if current == b'--prefix':
            # Option value is the following argument.
            pos += 1
            prefix = arguments[pos]
        elif current == b'--format':
            pos += 1
            archive_format = arguments[pos].decode('ascii')
        else:
            commit_sha = self.repo.refs[current]
            tree = store[store[commit_sha].tree]
        pos += 1

    self.proto.write_pkt_line(b'ACK\n')
    self.proto.write_pkt_line(None)
    chunks = tar_stream(
        store, tree, mtime=time.time(), prefix=prefix,
        format=archive_format)
    for chunk in chunks:
        self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, chunk)
    self.proto.write_pkt_line(None)
def archive(self, path, committish, write_data, progress=None,
            write_error=None):
    """Request an archive of *committish* via the upload-archive service.

    :param path: Repository path handed to ``self._connect``.
    :param committish: Bytes sent to the server as the single ``argument``.
    :param write_data: Handler for SIDE_BAND_CHANNEL_DATA.
    :param progress: Handler for SIDE_BAND_CHANNEL_PROGRESS.
    :param write_error: Handler for SIDE_BAND_CHANNEL_FATAL.
    :return: None; also returns early when the server answers NACK.
    :raise GitProtocolError: if the server answers with an ``ERR`` packet.
    :raise AssertionError: on any other unexpected reply (including a
        flush-pkt), or when the acknowledgement is not followed by a
        flush-pkt.
    """
    proto, can_read = self._connect(b'upload-archive', path)
    with proto:
        proto.write_pkt_line(b"argument " + committish)
        proto.write_pkt_line(None)
        pkt = proto.read_pkt_line()
        if pkt == b"NACK\n":
            return
        elif pkt == b"ACK\n":
            pass
        elif pkt is not None and pkt.startswith(b"ERR "):
            raise GitProtocolError(pkt[4:].rstrip(b"\n"))
        else:
            # Also reached for a flush-pkt (None), which previously
            # crashed with AttributeError on ``.startswith``.
            raise AssertionError("invalid response %r" % pkt)
        ret = proto.read_pkt_line()
        if ret is not None:
            raise AssertionError("expected pkt tail")
        self._read_side_band64k_data(
            proto, {
                SIDE_BAND_CHANNEL_DATA: write_data,
                SIDE_BAND_CHANNEL_PROGRESS: progress,
                SIDE_BAND_CHANNEL_FATAL: write_error,
            })
def read_pkt_line(self):
    """Read one pkt-line from the remote git process.

    A pending readahead buffer (see unread_pkt_line) is consumed first and
    cleared; otherwise data comes from ``self.read``.

    :return: The packet payload without the 4-byte length prefix, or None
        for a flush-pkt ('0000').
    :raise HangupException: if nothing could be read for the length prefix.
    :raise GitProtocolError: if the underlying read raises a socket error.
    """
    if self._readahead is not None:
        read = self._readahead.read
        self._readahead = None
    else:
        read = self.read

    try:
        header = read(4)
        if not header:
            raise HangupException()
        size = int(header, 16)
        is_flush = size == 0
        if self.report_activity:
            # A flush-pkt only consists of the 4 prefix bytes.
            self.report_activity(4 if is_flush else size, 'read')
        if is_flush:
            return None
        return read(size - 4)
    except socket.error as e:
        raise GitProtocolError(e)
def _apply_pack(
    self, refs: List[Tuple[bytes, bytes, bytes]]
) -> List[Tuple[bytes, bytes]]:
    """Store the incoming pack (if any) and apply the ref updates.

    :param refs: ``(old_sha, new_sha, ref)`` update commands.
    :return: Status entries: first ``(b"unpack", <result>)``, then one
        ``(ref, <result>)`` per command, in order.
    :raise GitProtocolError: if a deletion is requested while
        CAPABILITY_DELETE_REFS is not among ``self.capabilities()``.
    """
    recoverable = (
        IOError,
        OSError,
        ChecksumMismatch,
        ApplyDeltaError,
        AssertionError,
        socket.error,
        zlib.error,
        ObjectFormatException,
    )
    # Pack data is only read when at least one command is not a deletion.
    expects_pack = False
    for command in refs:
        if command[1] != ZERO_SHA:
            expects_pack = True

    # The git protocol wants a status entry for the unpack step even if no
    # pack data has been sent, hence the b"ok" default.
    unpack_status = b"ok"
    if expects_pack:
        # TODO: more informative error messages than just the exception
        # string
        try:
            recv = getattr(self.proto, "recv", None)
            self.repo.object_store.add_thin_pack(self.proto.read, recv)
        except recoverable as e:
            # The pack may still have been moved in, but it may contain
            # broken objects. We trust a later GC to clean it up.
            unpack_status = str(e).replace("\n", "").encode("utf-8")
    status = [(b"unpack", unpack_status)]

    for oldsha, sha, ref in refs:
        outcome = b"ok"
        try:
            if sha == ZERO_SHA:
                if CAPABILITY_DELETE_REFS not in self.capabilities():
                    raise GitProtocolError(
                        "Attempted to delete refs without delete-refs "
                        "capability.")
                try:
                    self.repo.refs.remove_if_equals(ref, oldsha)
                except recoverable:
                    outcome = b"failed to delete"
            else:
                try:
                    self.repo.refs.set_if_equals(ref, oldsha, sha)
                except recoverable:
                    outcome = b"failed to write"
        except KeyError:
            outcome = b"bad ref"
        status.append((ref, outcome))

    return status
def determine_wants(self, heads):
    """Determine the wants for a set of heads.

    The heads are advertised to the client (unless this is the second
    phase of an HTTP request), after which the client's 'want' lines are
    read.  The capabilities on the first want line are passed to the
    handler and also select the ack type of this graph walker.

    :param heads: a dict of refname->SHA1 to advertise
    :return: a list of SHA1s requested by the client; an empty list if the
        client sent no want line; None if the repo is empty, if only the
        advertisement was requested, or if an HTTP client reached EOF
        after its wants.
    :raise GitProtocolError: if the client wants a SHA1 that is not among
        the advertised values.
    """
    if not heads:
        # Empty repository: nothing to advertise, nothing to want.
        self.proto.write_pkt_line(None)
        return None

    advertised_shas = set(heads.itervalues())
    if self.advertise_refs or not self.http_req:
        is_first = True
        for ref, sha in sorted(heads.iteritems()):
            line = "%s %s" % (sha, ref)
            if is_first:
                # Capabilities ride on the first advertised ref only.
                line = "%s\x00%s" % (line, self.handler.capability_line())
                is_first = False
            self.proto.write_pkt_line("%s\n" % line)
            peeled_sha = self.get_peeled(ref)
            if peeled_sha != sha:
                self.proto.write_pkt_line(
                    '%s %s^{}\n' % (peeled_sha, ref))

        # End of advertisement.
        self.proto.write_pkt_line(None)

        if self.advertise_refs:
            return None

    # The client now sends its want lines.
    first_want = self.proto.read_pkt_line()
    if not first_want:
        return []
    line, caps = extract_want_line_capabilities(first_want)
    self.handler.set_client_capabilities(caps)
    self.set_ack_type(ack_type(caps))

    allowed = ('want', None)
    command, sha = _split_proto_line(line, allowed)
    want_revs = []
    while command is not None:
        if sha not in advertised_shas:
            raise GitProtocolError('Client wants invalid object %s' % sha)
        want_revs.append(sha)
        command, sha = self.read_proto_line(allowed)

    self.set_wants(want_revs)

    if self.http_req and self.proto.eof():
        # The client may close the socket at this point, expecting a
        # flush-pkt from the server. We might be ready to send a packfile
        # at this point, so we need to explicitly short-circuit in this
        # case.
        return None

    return want_revs
def _smart_request(self, service, url, data):
    """Send *data* to the smart HTTP endpoint for *service*.

    :param service: Service name; joined onto *url* and used to build the
        request and expected response content types.
    :param url: Base repository URL; must end with "/".
    :param data: Request body handed to ``self._http_request``.
    :return: The response returned by ``self._http_request``.
    :raise GitProtocolError: when the response content type is not
        ``application/x-<service>-result``.
    """
    assert url[-1] == "/"
    endpoint = urlparse.urljoin(url, service)
    request_headers = {
        "Content-Type": "application/x-%s-request" % service,
    }
    resp = self._http_request(endpoint, request_headers, data)
    content_type = resp.info().gettype()
    if content_type != ("application/x-%s-result" % service):
        raise GitProtocolError(
            "Invalid content-type from server: %s" % content_type)
    return resp
def _http_request(self, url, headers=None, data=None):
    """Open *url* through ``self.opener`` and return the response.

    :param url: Request URL.
    :param headers: Optional dict of request headers; defaults to no
        extra headers.
    :param data: Optional request body.
    :return: The response object returned by ``self.opener.open``.
    :raise NotGitRepository: if the server answers 404.
    :raise GitProtocolError: on any other non-200 HTTP error.
    """
    # Avoid a shared mutable default argument.
    if headers is None:
        headers = {}
    req = urllib2.Request(url, headers=headers, data=data)
    try:
        resp = self.opener.open(req)
    except urllib2.HTTPError as e:
        if e.code == 404:
            raise NotGitRepository()
        if e.code != 200:
            raise GitProtocolError("unexpected http response %d" % e.code)
        # Never fall through with ``resp`` unbound: propagate the
        # original error instead of failing with UnboundLocalError.
        raise
    return resp
def handle(self):
    """Read the requested command and dispatch it to its handler class.

    The handler is looked up in ``self.handlers`` by command name,
    instantiated with the server backend, the command arguments and the
    protocol object, and its ``handle()`` method is invoked.

    :raise GitProtocolError: if no callable handler is registered for the
        command.
    """
    proto = ReceivableProtocol(self.connection.recv, self.wfile.write)
    command, args = proto.read_cmd()
    logger.info('Handling %s request, args=%s', command, args)

    handler_cls = self.handlers.get(command, None)
    if not callable(handler_cls):
        raise GitProtocolError('Invalid service %s' % command)

    handler = handler_cls(self.server.backend, args, proto)
    handler.handle()
def _read_refs(self, proto):
    """Read the ref advertisement sent by the server.

    Each packet is split into ``<sha> <ref>``; the capabilities are
    extracted from the first packet's ref part.

    :param proto: Protocol object to read the packet sequence from.
    :return: Tuple ``(refs, capabilities)`` where *refs* maps ref name to
        sha and *capabilities* is a set.  Both are empty when the server
        advertised nothing.
    :raise GitProtocolError: if the server sends an ``ERR`` packet.
    """
    server_capabilities = None
    refs = {}
    # Receive refs from server
    for pkt in proto.read_pkt_seq():
        (sha, ref) = pkt.rstrip('\n').split(' ', 1)
        if sha == 'ERR':
            raise GitProtocolError(ref)
        if server_capabilities is None:
            (ref, server_capabilities) = extract_capabilities(ref)
        refs[ref] = sha
    if server_capabilities is None:
        # Empty advertisement: no packet carried capabilities, and
        # set(None) would raise TypeError.
        return refs, set()
    return refs, set(server_capabilities)
def write_pkt_line(self, line):
    """Send one pkt-line to the remote git process.

    :param line: Payload to send, without the length prefix; it is framed
        by ``pkt_line`` before being written.
    :raise GitProtocolError: if writing raises a socket error.
    """
    try:
        framed = pkt_line(line)
        self.write(framed)
        if self.report_activity:
            # Report the framed size, i.e. including the length prefix.
            self.report_activity(len(framed), 'write')
    except socket.error as e:
        raise GitProtocolError(e)
class ReceivePackHandler(Handler):
    """Protocol handler for downloading a pack from the client."""

    def __init__(self, backend, args, proto, http_req=None,
                 advertise_refs=False):
        """Open the repository named by ``args[0]`` on *backend*.

        :param backend: Backend providing ``open_repository``.
        :param args: Command arguments; the first one names the repository.
        :param proto: Protocol object passed on to ``Handler``.
        :param http_req: Optional HTTP request passed on to ``Handler``.
        :param advertise_refs: Stored as ``self.advertise_refs``.
        """
        Handler.__init__(self, backend, proto, http_req=http_req)
        self.repo = backend.open_repository(args[0])
        self.advertise_refs = advertise_refs

    @classmethod
    def capabilities(cls):
        """Return the capabilities advertised by this handler."""
        return ("report-status", "delete-refs", "side-band-64k")

    def _apply_pack(self, refs):
        """Store the incoming pack and apply the ref updates.

        :param refs: ``(old_sha, new_sha, ref)`` update commands.
        :return: Status entries: first ``('unpack', <result>)``, then one
            ``(ref, <result>)`` per command, in order.
        :raise GitProtocolError: if a deletion is requested while
            'delete-refs' is not among ``self.capabilities()``.
        """
        all_exceptions = (IOError, OSError, ChecksumMismatch,
                          ApplyDeltaError, AssertionError, socket.error,
                          zlib.error, ObjectFormatException)
        status = []
        # TODO: more informative error messages than just the exception
        # string
        try:
            recv = getattr(self.proto, "recv", None)
            self.repo.object_store.add_thin_pack(self.proto.read, recv)
            status.append(('unpack', 'ok'))
        except all_exceptions as e:
            status.append(('unpack', str(e).replace('\n', '')))
            # The pack may still have been moved in, but it may contain
            # broken objects. We trust a later GC to clean it up.

        for oldsha, sha, ref in refs:
            ref_status = 'ok'
            try:
                if sha == ZERO_SHA:
                    if 'delete-refs' not in self.capabilities():
                        raise GitProtocolError(
                            'Attempted to delete refs without delete-refs '
                            'capability.')
                    try:
                        del self.repo.refs[ref]
                    except all_exceptions:
                        ref_status = 'failed to delete'
                else:
                    try:
                        self.repo.refs[ref] = sha
                    except all_exceptions:
                        ref_status = 'failed to write'
            except KeyError:
                ref_status = 'bad ref'
            status.append((ref, ref_status))

        # Hand the collected statuses back instead of discarding them.
        return status
def _discover_references(self, service, url):
    """Fetch ``info/refs`` below *url* and read the advertised refs.

    Unless ``self.dumb`` compares equal to ``False``, the ``?service=``
    query and a matching Content-Type header are added to the request.
    ``self.dumb`` is then updated from the response: any content type that
    does not start with ``application/x-git-`` marks the server as dumb.

    :param service: Service name.
    :param url: Base repository URL; must end with "/".
    :return: The result of ``self._read_refs`` on the response.
    :raise NotGitRepository: if the server answers 404.
    :raise GitProtocolError: on any other non-200 status, or when a smart
        response does not start with exactly the service line.
    """
    assert url[-1] == "/"
    refs_url = urlparse.urljoin(url, "info/refs")
    request_headers = {}
    if self.dumb != False:
        refs_url += "?service=%s" % service
        request_headers["Content-Type"] = (
            "application/x-%s-request" % service)
    resp = self._perform(urllib2.Request(refs_url, headers=request_headers))

    status = resp.getcode()
    if status == 404:
        raise NotGitRepository()
    if status != 200:
        raise GitProtocolError("unexpected http response %d" % status)

    content_type = resp.info().gettype()
    self.dumb = not content_type.startswith("application/x-git-")
    proto = Protocol(resp.read, None)
    if not self.dumb:
        # The first packet sequence must hold exactly the service line.
        pkts = list(proto.read_pkt_seq())
        expected = [('# service=%s\n' % service)]
        if pkts != expected:
            raise GitProtocolError(
                "unexpected first line %r from smart server" % pkts)
    return self._read_refs(proto)
def read_pkt_refs(proto):
    """Read a ref advertisement from *proto*.

    Each packet is split on whitespace into ``<sha> <ref>``; the
    capabilities are extracted from the first packet's ref part.

    :param proto: Protocol object to read the packet sequence from.
    :return: Tuple ``(refs, capabilities)`` where *refs* maps ref name to
        sha, or ``(None, set())`` when no refs were advertised.
    :raise GitProtocolError: if the server sends an ``ERR`` packet.
    """
    capabilities = None
    advertised = {}
    for pkt in proto.read_pkt_seq():
        sha, ref = pkt.rstrip(b'\n').split(None, 1)
        if sha == b'ERR':
            raise GitProtocolError(ref)
        if capabilities is None:
            # Only the first packet carries the capability list.
            ref, capabilities = extract_capabilities(ref)
        advertised[ref] = sha

    if not advertised:
        return None, set()
    return advertised, set(capabilities)
def _apply_pack(self, refs):
    """Store the incoming pack (if any) and apply the ref updates.

    :param refs: ``(old_sha, new_sha, ref)`` update commands.
    :return: Status entries as bytes pairs: first ``(b'unpack', <result>)``,
        then one ``(ref, <result>)`` per command, in order.
    :raise GitProtocolError: if a deletion is requested while
        CAPABILITY_DELETE_REFS is not among ``self.capabilities()``.
    """
    all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
                      AssertionError, socket.error, zlib.error,
                      ObjectFormatException)
    status = []
    will_send_pack = False

    for command in refs:
        if command[1] != ZERO_SHA:
            will_send_pack = True

    if will_send_pack:
        # TODO: more informative error messages than just the exception
        # string
        try:
            recv = getattr(self.proto, "recv", None)
            self.repo.object_store.add_thin_pack(self.proto.read, recv)
            status.append((b'unpack', b'ok'))
        except all_exceptions as e:
            # Every other status value is bytes; encode the message so the
            # report does not mix str and bytes.
            status.append(
                (b'unpack', str(e).replace('\n', '').encode('utf-8')))
            # The pack may still have been moved in, but it may contain
            # broken objects. We trust a later GC to clean it up.
    else:
        # The git protocol want to find a status entry related to unpack
        # process even if no pack data has been sent.
        status.append((b'unpack', b'ok'))

    for oldsha, sha, ref in refs:
        ref_status = b'ok'
        try:
            if sha == ZERO_SHA:
                if CAPABILITY_DELETE_REFS not in self.capabilities():
                    raise GitProtocolError(
                        'Attempted to delete refs without delete-refs '
                        'capability.')
                try:
                    del self.repo.refs[ref]
                except all_exceptions:
                    ref_status = b'failed to delete'
            else:
                try:
                    self.repo.refs[ref] = sha
                except all_exceptions:
                    ref_status = b'failed to write'
        except KeyError:
            ref_status = b'bad ref'
        status.append((ref, ref_status))

    return status
def _http_request(self, url, headers=None, data=None,
                  allow_compression=False):
    """Perform HTTP request.

    :param url: Request URL.
    :param headers: Optional mapping of custom request headers.  It is
        copied, so the caller's mapping is never modified.
    :param data: Request data.  A GET is issued when this is None,
        otherwise a POST.
    :param allow_compression: Accepted for interface compatibility; not
        used by this implementation.
    :return: Tuple (`response`, `read`), where `response` wraps the
        transport response and exposes `status`, `content_type` and
        `redirect_location`, and `read` is a consumable read method for
        the response data.
    :raise NotGitRepository: if the server answers 404.
    :raise GitProtocolError: on any other non-200 status.
    """
    # ``headers`` defaults to None; subscripting it directly would raise
    # TypeError, and writing into a caller-supplied dict would leak the
    # defaults below back to the caller.
    req_headers = dict(headers) if headers is not None else {}
    if is_github_url(url):
        req_headers['User-agent'] = user_agent_for_github()
    req_headers["Pragma"] = "no-cache"

    response = self.transport.request(
        ('GET' if data is None else 'POST'), url, body=data,
        headers=req_headers, retries=8)

    if response.status == 404:
        raise NotGitRepository()
    elif response.status != 200:
        raise GitProtocolError("unexpected http resp %d for %s" %
                               (response.status, url))

    read = response.read

    class WrapResponse(object):
        """Thin view of the transport response handed back to callers."""

        def __init__(self, response):
            self._response = response
            self.status = response.status
            self.content_type = response.getheader("Content-Type")
            self.redirect_location = response._actual.geturl()

        def readlines(self):
            return self._response.readlines()

        def close(self):
            # Intentionally a no-op.
            pass

    return WrapResponse(response), read
def write_pkt_line(self, line):
    """Send a 'pkt line' to the remote git process.

    :param line: A string containing the data to send, or None to send a
        flush-pkt ('0000').
    :raise GitProtocolError: if writing raises a socket error.
    """
    try:
        if line is None:
            self.write("0000")
            if self.report_activity:
                self.report_activity(4, 'write')
        else:
            # The 4-digit hex prefix counts itself plus the payload.
            size = len(line) + 4
            self.write("%04x%s" % (size, line))
            if self.report_activity:
                self.report_activity(size, 'write')
    except socket.error as e:
        # ``except X as e`` is valid on Python 2.6+ and Python 3; the
        # previous comma form is a syntax error on Python 3.
        raise GitProtocolError(e)
def handle_packet(self, pkt):
    """Consume one packet of a status report.

    The first packet is stored as the pack status; every later packet is
    appended to the ref statuses, and any that does not start with
    ``b'ok '`` clears ``self._ref_status_ok``.  A flush packet (None)
    marks the report as finished.

    :param pkt: Packet payload, or None for a flush packet.
    :raise GitProtocolError: Raised when packets are received after a
        flush packet.
    """
    if self._done:
        raise GitProtocolError("received more data after status report")
    if pkt is None:
        self._done = True
        return

    stripped = pkt.strip()
    if self._pack_status is None:
        self._pack_status = stripped
        return

    self._ref_statuses.append(stripped)
    if not stripped.startswith(b'ok '):
        self._ref_status_ok = False
def _discover_references(self, service, url):
    """Fetch ``info/refs`` below *url* and parse the advertised refs.

    Unless ``self.dumb`` compares equal to ``False``, the ``?service=``
    query and a matching Content-Type header are added to the request.
    ``self.dumb`` is then updated from the response: any content type that
    does not start with ``application/x-git-`` marks the server as dumb.

    :param service: Service name.
    :param url: Base repository URL; must end with "/".
    :return: ``read_pkt_refs(proto)`` for a smart response, otherwise
        ``(read_info_refs(resp), set())``.
    :raise GitProtocolError: when a smart response does not start with
        exactly the service line.
    """
    assert url[-1] == "/"
    refs_url = urlparse.urljoin(url, "info/refs")
    request_headers = {}
    if self.dumb != False:
        refs_url += "?service=%s" % service
        request_headers["Content-Type"] = (
            "application/x-%s-request" % service)
    resp = self._http_request(refs_url, request_headers)

    content_type = resp.info().gettype()
    self.dumb = not content_type.startswith("application/x-git-")
    if self.dumb:
        return read_info_refs(resp), set()

    proto = Protocol(resp.read, None)
    # The first packet sequence must hold exactly the service line.
    pkts = list(proto.read_pkt_seq())
    expected = [('# service=%s\n' % service)]
    if pkts != expected:
        raise GitProtocolError(
            "unexpected first line %r from smart server" % pkts)
    return read_pkt_refs(proto)