def get_connection(self, host, port):
    """Open a socket connection to a given host and port and write the
    Hadoop connection header.

    The Hadoop RPC protocol looks like this when creating a connection:

    +---------------------------------------------------------------------+
    |  Header, 4 bytes ("hrpc")                                           |
    +---------------------------------------------------------------------+
    |  Version, 1 byte (default verion 9)                                 |
    +---------------------------------------------------------------------+
    |  RPC service class, 1 byte (0x00)                                   |
    +---------------------------------------------------------------------+
    |  Auth protocol, 1 byte (Auth method None = 0)                       |
    +---------------------------------------------------------------------+
    |  Length of the RpcRequestHeaderProto + length of the                |
    |  of the IpcConnectionContextProto (4 bytes/32 bit int)              |
    +---------------------------------------------------------------------+
    |  Serialized delimited RpcRequestHeaderProto                         |
    +---------------------------------------------------------------------+
    |  Serialized delimited IpcConnectionContextProto                     |
    +---------------------------------------------------------------------+
    """
    log.debug("############## CONNECTING ##############")

    # SASL is used only when a delegation token was supplied.
    auth = self.AUTH_PROTOCOL_NONE if self.token is None else self.AUTH_PROTOCOL_SASL

    # Open socket
    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    self.sock.settimeout(self.timeout)

    # Connect socket to server - defined by host and port arguments
    self.sock.connect((host, port))

    # Send RPC headers
    self.write(self.RPC_HEADER)  # header
    self.write(struct.pack("B", self.version))  # version
    self.write(struct.pack("B", self.RPC_SERVICE_CLASS))  # RPC service class
    self.write(struct.pack("B", auth))  # serialization type (default none)

    if auth == SocketRpcChannel.AUTH_PROTOCOL_SASL:
        self.negotiate_sasl(self.token)
        # Reserved call id used for the post-handshake connection context.
        self.call_id = -3

    rpc_header = self.create_rpc_request_header()
    # BUG FIX: the original compared the auth byte with "is", which tests
    # object identity, not equality; it only happened to work because
    # CPython caches small ints. Use "==" for a value comparison.
    context = (
        self.create_connection_context()
        if auth == self.AUTH_PROTOCOL_NONE
        else self.create_connection_context_auth()
    )

    # Total length of both delimited protobuf messages (varint + body each).
    header_length = (
        len(rpc_header)
        + encoder._VarintSize(len(rpc_header))
        + len(context)
        + encoder._VarintSize(len(context))
    )

    if log.getEffectiveLevel() == logging.DEBUG:
        log.debug("Header length: %s (%s)" % (header_length, format_bytes(struct.pack("!I", header_length))))

    self.write(struct.pack("!I", header_length))
    self.write_delimited(rpc_header)
    self.write_delimited(context)
def get_connection(self, host, port):
    """Connect a TCP socket to ``host``:``port`` and emit the Hadoop RPC
    handshake preamble followed by the connection context.

    Wire layout on connection setup: the 4-byte "hrpc" magic, one version
    byte, one RPC service-class byte, one auth-protocol byte (None = 0),
    then a 4-byte big-endian total length covering two varint-delimited
    protobufs: the RpcRequestHeaderProto and the IpcConnectionContextProto.
    """
    log.debug("############## CONNECTING ##############")

    # Establish the TCP connection; Nagle is disabled because RPC frames
    # are small and latency-sensitive.
    conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    conn.settimeout(self.timeout)
    self.sock = conn
    conn.connect((host, port))

    # Fixed-size preamble: magic, version, service class, auth method.
    self.write(self.RPC_HEADER)
    self.write(struct.pack('B', self.version))
    self.write(struct.pack('B', self.RPC_SERVICE_CLASS))
    self.write(struct.pack('B', self.AUTH_PROTOCOL_NONE))  # protobuf = 0

    # Two delimited protobuf messages preceded by their combined length.
    rpc_header = self.create_rpc_request_header()
    context = self.create_connection_context()
    total = sum(
        len(part) + encoder._VarintSize(len(part))
        for part in (rpc_header, context)
    )

    if log.getEffectiveLevel() == logging.DEBUG:
        log.debug("Header length: %s (%s)" % (total, format_bytes(struct.pack('!I', total))))

    self.write(struct.pack('!I', total))
    self.write_delimited(rpc_header)
    self.write_delimited(context)
def send_rpc_message(self, method, request):
    """Send one Hadoop RPC request to the NameNode.

    Each request is framed as a 4-byte big-endian total length followed
    by three varint-delimited protobuf messages in order:
    RpcRequestHeaderProto, RequestHeaderProto (naming ``method``), and
    the serialized ``request`` parameters. When a SASL quality of
    protection was negotiated, the whole frame is passed through the
    SASL wrap layer instead of the raw socket.
    """
    log.debug("############## SENDING ##############")

    # The three delimited parts of every request frame.
    hdr = self.create_rpc_request_header()        # 0. RpcRequestHeaderProto
    req_hdr = self.create_request_header(method)  # 1. RequestHeaderProto
    body = request.SerializeToString()            # 2. Param

    if log.getEffectiveLevel() == logging.DEBUG:
        log_protobuf_message("Request", request)

    # Frame length covers each part plus its varint length prefix.
    frame_len = sum(
        len(part) + encoder._VarintSize(len(part))
        for part in (hdr, req_hdr, body)
    )

    if log.getEffectiveLevel() == logging.DEBUG:
        log.debug("RPC message length: %s (%s)" % (frame_len, format_bytes(struct.pack('!I', frame_len))))

    if self.use_sasl and self.sasl.use_wrap():
        # QOP negotiated: assemble the whole frame and wrap it.
        log.debug("SASL QOP requested, wrapping RPC message.")
        frame = struct.pack('!I', frame_len)
        for part in (hdr, req_hdr, body):
            frame += encoder._VarintBytes(len(part)) + part
        self.sasl.wrap(frame)
    else:
        self.write(struct.pack('!I', frame_len))
        self.write_delimited(hdr)
        self.write_delimited(req_hdr)
        self.write_delimited(body)
def send_rpc_message(self, method, request):
    """Send one Hadoop RPC request to the NameNode.

    The frame written to the connection is a 4-byte big-endian length
    followed by three varint-delimited protobuf messages:
    RpcRequestHeaderProto, RequestHeaderProto (for ``method``), and the
    serialized ``request`` itself. All messages must already be in
    protobuf form; this method only frames and writes them.
    """
    log.debug("############## SENDING ##############")

    parts = [
        self.create_rpc_request_header(),    # 0. RpcRequestHeaderProto
        self.create_request_header(method),  # 1. RequestHeaderProto
        request.SerializeToString(),         # 2. Param
    ]

    if log.getEffectiveLevel() == logging.DEBUG:
        log_protobuf_message("Request", request)

    # Total length = every part plus its varint length prefix.
    total = 0
    for part in parts:
        total += len(part) + encoder._VarintSize(len(part))

    if log.getEffectiveLevel() == logging.DEBUG:
        log.debug(
            "RPC message length: %s (%s)"
            % (total, format_bytes(struct.pack("!I", total)))
        )

    self.write(struct.pack("!I", total))
    for part in parts:
        self.write_delimited(part)
def get_connection(self, host, port):
    """Open the RPC socket and perform the Hadoop handshake, optionally
    negotiating SASL first.

    Preamble: 4-byte "hrpc" magic, version byte, RPC service-class byte,
    and one auth byte (SASL marker or None). After an optional SASL
    negotiation, a 4-byte length prefixes the two varint-delimited
    protobufs RpcRequestHeaderProto and IpcConnectionContextProto.
    """
    log.debug("############## CONNECTING ##############")

    # Create the socket; a shorter timeout applies while connecting,
    # a (possibly different) one for subsequent requests.
    conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    conn.settimeout(self.sock_connect_timeout / 1000)
    self.sock = conn
    conn.connect((host, port))
    conn.settimeout(self.sock_request_timeout / 1000)

    # Handshake preamble.
    self.write(self.RPC_HEADER)
    self.write(struct.pack('B', self.version))
    self.write(struct.pack('B', self.RPC_SERVICE_CLASS))

    if self.use_sasl:
        # Announce SASL (0xDF) and run the negotiation immediately.
        self.write(struct.pack('B', self.AUTH_PROTOCOL_SASL))
        negotiator = SaslRpcClient(
            self, hdfs_namenode_principal=self.hdfs_namenode_principal)
        if not negotiator.connect():
            raise TransientException("SASL is configured, but cannot get connected")
    else:
        # No authentication (protobuf = 0).
        self.write(struct.pack('B', self.AUTH_PROTOCOL_NONE))

    # Connection context: combined length, then both delimited messages.
    rpc_header = self.create_rpc_request_header()
    context = self.create_connection_context()
    frame_len = sum(
        len(msg) + encoder._VarintSize(len(msg))
        for msg in (rpc_header, context)
    )

    if log.getEffectiveLevel() == logging.DEBUG:
        log.debug("Header length: %s (%s)" % (frame_len, format_bytes(struct.pack('!I', frame_len))))

    self.write(struct.pack('!I', frame_len))
    self.write_delimited(rpc_header)
    self.write_delimited(context)
def numpy_u32_encoder(write, value):
    """Serialize a numpy uint32 array through the protobuf ``write``
    callback as field 3 (length-delimited message) wrapping field 1
    (length-delimited bytes payload).

    The return value of the final ``write`` call is propagated.
    """
    payload = value.tobytes()
    n = len(payload)

    # Wire-format tags: (field number << 3) | wire type 2 (length-delimited).
    outer_tag = (3 << 3) | 2
    inner_tag = (1 << 3) | 2

    # Outer message: tag, then the size of the full tagged inner field.
    write(bytes((outer_tag,)))
    _EncodeVarint(write, 1 + _VarintSize(n) + n)

    # Inner bytes field: tag, payload length, raw payload.
    write(bytes((inner_tag,)))
    _EncodeVarint(write, n)
    return write(payload)
def _send_sasl_message(self, message):
    """Frame and transmit one SASL negotiation protobuf over the
    transport, preceded by the mandatory RpcRequestHeaderProto."""
    header = RpcRequestHeaderProto()
    header.rpcKind = 2       # RPC_PROTOCOL_BUFFER
    header.rpcOp = 0
    header.callId = -33      # reserved call id for SASL traffic
    header.retryCount = -1
    header.clientId = b""

    wire_header = header.SerializeToString()
    wire_message = message.SerializeToString()

    # Total frame length covers both delimited messages (varint + body).
    frame_len = (
        len(wire_header) + encoder._VarintSize(len(wire_header))
        + len(wire_message) + encoder._VarintSize(len(wire_message))
    )
    self._trans.write(struct.pack('!I', frame_len))
    self._trans.write_delimited(wire_header)
    self._trans.write_delimited(wire_message)
    log_protobuf_message("Send out", message)
def _send_sasl_message(self, message):
    """Write a single SASL negotiation message to the transport.

    Every SASL exchange is framed like a normal RPC call: a 4-byte
    big-endian total length followed by the varint-delimited
    RpcRequestHeaderProto (callId -33) and the delimited SASL payload.
    """
    rpc_hdr = RpcRequestHeaderProto()
    rpc_hdr.rpcKind = 2      # RPC_PROTOCOL_BUFFER
    rpc_hdr.rpcOp = 0
    rpc_hdr.callId = -33     # SASL
    rpc_hdr.retryCount = -1
    rpc_hdr.clientId = b""

    blobs = (rpc_hdr.SerializeToString(), message.SerializeToString())
    total = sum(len(b) + encoder._VarintSize(len(b)) for b in blobs)

    self._trans.write(struct.pack("!I", total))
    for blob in blobs:
        self._trans.write_delimited(blob)
    log_protobuf_message("Send out", message)
def getInfo(protocolName, methodName, requestProto, responseProto,
            host="localhost", port=8032):
    """Perform one Hadoop RPC call against a daemon and parse the reply.

    Opens a fresh connection, sends the "hrpc" handshake plus the
    delimited RpcRequestHeaderProto/IpcConnectionContextProto pair, then
    sends a single framed request invoking ``methodName`` of
    ``protocolName`` and hands the response stream to parse_response.

    :param protocolName: fully qualified protocol class name
    :param methodName: RPC method to invoke
    :param requestProto: populated request protobuf instance
    :param responseProto: response protobuf class for parse_response
    :param host: daemon host (generalized from the hard-coded original)
    :param port: daemon port (8032 = YARN resource manager, 8020 = HDFS
        namenode)
    """
    # --- connection setup -------------------------------------------------
    rpcrequestheader = RpcRequestHeaderProto()
    rpcrequestheader.rpcKind = 2   # RpcHeader_pb2.RPC_PROTOCOL_BUFFER
    rpcrequestheader.rpcOp = 0     # RpcRequestHeaderProto.RPC_FINAL_PACKET
    rpcrequestheader.callId = -3   # -3 during initial connection, 0 afterwards
    client_id = str(uuid.uuid4())
    # BUG FIX: clientId is a bytes field; assigning a str breaks on
    # Python 3, so encode the 16-char uuid prefix explicitly.
    rpcrequestheader.clientId = client_id[0:16].encode("ascii")
    s_rpcrequestheader = rpcrequestheader.SerializeToString()

    # Create and serialize an IpcConnectionContextProto.
    context = IpcConnectionContextProto()
    context.userInfo.effectiveUser = "******"
    context.protocol = protocolName
    s_context = context.SerializeToString()

    # Combined length of the two delimited messages.
    rpcipc_length = (len(s_rpcrequestheader)
                     + encoder._VarintSize(len(s_rpcrequestheader))
                     + len(s_context)
                     + encoder._VarintSize(len(s_context)))

    # Send to the server in the order given above.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    sock.settimeout(10)
    sock.connect((host, port))

    # BUG FIX: the handshake must be sent as bytes ("hrpc" as str fails
    # on Python 3); sendall avoids short-write truncation that plain
    # send permits.
    sock.sendall(b"hrpc")                 # header
    sock.sendall(struct.pack('B', 9))     # version
    sock.sendall(struct.pack('B', 0x00))  # RPC service class
    sock.sendall(struct.pack('B', 0x00))  # auth none
    sock.sendall(struct.pack('!I', rpcipc_length)
                 + encoder._VarintBytes(len(s_rpcrequestheader))
                 + s_rpcrequestheader
                 + encoder._VarintBytes(len(s_context))
                 + s_context)

    # --- request frame ----------------------------------------------------
    # 1. RpcRequestHeaderProto precedes every message on the connection.
    rpcrequestheader = RpcRequestHeaderProto()
    rpcrequestheader.rpcKind = 2
    rpcrequestheader.rpcOp = 0
    rpcrequestheader.callId = 0   # 0 for all post-handshake calls
    client_id = str(uuid.uuid4())
    rpcrequestheader.clientId = client_id[0:16].encode("ascii")
    s_rpcrequestheader = rpcrequestheader.SerializeToString()

    # 2. RequestHeaderProto names the protocol and method being invoked.
    requestheader = RequestHeaderProto()
    requestheader.methodName = methodName
    requestheader.declaringClassProtocolName = protocolName
    requestheader.clientProtocolVersion = 1
    s_requestheader = requestheader.SerializeToString()

    # 3. The actual parameters of the call.
    s_request = requestProto.SerializeToString()

    rpc_message_length = (len(s_rpcrequestheader)
                          + encoder._VarintSize(len(s_rpcrequestheader))
                          + len(s_requestheader)
                          + encoder._VarintSize(len(s_requestheader))
                          + len(s_request)
                          + encoder._VarintSize(len(s_request)))

    # Length prefix + three delimited messages, in one write.
    sock.sendall(struct.pack('!I', rpc_message_length)
                 + encoder._VarintBytes(len(s_rpcrequestheader))
                 + s_rpcrequestheader
                 + encoder._VarintBytes(len(s_requestheader))
                 + s_requestheader
                 + encoder._VarintBytes(len(s_request))
                 + s_request)

    stream = recv_rpc_message(sock)
    parse_response(stream, responseProto)
def createMsg():
    """Example/demo: call getServerDefaults on a local HDFS namenode.

    Performs the full Hadoop RPC handshake on localhost:8020, sends one
    GetServerDefaultsRequestProto for
    org.apache.hadoop.hdfs.protocol.ClientProtocol, and parses the
    response via recv_rpc_message/parse_response.
    """
    # --- connection setup -------------------------------------------------
    rpcrequestheader = RpcRequestHeaderProto()
    rpcrequestheader.rpcKind = 2   # RpcHeader_pb2.RPC_PROTOCOL_BUFFER
    rpcrequestheader.rpcOp = 0     # RpcRequestHeaderProto.RPC_FINAL_PACKET
    rpcrequestheader.callId = -3   # -3 during initial connection, 0 afterwards
    client_id = str(uuid.uuid4())
    # BUG FIX: clientId is a bytes field; encode the 16-char uuid prefix
    # so this also works on Python 3.
    rpcrequestheader.clientId = client_id[0:16].encode("ascii")
    s_rpcrequestheader = rpcrequestheader.SerializeToString()

    # Create and serialize an IpcConnectionContextProto.
    context = IpcConnectionContextProto()
    context.userInfo.effectiveUser = "******"
    context.protocol = "org.apache.hadoop.hdfs.protocol.ClientProtocol"
    s_context = context.SerializeToString()

    # Combined length of the two delimited messages.
    rpcipc_length = (len(s_rpcrequestheader)
                     + encoder._VarintSize(len(s_rpcrequestheader))
                     + len(s_context)
                     + encoder._VarintSize(len(s_context)))

    # Send to the server in the order given above.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    sock.settimeout(10)
    sock.connect(("localhost", 8020))

    # BUG FIX: handshake bytes must be bytes, not str, on Python 3;
    # sendall avoids short writes that plain send permits.
    sock.sendall(b"hrpc")                 # header
    sock.sendall(struct.pack('B', 9))     # version
    sock.sendall(struct.pack('B', 0x00))  # RPC service class
    sock.sendall(struct.pack('B', 0x00))  # auth none
    sock.sendall(struct.pack('!I', rpcipc_length)
                 + encoder._VarintBytes(len(s_rpcrequestheader))
                 + s_rpcrequestheader
                 + encoder._VarintBytes(len(s_context))
                 + s_context)

    # --- request frame ----------------------------------------------------
    # 1. RpcRequestHeaderProto is needed for every message we send.
    rpcrequestheader = RpcRequestHeaderProto()
    rpcrequestheader.rpcKind = 2
    rpcrequestheader.rpcOp = 0
    rpcrequestheader.callId = 0   # 0 for all post-handshake calls
    client_id = str(uuid.uuid4())
    rpcrequestheader.clientId = client_id[0:16].encode("ascii")
    s_rpcrequestheader = rpcrequestheader.SerializeToString()

    # 2. RequestHeaderProto names the protocol and method to invoke.
    requestheader = RequestHeaderProto()
    requestheader.methodName = "getServerDefaults"
    requestheader.declaringClassProtocolName = "org.apache.hadoop.hdfs.protocol.ClientProtocol"
    requestheader.clientProtocolVersion = 1
    s_requestheader = requestheader.SerializeToString()

    # 3. The actual request parameters (empty for getServerDefaults).
    request = ClientNamenodeProtocol_pb2.GetServerDefaultsRequestProto()
    s_request = request.SerializeToString()

    # Length of the three delimited messages.
    rpc_message_length = (len(s_rpcrequestheader)
                          + encoder._VarintSize(len(s_rpcrequestheader))
                          + len(s_requestheader)
                          + encoder._VarintSize(len(s_requestheader))
                          + len(s_request)
                          + encoder._VarintSize(len(s_request)))

    # Pack in the wire format described above and send.
    sock.sendall(struct.pack('!I', rpc_message_length)
                 + encoder._VarintBytes(len(s_rpcrequestheader))
                 + s_rpcrequestheader
                 + encoder._VarintBytes(len(s_requestheader))
                 + s_requestheader
                 + encoder._VarintBytes(len(s_request))
                 + s_request)

    stream = recv_rpc_message(sock)
    parse_response(stream, ClientNamenodeProtocol_pb2.GetServerDefaultsResponseProto)
    # BUG FIX: Python-2-only "print" statement replaced with the
    # print() function, valid on both major versions.
    print("reading response")
rpcrequestheader.callId = -3 # During initial connection # 0 otherwise # 4 for ping i guess client_id = str(uuid.uuid4()) rpcrequestheader.clientId = client_id[0:16] s_rpcrequestheader = rpcrequestheader.SerializeToString() '''Create and serialize a IpcConnectionContextProto ''' context = IpcConnectionContextProto() context.userInfo.effectiveUser = "******" context.protocol = "org.apache.hadoop.hdfs.protocol.ClientProtocol" #"org.apache.hadoop.mapred.JobSubmissionProtocol" #"org.apache.hadoop.hdfs.protocol.ClientProtocol" s_context = context.SerializeToString() ''' Length of the two messages ''' rpcipc_length = len(s_rpcrequestheader) + encoder._VarintSize(len(s_rpcrequestheader)) + len(s_context) + encoder._VarintSize(len(s_context)) ''' Send to server in the order given above''' sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) sock.settimeout(10) sock.connect(("localhost", 8020)) sock.send("hrpc") # header sock.send(struct.pack('B', 9)) # version sock.send(struct.pack('B', 0x00)) # RPC service class sock.send(struct.pack('B', 0x00)) # auth none sock.sendall(struct.pack('!I', rpcipc_length) +
def numpy_u32_sizer(value):
    """Return the serialized size of a numpy uint32 array framed as a
    length-delimited inner bytes field nested inside a length-delimited
    outer field (mirrors numpy_u32_encoder's wire layout)."""
    payload = 4 * value.shape[0]                  # uint32 elements are 4 bytes each
    inner = 1 + _VarintSize(payload) + payload    # inner tag + varint len + data
    return 1 + _VarintSize(inner) + inner         # outer tag + varint len + inner
def negotiate_sasl(self, token):
    """Perform the SASL NEGOTIATE/INITIATE handshake using a delegation
    token.

    Sends a NEGOTIATE request, selects the server-offered TOKEN /
    DIGEST-MD5 mechanism, then answers its challenge with an INITIATE
    message produced by a SASLClient keyed on the base64-encoded token
    identifier and password.

    :param token: mapping with "identifier" and "password" entries.
    :raises IOError: if the server does not offer TOKEN DIGEST-MD5.
    """
    log.debug("##############NEGOTIATING SASL#####################")

    # Every SASL message is preceded by the same delimited RPC header.
    header_bytes = self.create_sasl_header().SerializeToString()

    negotiate_request = RpcSaslProto()
    negotiate_request.state = RpcSaslProto.NEGOTIATE
    negotiate_request.version = 0
    sasl_bytes = negotiate_request.SerializeToString()

    total_length = (
        len(header_bytes)
        + len(sasl_bytes)
        + encoder._VarintSize(len(header_bytes))
        + encoder._VarintSize(len(sasl_bytes))
    )

    # Sends negotiate request
    self.write(struct.pack("!I", total_length))
    self.write_delimited(header_bytes)
    self.write_delimited(sasl_bytes)

    # Gets negotiate response.
    # BUG FIX: the response buffer was bound to the name "bytes",
    # shadowing the builtin; renamed to response_bytes.
    response_bytes = self.recv_rpc_message()
    resp = self.parse_response(response_bytes, RpcSaslProto)

    # Only token-based DIGEST-MD5 is supported by this client.
    chosen_auth = None
    for offered in resp.auths:
        if offered.method == "TOKEN" and offered.mechanism == "DIGEST-MD5":
            chosen_auth = offered
    if chosen_auth is None:
        raise IOError("Token digest-MD5 authentication not supported by server")

    # Prepares initiate request
    self.sasl = SASLClient(
        chosen_auth.serverId,
        chosen_auth.protocol,
        mechanism=chosen_auth.mechanism,
        username=base64.b64encode(token["identifier"]),
        password=base64.b64encode(token["password"]),
    )
    challenge_resp = self.sasl.process(chosen_auth.challenge)

    auth = RpcSaslProto.SaslAuth()
    auth.method = chosen_auth.method
    auth.mechanism = chosen_auth.mechanism
    auth.protocol = chosen_auth.protocol
    auth.serverId = chosen_auth.serverId

    initiate_request = RpcSaslProto()
    initiate_request.state = RpcSaslProto.INITIATE
    initiate_request.version = 0
    initiate_request.auths.extend([auth])
    initiate_request.token = challenge_resp
    sasl_bytes = initiate_request.SerializeToString()

    total_length = (
        len(header_bytes)
        + len(sasl_bytes)
        + encoder._VarintSize(len(header_bytes))
        + encoder._VarintSize(len(sasl_bytes))
    )

    # Sends initiate request
    self.write(struct.pack("!I", total_length))
    self.write_delimited(header_bytes)
    self.write_delimited(sasl_bytes)

    # NOTE(review): the INITIATE response is read and parsed but further
    # CHALLENGE round-trips are not handled here — confirm upstream that
    # DIGEST-MD5 completes in a single challenge for this server.
    response_bytes = self.recv_rpc_message()
    resp = self.parse_response(response_bytes, RpcSaslProto)
def get_connection(self, host, port):
    """Open the namenode connection and perform the Hadoop RPC
    handshake, negotiating SASL/Kerberos first when configured.

    Wire order: 4-byte "hrpc" magic, version byte, RPC service-class
    byte, one auth byte (SASL marker or None), optional SASL
    negotiation, then a 4-byte length followed by the varint-delimited
    RpcRequestHeaderProto and IpcConnectionContextProto.

    :raises RpcConnectionError: when the TCP connect fails.
    :raises RpcSaslError: when SASL is configured but negotiation fails.
    """
    log.debug("############## CONNECTING ##############")

    # Create the socket; the connect timeout applies only until the
    # connection is established, the request timeout afterwards.
    conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    conn.settimeout(self.sock_connect_timeout / 1000)
    self.sock = conn
    try:
        conn.connect((host, port))
    except socket.error as e:
        raise RpcConnectionError("Unable to connect to rpc service : %s" % str(e))
    conn.settimeout(self.sock_request_timeout / 1000)

    # Handshake preamble.
    self.write(self.RPC_HEADER)
    self.write(struct.pack('B', self.version))
    self.write(struct.pack('B', self.RPC_SERVICE_CLASS))

    if self.use_sasl:
        # Announce SASL (0xDF) and negotiate immediately.
        self.write(struct.pack('B', self.AUTH_PROTOCOL_SASL))
        negotiator = SaslRpcClient(self, krb_principal=self.krb_principal)
        if not negotiator.connect():
            raise RpcSaslError(
                "SASL is configured, but cannot get connected")
    else:
        # No authentication (protobuf = 0).
        self.write(struct.pack('B', self.AUTH_PROTOCOL_NONE))

    # Connection context: combined length, then both delimited messages.
    rpc_header = self.create_rpc_request_header()
    context = self.create_connection_context()
    frame_len = sum(
        len(msg) + encoder._VarintSize(len(msg))
        for msg in (rpc_header, context)
    )

    if log.getEffectiveLevel() == logging.DEBUG:
        log.debug("Header length: %s (%s)" % (frame_len, format_bytes(struct.pack('!I', frame_len))))

    self.write(struct.pack('!I', frame_len))
    self.write_delimited(rpc_header)
    self.write_delimited(context)