def get_connection(self, host, port):
    '''Open a socket connection to a given host and port and write the
    Hadoop connection header.

    The Hadoop RPC protocol looks like this when creating a connection:

    +---------------------------------------------------------------------+
    |  Header, 4 bytes ("hrpc")                                           |
    +---------------------------------------------------------------------+
    |  Version, 1 byte (default version 9)                                |
    +---------------------------------------------------------------------+
    |  RPC service class, 1 byte (0x00)                                   |
    +---------------------------------------------------------------------+
    |  Auth protocol, 1 byte (Auth method None = 0)                       |
    +---------------------------------------------------------------------+
    |  Length of the RpcRequestHeaderProto + length of the                |
    |  IpcConnectionContextProto (4 bytes/32 bit int)                     |
    +---------------------------------------------------------------------+
    |  Serialized delimited RpcRequestHeaderProto                         |
    +---------------------------------------------------------------------+
    |  Serialized delimited IpcConnectionContextProto                     |
    +---------------------------------------------------------------------+

    The connected socket is stored on ``self.sock``.

    :param host: host name or address of the server to connect to
    :param port: TCP port of the server
    :raises TransientException: if SASL is enabled (``self.use_sasl``) but
        the SASL client fails to connect
    '''
    log.debug("############## CONNECTING ##############")

    # Open socket
    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

    # The configured timeouts are divided by 1000 before being handed to
    # settimeout(), which takes seconds. Divide by a float so that
    # sub-second values are not truncated to 0 by integer division, which
    # would switch the socket to non-blocking mode.
    self.sock.settimeout(self.sock_connect_timeout / 1000.0)

    # Connect socket to server - defined by host and port arguments
    self.sock.connect((host, port))
    self.sock.settimeout(self.sock_request_timeout / 1000.0)

    # Send the fixed connection preamble
    self.write(self.RPC_HEADER)                           # header
    self.write(struct.pack('B', self.version))            # version
    self.write(struct.pack('B', self.RPC_SERVICE_CLASS))  # RPC service class

    if self.use_sasl:
        self.write(struct.pack('B', self.AUTH_PROTOCOL_SASL))  # auth protocol: SASL
        # The SASL client is connected right after the auth protocol byte
        # and before the request header / connection context are written.
        sasl = SaslRpcClient(
            self, hdfs_namenode_principal=self.hdfs_namenode_principal)
        if not sasl.connect():
            raise TransientException(
                "SASL is configured, but cannot get connected")
    else:
        self.write(struct.pack('B', self.AUTH_PROTOCOL_NONE))  # auth protocol: none

    rpc_header = self.create_rpc_request_header()
    context = self.create_connection_context()

    # Both messages are written delimited, so the announced total length
    # includes the varint size prefix of each message as well as its payload.
    header_length = (len(rpc_header) + encoder._VarintSize(len(rpc_header)) +
                     len(context) + encoder._VarintSize(len(context)))

    # Only build the formatted byte dump when debug output is actually
    # enabled for this logger.
    if log.isEnabledFor(logging.DEBUG):
        log.debug("Header length: %s (%s)", header_length,
                  format_bytes(struct.pack('!I', header_length)))

    self.write(struct.pack('!I', header_length))
    self.write_delimited(rpc_header)
    self.write_delimited(context)