Esempio n. 1
0
    def __init__(self,
                 host,
                 port,
                 version,
                 effective_user=None,
                 use_sasl=False,
                 hdfs_namenode_principal=None,
                 sock_connect_timeout=10000,
                 sock_request_timeout=10000):
        '''SocketRpcChannel to connect to a socket server on a user defined port.
           It possible to define version and effective user for the communication.'''
        self.host = host
        self.port = port
        self.sock = None
        self.call_id = -3  # First time (when the connection context is sent, the call_id should be -3, otherwise start with 0 and increment)
        self.version = version
        self.client_id = str(uuid.uuid4())
        self.use_sasl = use_sasl
        self.hdfs_namenode_principal = hdfs_namenode_principal
        if self.use_sasl:
            if not _kerberos_available:
                raise FatalException(
                    "Kerberos libs not found. Please install snakebite using 'pip install snakebite[kerberos]'"
                )

            kerberos = Kerberos()
            self.effective_user = effective_user or kerberos.user_principal(
            ).name
        else:
            self.effective_user = effective_user or get_current_username()
        self.sock_connect_timeout = sock_connect_timeout
        self.sock_request_timeout = sock_request_timeout
Esempio n. 2
0
    def validate_request(self, request):
        '''Validate the client request against the protocol file.'''

        # Check the request is correctly initialized
        if not request.IsInitialized():
            raise FatalException(
                "Client request (%s) is missing mandatory fields" %
                type(request))
Esempio n. 3
0
    def readBlock(self, length, pool_id, block_id, generation_stamp, offset, block_token, check_crc):
        '''Send a read request to given block. If we receive a successful response,
        we start reading packets.

        Send read request:
        +---------------------------------------------------------------------+
        |  Data Transfer Protocol Version, 2 bytes                            |
        +---------------------------------------------------------------------+
        |  Op code, 1 byte (READ_BLOCK = 81)                                  |
        +---------------------------------------------------------------------+
        |  Delimited serialized OpReadBlockProto (varint len + request)       |
        +---------------------------------------------------------------------+

        Receive response:
        +---------------------------------------------------------------------+
        |  Delimited BlockOpResponseProto (varint len + response)             |
        +---------------------------------------------------------------------+

        Start reading packets. Each packet has the following structure:
        +---------------------------------------------------------------------+
        |  Packet length (4 bytes/32 bit int)                                 |
        +---------------------------------------------------------------------+
        |  Serialized size of header, 2 bytes                                 |
        +---------------------------------------------------------------------+
        |  Packet Header Proto                                                |
        +---------------------------------------------------------------------+
        |  x checksums, 4 bytes each                                          |
        +---------------------------------------------------------------------+
        |  x chunks of payload data                                           |
        +---------------------------------------------------------------------+

        '''
        log.debug("%s sending readBlock request" % self)

        # Send version and opcode
        self.sock.send(struct.pack('>h', 28))
        self.sock.send(struct.pack('b', self.READ_BLOCK))
        length = length - offset

        # Create and send OpReadBlockProto message
        request = OpReadBlockProto()
        request.offset = offset
        request.len = length
        header = request.header
        header.clientName = "snakebite"
        base_header = header.baseHeader
        # TokenProto
        token = base_header.token
        token.identifier = block_token.identifier
        token.password = block_token.password
        token.kind = block_token.kind
        token.service = block_token.service
        # ExtendedBlockProto
        block = base_header.block
        block.poolId = pool_id
        block.blockId = block_id
        block.generationStamp = generation_stamp
        s_request = request.SerializeToString()
        log_protobuf_message("OpReadBlockProto:", request)
        self.write_delimited(s_request)

        byte_stream = RpcBufferedReader(self.sock)
        block_op_response_bytes = get_delimited_message_bytes(byte_stream)[1]

        block_op_response = BlockOpResponseProto()
        block_op_response.ParseFromString(block_op_response_bytes)
        log_protobuf_message("BlockOpResponseProto", block_op_response)

        checksum_type = block_op_response.readOpChecksumInfo.checksum.type
        bytes_per_chunk = block_op_response.readOpChecksumInfo.checksum.bytesPerChecksum
        log.debug("Checksum type: %s, bytesPerChecksum: %s" % (checksum_type, bytes_per_chunk))
        if checksum_type in [self.CHECKSUM_NULL]:
            checksum_len = 0
        elif checksum_type in [self.CHECKSUM_CRC32C, self.CHECKSUM_CRC32]:
            checksum_len = 4
        else:
            raise FatalException("Checksum type %s not implemented" % checksum_type)

        total_read = 0
        if block_op_response.status == 0:  # datatransfer_proto.Status.Value('SUCCESS')
            while total_read < length:
                log.debug("== Reading next packet")

                packet_len = struct.unpack("!I", byte_stream.read(4))[0]
                log.debug("Packet length: %s", packet_len)

                serialized_size = struct.unpack("!H", byte_stream.read(2))[0]
                log.debug("Serialized size: %s", serialized_size)

                packet_header_bytes = byte_stream.read(serialized_size)
                packet_header = PacketHeaderProto()
                packet_header.ParseFromString(packet_header_bytes)
                log_protobuf_message("PacketHeaderProto", packet_header)

                data_len = packet_header.dataLen

                chunks_per_packet = int((data_len + bytes_per_chunk - 1) / bytes_per_chunk)
                log.debug("Nr of chunks: %d", chunks_per_packet)

                data_len = packet_len - 4 - chunks_per_packet * checksum_len
                log.debug("Payload len: %d", data_len)

                byte_stream.reset()

                # Collect checksums
                if check_crc and checksum_type != self.CHECKSUM_NULL:
                    checksums = []
                    for _ in xrange(0, chunks_per_packet):
                        checksum = self._read_bytes(checksum_len)
                        checksum = struct.unpack("!I", checksum)[0]
                        checksums.append(checksum)
                else:
                    self._read_bytes(checksum_len * chunks_per_packet)

                # We use a fixed size buffer (a "load") to read only a couple of chunks at once. 
                bytes_per_load = self.LOAD_SIZE - (self.LOAD_SIZE % bytes_per_chunk)
                chunks_per_load = int(bytes_per_load / bytes_per_chunk)
                loads_per_packet = int(math.ceil(bytes_per_chunk * chunks_per_packet / bytes_per_load))

                read_on_packet = 0
                for i in range(loads_per_packet):
                    load = ''
                    for j in range(chunks_per_load):
                        log.debug("Reading chunk %s in load %s:", j, i)
                        bytes_to_read = min(bytes_per_chunk, data_len - read_on_packet)
                        chunk = self._read_bytes(bytes_to_read)
                        if check_crc and checksum_type != self.CHECKSUM_NULL:
                            checksum_index = i * chunks_per_load + j
                            if checksum_index < len(checksums) and crc(chunk) != checksums[checksum_index]:
                                # it makes sense to retry, so TransientError
                                raise TransientException("Checksum doesn't match")
                        load += chunk
                        total_read += len(chunk)
                        read_on_packet += len(chunk)
                    yield load
           
            # Send ClientReadStatusProto message confirming successful read
            request = ClientReadStatusProto()
            request.status = 0  # SUCCESS
            log_protobuf_message("ClientReadStatusProto:", request)
            s_request = request.SerializeToString()
            self.write_delimited(s_request)
            self._close_socket()