def _generate_file_over_two_blocks(self, path):
    f = open(
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'testfiles', 'test3'))
    p = self.cluster.put_subprocess('-', path)
    for _ in range(131072):  # 1024 * 131072 = 134,217,728 (default block size)
        f.seek(0)
        for line in f.readlines():
            print(line, file=p.stdin)
    print('some extra bytes to exceed one blocksize', file=p.stdin)  # +40
    p.communicate()
def _buffer_bytes(self, n):
    to_read = n
    for _ in range(self.MAX_READ_ATTEMPTS):
        bytes_read = self.socket.recv(to_read)
        self.buffer += bytes_read
        to_read -= len(bytes_read)
        if to_read == 0:
            log.debug("Bytes read: %d, total: %d" % (len(bytes_read), self.buffer_length))
            return n
    if len(bytes_read) < n:
        # we'd like to distinguish transient (e.g. network-related) problems
        # note: but this error could also be a logic error
        raise TransientException(
            "RpcBufferedReader only managed to read %s out of %s bytes"
            % (len(bytes_read), n))
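
# A minimal standalone sketch (not part of snakebite) of the bounded-retry
# "read exactly n bytes" pattern that _buffer_bytes implements. `sock` is
# assumed to be a connected, blocking socket; the function name and the
# max_attempts default are hypothetical.
def read_exactly(sock, n, max_attempts=100):
    """Read exactly n bytes from sock, retrying after partial reads."""
    buf = b''
    to_read = n
    for _ in range(max_attempts):
        if to_read == 0:
            break
        chunk = sock.recv(to_read)
        if not chunk:
            # recv() returning b'' means the peer closed the connection
            raise IOError("connection closed after %d of %d bytes" % (len(buf), n))
        buf += chunk
        to_read -= len(chunk)
    if to_read:
        raise IOError("only read %d out of %d bytes" % (len(buf), n))
    return buf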
def _write_to_test_cluster(self, testfile, times, dst, block_size=134217728):
    testfiles_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "testfiles")
    f = open(''.join([testfiles_path, testfile]))
    p = self.cluster.put_subprocess('-', dst, block_size)
    for _ in range(times):
        f.seek(0)
        for line in f.readlines():
            print(line, file=p.stdin)
    p.communicate()
def readBlock(self, length, pool_id, block_id, generation_stamp, offset,
              block_token, check_crc):
    '''Send a read request to given block. If we receive a successful
    response, we start reading packets.

    Send read request:
    +---------------------------------------------------------------------+
    | Data Transfer Protocol Version, 2 bytes                             |
    +---------------------------------------------------------------------+
    | Op code, 1 byte (READ_BLOCK = 81)                                   |
    +---------------------------------------------------------------------+
    | Delimited serialized OpReadBlockProto (varint len + request)        |
    +---------------------------------------------------------------------+

    Receive response:
    +---------------------------------------------------------------------+
    | Delimited BlockOpResponseProto (varint len + response)              |
    +---------------------------------------------------------------------+

    Start reading packets. Each packet has the following structure:
    +---------------------------------------------------------------------+
    | Packet length (4 bytes/32 bit int)                                  |
    +---------------------------------------------------------------------+
    | Serialized size of header, 2 bytes                                  |
    +---------------------------------------------------------------------+
    | Packet Header Proto                                                 |
    +---------------------------------------------------------------------+
    | x checksums, 4 bytes each                                           |
    +---------------------------------------------------------------------+
    | x chunks of payload data                                            |
    +---------------------------------------------------------------------+
    '''
    log.debug("%s sending readBlock request" % self)

    # Send version and opcode
    self.sock.send(struct.pack('>h', 28))
    self.sock.send(struct.pack('b', self.READ_BLOCK))
    length = length - offset

    # Create and send OpReadBlockProto message
    request = OpReadBlockProto()
    request.offset = offset
    request.len = length
    header = request.header
    header.clientName = "snakebite"
    base_header = header.baseHeader
    # TokenProto
    token = base_header.token
    token.identifier = block_token.identifier
    token.password = block_token.password
    token.kind = block_token.kind
    token.service = block_token.service
    # ExtendedBlockProto
    block = base_header.block
    block.poolId = pool_id
    block.blockId = block_id
    block.generationStamp = generation_stamp
    s_request = request.SerializeToString()
    log_protobuf_message("OpReadBlockProto:", request)
    self.write_delimited(s_request)

    byte_stream = RpcBufferedReader(self.sock)
    block_op_response_bytes = get_delimited_message_bytes(byte_stream)[1]

    block_op_response = BlockOpResponseProto()
    block_op_response.ParseFromString(block_op_response_bytes)
    log_protobuf_message("BlockOpResponseProto", block_op_response)

    checksum_type = block_op_response.readOpChecksumInfo.checksum.type
    bytes_per_chunk = block_op_response.readOpChecksumInfo.checksum.bytesPerChecksum
    log.debug("Checksum type: %s, bytesPerChecksum: %s" % (checksum_type, bytes_per_chunk))
    if checksum_type in [self.CHECKSUM_NULL]:
        checksum_len = 0
    elif checksum_type in [self.CHECKSUM_CRC32C, self.CHECKSUM_CRC32]:
        checksum_len = 4
    else:
        raise FatalException("Checksum type %s not implemented" % checksum_type)

    total_read = 0
    if block_op_response.status == 0:  # datatransfer_proto.Status.Value('SUCCESS')
        while total_read < length:
            log.debug("== Reading next packet")

            packet_len = struct.unpack("!I", byte_stream.read(4))[0]
            log.debug("Packet length: %s", packet_len)

            serialized_size = struct.unpack("!H", byte_stream.read(2))[0]
            log.debug("Serialized size: %s", serialized_size)

            packet_header_bytes = byte_stream.read(serialized_size)
            packet_header = PacketHeaderProto()
            packet_header.ParseFromString(packet_header_bytes)
            log_protobuf_message("PacketHeaderProto", packet_header)

            data_len = packet_header.dataLen
            chunks_per_packet = int(
                (data_len + bytes_per_chunk - 1) / bytes_per_chunk)
            log.debug("Nr of chunks: %d", chunks_per_packet)

            data_len = packet_len - 4 - chunks_per_packet * checksum_len
            log.debug("Payload len: %d", data_len)

            byte_stream.reset()

            # Collect checksums
            if check_crc and checksum_type != self.CHECKSUM_NULL:
                checksums = []
                for _ in range(0, chunks_per_packet):
                    checksum = self._read_bytes(checksum_len)
                    checksum = struct.unpack("!I", checksum)[0]
                    checksums.append(checksum)
            else:
                self._read_bytes(checksum_len * chunks_per_packet)

            # We use a fixed size buffer (a "load") to read only a couple of
            # chunks at once.
            bytes_per_load = self.LOAD_SIZE - (self.LOAD_SIZE % bytes_per_chunk)
            chunks_per_load = int(bytes_per_load / bytes_per_chunk)
            loads_per_packet = int(
                math.ceil(bytes_per_chunk * chunks_per_packet / bytes_per_load))

            read_on_packet = 0
            for i in range(loads_per_packet):
                load = b''
                for j in range(chunks_per_load):
                    log.debug("Reading chunk %s in load %s:", j, i)
                    bytes_to_read = min(bytes_per_chunk, data_len - read_on_packet)
                    chunk = self._read_bytes(bytes_to_read)
                    if check_crc and checksum_type != self.CHECKSUM_NULL:
                        checksum_index = i * chunks_per_load + j
                        if checksum_index < len(checksums) and crc(chunk) != checksums[checksum_index]:
                            # it makes sense to retry, so TransientError
                            raise TransientException("Checksum doesn't match")
                    load += chunk
                    total_read += len(chunk)
                    read_on_packet += len(chunk)
                yield load

        # Send ClientReadStatusProto message confirming successful read
        request = ClientReadStatusProto()
        request.status = 0  # SUCCESS
        log_protobuf_message("ClientReadStatusProto:", request)
        s_request = request.SerializeToString()
        self.write_delimited(s_request)
        self._close_socket()
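
# The "varint len + message" framing described in the readBlock docstring is
# standard protobuf length-delimiting: the serialized message is preceded by
# its own byte length encoded as a base-128 varint. Below is a minimal sketch
# with hypothetical helper names; the actual framing above is handled by
# write_delimited and get_delimited_message_bytes, which are not shown here.
import struct


def encode_varint(value):
    """Encode a non-negative integer as a protobuf base-128 varint."""
    out = b''
    while True:
        byte = value & 0x7f
        value >>= 7
        out += struct.pack('B', byte | (0x80 if value else 0))
        if not value:
            return out


def frame_delimited(serialized_message):
    """Prefix a serialized protobuf message with its varint-encoded length."""
    return encode_varint(len(serialized_message)) + serialized_message


# For example, frame_delimited(request.SerializeToString()) should yield the
# "varint len + request" byte layout the docstring describes.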