def _handle_redirect(self, args, response):
    """Resolve redirect-style responses for a queue command.

    Returns the final response on success, or None when the caller
    should retry the command (redo / block moved).
    """
    cmd = args[0]  # NOTE(review): bound but not read below — possibly vestigial
    if response[0] == b('!ok'):
        return response
    elif response[0] == b('!redo'):
        # Server asked for an immediate retry.
        return None
    if response[0][:11] == b('!redirected'):
        redirected_response = response[0]
        # Keep following redirects until the response status changes.
        while response[0] == redirected_response:
            # Grow our known block list / recompute target partition.
            self.add_blocks(response, args)
            self.handle_partition_id(args)
            while True:
                # Work on a copy so the original args stay reusable.
                args_copy = copy.deepcopy(args)
                if args[0] == QueueOps.enqueue:
                    # Enqueue redirects carry three trailing hint fields.
                    args_copy.extend(response[-3:])
                elif args[0] == QueueOps.dequeue:
                    # Dequeue redirects carry two trailing hint fields.
                    args_copy.extend(response[-2:])
                response = self.blocks[self._block_id(
                    args)].run_command_redirected(args_copy)
                if response[0] != b('!redo'):
                    break
    if response[0] == b("!block_moved"):
        # Chain layout changed under us; refresh metadata and retry.
        self._refresh()
        return None
    return response
def run_command(self, args):
    """Run a command against the replica chain, retrying across failures.

    On transport errors the replica chain is re-resolved via the
    directory service and the command is retried; a retried mutator that
    reports a duplicate key is treated as success (it already applied).
    """
    resp = None
    retry = False
    while resp is None:
        try:
            if self.cmd_type[args[0]] == CommandType.accessor:
                resp = self._run_command(args)
            else:
                resp = self._run_command(args)
                # A duplicate-key error on a retried mutator means the
                # first attempt actually succeeded before the failure.
                if retry and resp[0] == b('!duplicate_key'):
                    resp[0] = b('!ok')
        except (TTransportException, socket.timeout) as e:
            logging.warning("Error in connection to chain {}: {}".format(self.chain.block_ids, e))
            # Ask the directory service for a repaired replica chain.
            rchain = self.fs.resolve_failures(self.path, rpc_replica_chain(self.chain.block_ids, self.chain.name, self.chain.metadata, self.chain.storage_mode))
            self.chain = ReplicaChain(rchain.block_ids, rchain.name, rchain.metadata, rchain.storage_mode)
            logging.warning("Updated chain: {}".format(self.chain.block_ids))
            # invalidate the client cache for the failed connection(s)
            self._invalidate_cache()
            self._init()
            retry = True
        except EOFError:
            # Peer closed the connection: report the block as moved.
            resp = [b('!block_moved')]
    return resp
def _handle_redirect(self, args, response):
    """Back off briefly when the partition reports full or empty.

    Returns None to signal the caller to retry after the wait, otherwise
    passes the response through untouched.
    """
    status = response[0]
    if status in (b('!full'), b('!empty')):
        # Partition cannot serve the request right now; wait, then retry.
        sleep(self.wait_time)
        return None
    return response
def length(self):
    """Return the queue length as head size minus tail size."""
    def _query_size(size_type):
        # One length RPC; payload index 1 carries the numeric size.
        reply = self._run_repeated(
            [QueueOps.length, b(str(int(size_type)))])
        return int(reply[1])

    # Query tail first, then head, matching the original call order.
    tail_size = _query_size(FifoQueueSizeType.TAIL_SIZE)
    head_size = _query_size(FifoQueueSizeType.HEAD_SIZE)
    return head_size - tail_size
class FileOps:
    # Wire-protocol command keywords for the file partition server.
    write = b('write')
    read = b('read')
    seek = b('seek')
    # Command classification: accessors only read partition state,
    # mutators modify it (drives retry/replication handling upstream).
    op_types = {
        read: CommandType.accessor,
        seek: CommandType.accessor,
        write: CommandType.mutator
    }
class QueueOps:
    # Wire-protocol command keywords for the queue partition server.
    enqueue = b('enqueue')
    dequeue = b('dequeue')
    read_next = b('read_next')
    # Command classification: accessors only read partition state,
    # mutators modify it (drives retry/replication handling upstream).
    op_types = {
        enqueue: CommandType.mutator,
        dequeue: CommandType.mutator,
        read_next: CommandType.accessor
    }
def trim(self, start_pos, end_pos):
    """Trim log entries in [start_pos, end_pos] on every data partition.

    Fans the trim command out to all partitions, then drains exactly one
    response per request. Response contents are not inspected.

    Args:
        start_pos: first position to trim.
        end_pos: last position to trim.

    Returns:
        True unconditionally (responses are drained but ignored).
    """
    count = 0
    # Send to every partition before collecting any replies (parallel fan-out).
    while count < len(self.block_info.data_blocks):
        arg_list = [SharedLogOps.trim, b(str(start_pos)), b(str(end_pos))]
        self.blocks[count].send_command(arg_list)
        count += 1
    # Drain one response per outstanding request; contents are discarded.
    # (Removed an unused accumulator list and unused response binding.)
    for k in range(count):
        self.blocks[k].recv_response()
    return True
def encode(value):
    """Coerce an arbitrary value into bytes for the wire protocol.

    Bytes pass through untouched; numbers become their textual form;
    anything else is converted to text first, then encoded.
    (Uses py2/py3 compat names long/basestring/unicode from this module.)
    """
    if isinstance(value, bytes):
        return value
    if isinstance(value, (int, long)):
        # Integers: decimal text representation.
        coerced = b(str(value))
    elif isinstance(value, float):
        # Floats: repr round-trips full precision.
        coerced = b(repr(value))
    elif isinstance(value, basestring):
        # Already textual; only the final encode step may apply.
        coerced = value
    else:
        # Arbitrary objects fall back to their text representation.
        coerced = unicode(value)
    if isinstance(coerced, unicode):
        coerced = coerced.encode()
    return coerced
def read_ls(self, size):
    """Read up to `size` bytes from the current offset via the client cache.

    Serves each chunk from the per-partition cache when possible; on a
    miss, prefetches a block-aligned region from the server into the
    cache first. Advances cur_offset / cur_partition as it reads.

    Raises:
        KeyError: if the current offset is at or past the file size.
    """
    ret = b''
    # Logical file size derived from the last partition and its offset.
    file_size = self.last_partition * self.block_size + self.last_offset
    if file_size <= self.cur_partition * self.block_size + self.cur_offset:
        raise KeyError("File offset exceeds the file size")
    remain_size = file_size - self.cur_partition * self.block_size - self.cur_offset
    # Never read past end-of-file.
    remaining_data = min(remain_size, size)
    if remaining_data == 0:
        return b""
    #Parallel read here
    start_partition = self._block_id()  # NOTE(review): unused below — possibly vestigial
    count = 0
    while remaining_data > 0:
        count += 1
        # Chunk is bounded by the cache block boundary, the bytes left to
        # read, and the partition boundary.
        data_to_read = min(
            self.cache[self.cur_partition].block_size -
            (self.cur_offset % self.cache[self.cur_partition].block_size),
            remaining_data, self.block_size - self.cur_offset)
        if self.cache[self.cur_partition].exists(self.cur_offset):
            # Cache hit: serve directly from the cache.
            ret += self.cache[self.cur_partition].hit_handling(
                self.cur_offset, data_to_read)
        else:
            # Cache miss: prefetch a cache-block-aligned region from the
            # server, install it, then serve the chunk from the cache.
            start_offset = (self.cur_offset //
                            self.cache[self.cur_partition].block_size
                            ) * self.cache[self.cur_partition].block_size
            self.blocks[self._block_id()].send_command([
                FileOps.read_ls, b(str(start_offset)), b(
                    str(
                        min(
                            file_size - self.cur_partition * self.block_size
                            - self.cur_offset, self.cache[
                                self.cur_partition].prefetch_block_num *
                            self.cache[self.cur_partition].block_size)))
            ])
            prefetched_data = self.blocks[
                self.cur_partition].recv_response()[-1]
            self.cache[self.cur_partition].prefetch_handling(
                start_offset, prefetched_data)
            ret += self.cache[self.cur_partition].hit_handling(
                self.cur_offset, data_to_read)
        remaining_data -= data_to_read
        self.cur_offset += data_to_read
        # Roll over to the next partition at a partition boundary.
        if self.cur_offset == self.block_size and self.cur_partition != self.last_partition:
            self.cur_offset = 0
            self.cur_partition += 1
    return ret
class SharedLogOps:
    # Wire-protocol command keywords for the shared-log partition server.
    write = b('write')
    scan = b('scan')
    trim = b('trim')
    add_blocks = b('add_blocks')
    get_storage_capacity = b('get_storage_capacity')
    # Command classification: accessors only read partition state,
    # mutators modify it (drives retry/replication handling upstream).
    op_types = {
        scan: CommandType.accessor,
        trim: CommandType.mutator,
        write: CommandType.mutator,
        add_blocks: CommandType.accessor,
        get_storage_capacity: CommandType.accessor
    }
def scan(self, start_pos, end_pos, logical_streams):
    """Scan entries in [start_pos, end_pos] for the given logical streams.

    Fans the scan out to every data partition, then gathers and
    concatenates the payloads (everything past the status word) from
    each response.
    """
    num_sent = 0
    total_partitions = len(self.block_info.data_blocks)
    # Send to every partition before collecting any replies.
    while num_sent < total_partitions:
        request = [SharedLogOps.scan, b(str(start_pos)), b(str(end_pos))]
        request += logical_streams
        self.blocks[num_sent].send_command(request)
        num_sent += 1
    results = []
    for idx in range(num_sent):
        reply = self.blocks[idx].recv_response()
        # Index 0 is the status word; payload (if any) follows it.
        if len(reply) > 1:
            results += reply[1:]
    return results
def _handle_redirect(self, args, response):
    """Handle file-read responses that redirect or span partitions.

    On '!split_read' the payload continues in the next partition(s):
    accumulate each partial payload and keep reading the remainder from
    successive partitions until a final '!ok' arrives, then splice the
    combined data back into the response.

    Returns the (possibly rewritten) response, or None on '!redo'.
    """
    if response[0] == b('!ok'):
        self.cur_offset += len(response[1])
    elif response[0] == b('!redo'):
        return None
    # BUG FIX: was compared against the str literal '!split_read', which can
    # never equal a bytes status on Python 3, so split reads were dropped.
    elif response[0] == b('!split_read'):
        result = b('')
        while response[0] == b('!split_read'):
            data_part = response[1]
            result += data_part
            if self._need_chain():
                # Response carries the next chain's block ids, '!'-separated.
                chain = response[2].split('!')
                self.blocks.append(
                    ReplicaChainClient(self.fs, self.path, self.client_cache,
                                       chain, FileOps.op_types))
            self.cur_partition += 1
            self._update_last_partition(self.cur_partition)
            self.cur_offset = 0
            # Ask the next partition for whatever is still missing.
            new_args = [
                FileOps.read,
                b(str(self.cur_offset)),
                b(str(int(args[2]) - len(result)))
            ]
            response = self.blocks[self.cur_partition].run_command(
                new_args)
            if response[0] == b('!ok'):
                self.cur_offset += len(response[1])
                result += response[1]
        # Replace the last partial payload with the full concatenation.
        response[1] = result
    return response
def _run_repeated(self, args):
    """Run a command until redirect handling yields a final response.

    Raises:
        KeyError: if the final response status is not '!ok'.
    """
    # _handle_redirect returns None to request a retry; loop until it
    # hands back a concrete response.
    final = None
    while final is None:
        raw = self.blocks[self._block_id(args)].run_command(args)
        final = self._handle_redirect(args, raw)
    # A completed command resets the exponential-backoff counter.
    self.redo_times = 0
    status = final[0]
    if status != b('!ok'):
        raise KeyError(bytes_to_str(status))
    return final
def _handle_redirect(self, args, response):
    """Handle file-write responses that redirect or spill to new partitions.

    On '!split_write' the data did not fit in the current partition: the
    response reports how many bytes remain, and the tail is rewritten to
    the next partition (allocating a new chain client when needed) until
    it is fully accepted.

    Returns the final response, or None on '!redo'.
    """
    data = args[1]
    if response[0] == b('!ok'):
        self.cur_offset += len(data)
    elif response[0] == b('!redo'):
        return None
    # BUG FIX: outer status check was b('split_write') (missing the leading
    # '!'), so it never matched and split writes were silently dropped.
    elif response[0] == b('!split_write'):
        while response[0] == b('!split_write'):
            # Server reports how many trailing bytes were not written.
            remaining_data_len = int(response[1])
            remaining_data = data[len(data) - remaining_data_len:len(data)]
            if self._need_chain():
                # Response carries the next chain's block ids, '!'-separated.
                chain = response[2].split('!')
                self.blocks.append(
                    ReplicaChainClient(self.fs, self.path, self.client_cache,
                                       chain, FileOps.op_types))
            self.cur_partition += 1
            self._update_last_partition(self.cur_partition)
            self.cur_offset = 0
            # Retry the tail write on the new partition until it stops
            # answering '!redo'.
            while True:
                new_args = [
                    FileOps.write, remaining_data,
                    b(str(self.cur_offset))
                ]
                response = self.blocks[self.cur_partition].run_command(
                    new_args)
                if response[0] != b('!redo'):
                    break
            self.cur_offset += len(remaining_data)
    return response
def add_blocks(self, response, args):
    """Append a replica-chain client for a partition past the known range.

    Called when a redirect targets a partition beyond our current block
    list. Parses the new chain's block ids out of the redirect response
    and registers a client for it.

    Raises:
        ValueError: if a new block is required but auto-scaling is off.
    """
    if self._block_id(args) >= len(self.blocks) - 1:
        if not self.auto_scale:
            # Improved over a bare `raise ValueError`: say why it failed.
            raise ValueError(
                "cannot add data block: auto_scale is disabled")
        # Redirect payload carries the new chain's block ids, '!'-separated.
        block_ids = [
            bytes_to_str(j) for j in response[1].split(b('!'))
        ]
        chain = ReplicaChain(block_ids, 0, 0,
                             rpc_storage_mode.rpc_in_memory)
        self.blocks.append(
            ReplicaChainClient(self.fs, self.path, self.client_cache,
                               chain, QueueOps.op_types))
class QueueOps:
    # Wire-protocol command keywords for the queue partition server,
    # including logical-stream (_ls) variants and rate/size queries.
    enqueue = b('enqueue')
    dequeue = b('dequeue')
    enqueue_ls = b('enqueue_ls')
    dequeue_ls = b('dequeue_ls')
    read_next = b('read_next')
    read_next_ls = b('read_next_ls')
    length = b('length')
    in_rate = b('in_rate')
    out_rate = b('out_rate')
    # Command classification: accessors only read partition state,
    # mutators modify it (drives retry/replication handling upstream).
    op_types = {
        enqueue: CommandType.mutator,
        dequeue: CommandType.mutator,
        enqueue_ls: CommandType.mutator,
        dequeue_ls: CommandType.mutator,
        read_next: CommandType.accessor,
        read_next_ls: CommandType.accessor,
        length: CommandType.accessor,
        in_rate: CommandType.accessor,
        out_rate: CommandType.accessor
    }
class FileOps:
    # Wire-protocol command keywords for the file partition server,
    # including logical-stream (_ls) variants and capacity queries.
    write = b('write')
    read = b('read')
    write_ls = b('write_ls')
    read_ls = b('read_ls')
    seek = b('seek')
    add_blocks = b('add_blocks')
    get_storage_capacity = b('get_storage_capacity')
    # Command classification: accessors only read partition state,
    # mutators modify it. FIX: the original dict literal listed the
    # `seek` key twice with the same value; the duplicate is removed.
    op_types = {
        read: CommandType.accessor,
        seek: CommandType.accessor,
        write: CommandType.mutator,
        read_ls: CommandType.accessor,
        write_ls: CommandType.mutator,
        add_blocks: CommandType.accessor,
        get_storage_capacity: CommandType.accessor
    }
def write_ls(self, data):
    """Write `data` at the current offset, auto-scaling partitions as needed.

    First computes how many new partition chains are required to hold the
    data past the current capacity and allocates them (updating the local
    client-side cache list), then streams the data chunk-by-chunk across
    partitions, keeping the per-partition cache coherent via miss_handling.

    Returns:
        len(data) on success, -1 if scaling is needed but disabled or if
        registering newly allocated chains fails.
    """
    file_size = (self.last_partition + 1) * self.block_size
    num_chain_needed = 0
    temp_last_offset = -1
    if self.cur_partition * self.block_size + self.cur_offset > file_size:
        # Current position is already past the allocated capacity: need
        # chains to cover the gap plus the data itself.
        num_chain_needed = int(self.cur_partition - self.last_partition)
        file_size = (self.cur_partition + 1) * self.block_size
        remain_size = file_size - self.cur_partition * self.block_size - self.cur_offset
        num_chain_needed += int(
            (len(data) - remain_size) / self.block_size +
            ((len(data) - remain_size) % self.block_size != 0))
    else:
        remain_size = file_size - self.cur_partition * self.block_size - self.cur_offset
        if remain_size < len(data):
            # Ceiling division: chains needed for the overflow bytes.
            num_chain_needed = int(
                (len(data) - remain_size) / self.block_size +
                ((len(data) - remain_size) % self.block_size != 0))
    if num_chain_needed and not self.auto_scale:
        return -1
    # First allocate new blocks if needed
    while num_chain_needed != 0:
        _return = self.blocks[self.last_partition].run_command([
            FileOps.add_blocks,
            b(str(self.last_partition)),
            b(str(num_chain_needed))
        ])
        if _return[0] == b("!block_allocated"):
            self.last_partition += num_chain_needed
            # One fresh client-side cache per newly allocated partition.
            for i in range(num_chain_needed):
                self.cache.append(
                    FileCache(max_length=self.cache_size,
                              block_size=self.cache_block_size,
                              prefetch_block_num=self.prefetch_size))
            temp_last_offset = self.last_offset
            self.last_offset = 0
            num_chain_needed = 0
            try:
                # Each extra response entry is one '!'-separated chain spec.
                for x in _return[1:]:
                    block_ids = [bytes_to_str(j) for j in x.split(b('!'))]
                    chain = ReplicaChain(block_ids, 0, 0,
                                         rpc_storage_mode.rpc_in_memory)
                    self.blocks.append(
                        ReplicaChainClient(self.fs, self.path,
                                           self.client_cache, chain,
                                           FileOps.op_types))
            # FIX: was a bare `except:`, which also swallowed SystemExit
            # and KeyboardInterrupt; narrowed to Exception.
            except Exception:
                return -1
    if self.block_size == self.cur_offset:
        # Exactly at a partition boundary: start writing into the next one.
        self.cur_offset = 0
        self.cur_partition += 1
    # Parallel write
    remaining_data = len(data)
    start_partition = self._block_id()  # NOTE(review): unused — kept out of caution? removed below
    count = 0
    while remaining_data > 0:
        count += 1
        # Chunk bounded by the cache block boundary, bytes left, and the
        # partition boundary.
        data_to_write = data[
            len(data) - remaining_data:len(data) - remaining_data + min(
                self.cache[self.cur_partition].block_size -
                (self.cur_offset %
                 self.cache[self.cur_partition].block_size),
                remaining_data, self.block_size - self.cur_offset)]
        # The trailing argument carries the pre-scaling last offset when a
        # scale-up happened (temp_last_offset >= 0), else the current one.
        if temp_last_offset >= 0:
            self.blocks[self._block_id()].send_command([
                FileOps.write, data_to_write,
                b(str(self.cur_offset)),
                b(str(self.cache[self.cur_partition].block_size)),
                b(str(temp_last_offset))
            ])
        else:
            self.blocks[self._block_id()].send_command([
                FileOps.write, data_to_write,
                b(str(self.cur_offset)),
                b(str(self.cache[self.cur_partition].block_size)),
                b(str(self.last_offset))
            ])
        resp = self.blocks[self.cur_partition].recv_response()
        # Keep the client-side cache coherent with what was written.
        self.cache[self.cur_partition].miss_handling(
            self.cur_offset, resp[-1])
        remaining_data -= len(data_to_write)
        self.cur_offset += len(data_to_write)
        if self.last_offset < self.cur_offset and self.cur_partition == self.last_partition:
            self.last_offset = self.cur_offset
        if self.cur_offset >= self.block_size and self.cur_partition != self.last_partition:
            self.cur_offset = 0
            self.cur_partition += 1
        if self.last_partition < self.cur_partition:
            self.last_partition = self.cur_partition
            self.last_offset = self.cur_offset
    return len(data)
def write(self, data):
    """Write `data` at the current offset, retrying until accepted."""
    offset_arg = b(str(self.cur_offset))
    self._run_repeated([FileOps.write, data, offset_arg])
def read(self, size):
    """Read up to `size` bytes from the current offset; return the payload."""
    read_args = [FileOps.read, b(str(self.cur_offset)), b(str(size))]
    reply = self._run_repeated(read_args)
    # Index 0 is the status word; index 1 carries the data.
    return reply[1]
class HashTableOps:
    # Wire-protocol command keywords for the hash-table partition server,
    # including logical-stream (_ls) variants.
    exists = b('exists')
    get = b('get')
    put = b('put')
    remove = b('remove')
    update = b('update')
    upsert = b('upsert')
    exists_ls = b('exists_ls')
    get_ls = b('get_ls')
    put_ls = b('put_ls')
    remove_ls = b('remove_ls')
    update_ls = b('update_ls')
    upsert_ls = b('upsert_ls')
    # Command classification: accessors only read partition state,
    # mutators modify it.
    # NOTE(review): update/update_ls are classified as accessors even
    # though they modify values — looks intentional elsewhere in this
    # codebase, but worth confirming against the server-side handling.
    op_types = {
        exists: CommandType.accessor,
        get: CommandType.accessor,
        put: CommandType.mutator,
        remove: CommandType.mutator,
        update: CommandType.accessor,
        upsert: CommandType.mutator,
        exists_ls: CommandType.accessor,
        get_ls: CommandType.accessor,
        put_ls: CommandType.mutator,
        remove_ls: CommandType.mutator,
        update_ls: CommandType.accessor,
        upsert_ls: CommandType.mutator
    }
def _handle_redirect(self, args, response):
    """Resolve redirect-style responses for a hash-table command.

    Follows '!exporting' redirects to the destination chain, backs off
    exponentially on '!full', and signals retries by returning None.
    """
    # While the slot is being exported, replay the command on the
    # destination chain named in the response.
    while b(response[0]) == b('!exporting'):
        args_copy = copy.deepcopy(args)
        if args[0] == b("update") or args[0] == b("upsert"):
            # These ops need the extra fields the redirect carries.
            args_copy += [response[2], response[3]]
        # Destination chain block ids are '!'-separated in the response.
        block_ids = [bytes_to_str(x) for x in response[1].split(b('!'))]
        chain = ReplicaChain(block_ids, 0, 0, rpc_storage_mode.rpc_in_memory)
        while True:
            response = ReplicaChainClient(
                self.fs, self.path, self.client_cache, chain,
                HashTableOps.op_types).run_command_redirected(args_copy)
            if b(response[0]) != b("!redo"):
                break
    if b(response[0]) == b('!block_moved'):
        # Chain layout changed under us; refresh metadata and retry.
        self._refresh()
        return None
    if b(response[0]) == b('!full'):
        # Exponential backoff before the caller retries.
        time.sleep(0.001 * math.pow(2, self.redo_times))
        self.redo_times += 1
        return None
    if b(response[0]) == b("!redo"):
        return None
    return response
def write(self, pos, data_, logical_streams):
    """Write a shared-log entry at `pos` tagged with `logical_streams`.

    Sizes the payload (stream tags + data) against remaining capacity,
    allocates new partition chains if auto-scaling is enabled and needed,
    then fans the write out and drains one response per request.

    Returns:
        The combined payload length on success, -1 if scaling is needed
        but disabled or if registering newly allocated chains fails.
    """
    file_size = (self.last_partition + 1) * self.block_size
    num_chain_needed = 0
    # Capacity accounting uses stream tags plus the data itself.
    data = ""
    for ls in logical_streams:
        data += ls
    data += data_
    if self.cur_partition * self.block_size + self.cur_offset > file_size:
        # Current position already past allocated capacity: need chains
        # for the gap plus the payload.
        num_chain_needed = int(self.cur_partition - self.last_partition)
        file_size = (self.cur_partition + 1) * self.block_size
        remain_size = file_size - self.cur_partition * self.block_size - self.cur_offset
        num_chain_needed += int(
            (len(data) - remain_size) / self.block_size +
            ((len(data) - remain_size) % self.block_size != 0))
    else:
        remain_size = file_size - self.cur_partition * self.block_size - self.cur_offset
        if remain_size < len(data):
            # Ceiling division: chains needed for the overflow bytes.
            num_chain_needed = int(
                (len(data) - remain_size) / self.block_size +
                ((len(data) - remain_size) % self.block_size != 0))
    if num_chain_needed and not self.auto_scale:
        return -1
    # First allocate new blocks if needed
    while num_chain_needed != 0:
        _return = self.blocks[self.last_partition].run_command([
            SharedLogOps.add_blocks,
            b(str(self.last_partition)),
            b(str(num_chain_needed))
        ])
        if _return[0] == b("!block_allocated"):
            self.last_partition += num_chain_needed
            self.last_offset = 0
            num_chain_needed = 0
            try:
                # Each extra response entry is one '!'-separated chain spec.
                for x in _return[1:]:
                    block_ids = [bytes_to_str(j) for j in x.split(b('!'))]
                    chain = ReplicaChain(block_ids, 0, 0,
                                         rpc_storage_mode.rpc_in_memory)
                    self.blocks.append(
                        ReplicaChainClient(self.fs, self.path,
                                           self.client_cache, chain,
                                           SharedLogOps.op_types))
            # FIX: was a bare `except:`, which also swallowed SystemExit
            # and KeyboardInterrupt; narrowed to Exception.
            except Exception:
                return -1
    if self.block_size == self.cur_offset:
        # Exactly at a partition boundary: move to the next partition.
        self.cur_offset = 0
        self.cur_partition += 1
    # Parallel write
    remaining_data = len(data)
    start_partition = self._block_id()
    count = 0
    while remaining_data > 0:
        count += 1
        if len(data) > self.block_size - self.cur_offset:
            # Payload does not fit in the current partition; roll over.
            self.cur_offset = 0
            self.cur_partition += 1
            if self.last_partition < self.cur_partition:
                self.last_partition = self.cur_partition
                self.last_offset = self.cur_offset
        arg_list = [SharedLogOps.write, b(str(pos)), data_]
        arg_list += logical_streams
        self.blocks[self._block_id()].send_command(arg_list)
        remaining_data -= len(data)
        self.cur_offset += len(data)
        if self.last_offset < self.cur_offset and self.cur_partition == self.last_partition:
            self.last_offset = self.cur_offset
    # Drain one response per outstanding write request.
    for i in range(0, count):
        self.blocks[start_partition + i].recv_response()
    return len(data)
def _run_command_redirected(self, args):
    """Send `args` tagged with the redirected marker; return the raw reply.

    NOTE: mutates `args` in place by appending the marker when absent.
    """
    marker = b('!redirected')
    if args[-1] != marker:
        args.append(marker)
    self.send_command(args)
    return self._recv_response()