Exemple #1
0
 def _handle_redirect(self, args, response):
     """Resolve redirect/retry statuses for a queue command.

     Parameters
     ----------
     args : list
         Original command arguments; args[0] is the operation name.
     response : list
         Raw block response; response[0] is the status code.

     Returns
     -------
     list or None
         The terminal response, or None to tell the caller to retry
         (on '!redo' or '!block_moved').
     """
     cmd = args[0]  # NOTE(review): assigned but unused in this method
     if response[0] == b('!ok'):
         return response
     elif response[0] == b('!redo'):
         # None signals the retry loop in the caller to re-issue the command.
         return None
     if response[0][:11] == b('!redirected'):
         redirected_response = response[0]
         # Follow redirects until the target block stops redirecting us.
         while response[0] == redirected_response:
             self.add_blocks(response, args)
             self.handle_partition_id(args)
             while True:
                 args_copy = copy.deepcopy(args)
                 if args[0] == QueueOps.enqueue:
                     # Redirect payload: last 3 fields of the response.
                     args_copy.extend(response[-3:])
                 elif args[0] == QueueOps.dequeue:
                     # Redirect payload: last 2 fields of the response.
                     args_copy.extend(response[-2:])
                 # Re-run on the (possibly new) target block until it stops
                 # answering '!redo'.
                 response = self.blocks[self._block_id(
                     args)].run_command_redirected(args_copy)
                 if response[0] != b('!redo'):
                     break
     if response[0] == b("!block_moved"):
         # Chain topology changed; refresh metadata and let the caller retry.
         self._refresh()
         return None
     return response
 def run_command(self, args):
     """Run a command on the replica chain, recovering from failures.

     Retries until a response is obtained: on transport errors the chain
     is re-resolved via the directory service and the connection rebuilt.
     A retried mutator that reports '!duplicate_key' already took effect
     before the failure, so it is rewritten to '!ok'.

     Both branches of the original if/else issued the identical
     `self._run_command(args)` call; they are consolidated here — only the
     duplicate-key rewrite is mutator-specific.

     Parameters
     ----------
     args : list
         Command arguments; args[0] selects the operation.

     Returns
     -------
     list
         The block response ([b'!block_moved'] when the server closes the
         connection).
     """
     resp = None
     retry = False
     while resp is None:
         try:
             resp = self._run_command(args)
             # Accessors are idempotent; only mutators need the
             # duplicate-key rewrite after a fail-over retry.
             if self.cmd_type[args[0]] != CommandType.accessor \
                     and retry and resp[0] == b('!duplicate_key'):
                 resp[0] = b('!ok')
         except (TTransportException, socket.timeout) as e:
             logging.warning("Error in connection to chain {}: {}".format(self.chain.block_ids, e))
             # Ask the directory service for a repaired chain and reconnect.
             rchain = self.fs.resolve_failures(self.path, rpc_replica_chain(self.chain.block_ids,
                                                                            self.chain.name,
                                                                            self.chain.metadata,
                                                                            self.chain.storage_mode))
             self.chain = ReplicaChain(rchain.block_ids, rchain.name, rchain.metadata, rchain.storage_mode)
             logging.warning("Updated chain: {}".format(self.chain.block_ids))
             # invalidate the client cache for the failed connection(s)
             self._invalidate_cache()
             self._init()
             retry = True
         except EOFError:
             # Server closed the connection: report the block as moved so
             # the caller refreshes its metadata.
             resp = [b('!block_moved')]
     return resp
Exemple #3
0
 def _handle_redirect(self, args, response):
     """Back off and request a retry when the queue is full or empty."""
     status = response[0]
     if status in (b('!full'), b('!empty')):
         # The queue cannot service the operation right now; wait a bit
         # and return None so the caller re-issues the command.
         sleep(self.wait_time)
         return None
     return response
Exemple #4
0
 def length(self):
     """Return the queue length as (head size - tail size)."""
     def _query_size(size_type):
         # Ask the server for one of the FIFO size counters.
         reply = self._run_repeated(
             [QueueOps.length, b(str(int(size_type)))])
         return int(reply[1])

     tail = _query_size(FifoQueueSizeType.TAIL_SIZE)
     head = _query_size(FifoQueueSizeType.HEAD_SIZE)
     return head - tail
Exemple #5
0
class FileOps:
    """Command names and command-type table for the file data structure."""
    write = b('write')
    read = b('read')
    seek = b('seek')

    # Maps each operation to accessor (read-only) or mutator (state-changing).
    op_types = {
        read: CommandType.accessor,
        seek: CommandType.accessor,
        write: CommandType.mutator
    }
Exemple #6
0
class QueueOps:
    """Command names and command-type table for the queue data structure."""
    enqueue = b('enqueue')
    dequeue = b('dequeue')
    read_next = b('read_next')

    # Maps each operation to accessor (read-only) or mutator (state-changing).
    op_types = {
        enqueue: CommandType.mutator,
        dequeue: CommandType.mutator,
        read_next: CommandType.accessor
    }
Exemple #7
0
 def trim(self, start_pos, end_pos):
     """Trim log entries in [start_pos, end_pos] on every data block.

     Broadcasts the trim command to all partitions, then drains exactly one
     response per request so the per-block connections stay in sync.

     The original body built a `ret = []` list it never filled and bound
     each response to an unused `resp` variable; that dead code is removed.
     Responses are intentionally not inspected.

     Returns
     -------
     bool
         Always True.
     """
     start_partition = 0
     count = 0
     while start_partition + count < len(self.block_info.data_blocks):
         arg_list = [SharedLogOps.trim, b(str(start_pos)), b(str(end_pos))]
         self.blocks[start_partition + count].send_command(arg_list)
         count += 1
     # Drain one response per dispatched command; results are ignored.
     for k in range(0, count):
         self.blocks[start_partition + k].recv_response()
     return True
Exemple #8
0
def encode(value):
    """Coerce *value* to bytes (Python-2-compatible).

    bytes pass through unchanged; ints/longs and floats go through their
    text representation; anything that is not already a string is first
    converted to unicode; unicode is UTF-8 encoded at the end.
    """
    if isinstance(value, bytes):
        return value
    if isinstance(value, (int, long)):
        encoded = b(str(value))
    elif isinstance(value, float):
        encoded = b(repr(value))
    elif isinstance(value, basestring):
        encoded = value
    else:
        encoded = unicode(value)
    if isinstance(encoded, unicode):
        encoded = encoded.encode()
    return encoded
Exemple #9
0
    def read_ls(self, size):
        """Read up to *size* bytes from the current offset via the local cache.

        Serves each chunk from the per-partition cache when present;
        otherwise fetches a prefetch window from the storage block, fills
        the cache, and serves from it. Advances cur_offset / cur_partition
        as data is consumed.

        Parameters
        ----------
        size : int
            Maximum number of bytes to read.

        Returns
        -------
        bytes
            The data read (may be shorter than *size* at end of file).

        Raises
        ------
        KeyError
            If the current offset is at or past the end of the file.
        """
        ret = b''
        # Logical file size: full partitions plus the tail offset.
        file_size = self.last_partition * self.block_size + self.last_offset
        if file_size <= self.cur_partition * self.block_size + self.cur_offset:
            raise KeyError("File offset exceeds the file size")
        remain_size = file_size - self.cur_partition * self.block_size - self.cur_offset
        remaining_data = min(remain_size, size)
        if remaining_data == 0:
            return b""
        #Parallel read here
        start_partition = self._block_id()
        count = 0
        while remaining_data > 0:
            count += 1
            # Chunk size: bounded by the cache-block remainder, the bytes
            # still wanted, and the bytes left in the storage partition.
            data_to_read = min(
                self.cache[self.cur_partition].block_size -
                (self.cur_offset % self.cache[self.cur_partition].block_size),
                remaining_data, self.block_size - self.cur_offset)
            if self.cache[self.cur_partition].exists(self.cur_offset):
                # Cache hit: serve directly.
                ret += self.cache[self.cur_partition].hit_handling(
                    self.cur_offset, data_to_read)
            else:
                # Cache miss: fetch a prefetch window aligned to the cache
                # block size, install it, then serve from the cache.
                start_offset = (self.cur_offset //
                                self.cache[self.cur_partition].block_size
                                ) * self.cache[self.cur_partition].block_size
                self.blocks[self._block_id()].send_command([
                    FileOps.read_ls,
                    b(str(start_offset)),
                    b(
                        str(
                            min(
                                file_size -
                                self.cur_partition * self.block_size -
                                self.cur_offset, self.cache[
                                    self.cur_partition].prefetch_block_num *
                                self.cache[self.cur_partition].block_size)))
                ])
                prefetched_data = self.blocks[
                    self.cur_partition].recv_response()[-1]
                self.cache[self.cur_partition].prefetch_handling(
                    start_offset, prefetched_data)
                ret += self.cache[self.cur_partition].hit_handling(
                    self.cur_offset, data_to_read)
            remaining_data -= data_to_read
            self.cur_offset += data_to_read
            # Roll over to the next partition when this one is exhausted.
            if self.cur_offset == self.block_size and self.cur_partition != self.last_partition:
                self.cur_offset = 0
                self.cur_partition += 1

        return ret
Exemple #10
0
class SharedLogOps:
    """Command names and command-type table for the shared log structure."""
    write = b('write')
    scan = b('scan')
    trim = b('trim')
    add_blocks = b('add_blocks')
    get_storage_capacity = b('get_storage_capacity')

    # Maps each operation to accessor (read-only) or mutator (state-changing).
    # NOTE(review): add_blocks is classified as an accessor even though it
    # allocates blocks — confirm this matches the server's routing rules.
    op_types = {
        scan: CommandType.accessor,
        trim: CommandType.mutator,
        write: CommandType.mutator,
        add_blocks: CommandType.accessor,
        get_storage_capacity: CommandType.accessor
    }
Exemple #11
0
    def scan(self, start_pos, end_pos, logical_streams):
        """Scan entries in [start_pos, end_pos] across all data blocks.

        Broadcasts the scan to every partition, then collects one response
        per request, concatenating all payload fields (everything after the
        status code) into a single result list.
        """
        base = 0
        dispatched = 0
        # Fan the scan out to every data block.
        while base + dispatched < len(self.block_info.data_blocks):
            cmd = [SharedLogOps.scan, b(str(start_pos)), b(str(end_pos))]
            cmd += logical_streams
            self.blocks[base + dispatched].send_command(cmd)
            dispatched += 1
        # Gather and merge the per-block payloads.
        results = []
        for idx in range(dispatched):
            reply = self.blocks[base + idx].recv_response()
            if len(reply) > 1:
                results += reply[1:]
        return results
Exemple #12
0
    def _handle_redirect(self, args, response):
        """Handle file-read statuses, stitching reads that span partitions.

        Bug fixes: the original compared response[0] to the *str* literal
        '!split_read' (every other status check wraps the literal in b(),
        so the branch could never match a bytes response under Python 3),
        and split the chain descriptor on str '!' instead of b('!').

        Returns
        -------
        list or None
            The final response with response[1] holding the full stitched
            data, or None on '!redo' to make the caller retry.
        """
        if response[0] == b('!ok'):
            self.cur_offset += len(response[1])
        elif response[0] == b('!redo'):
            return None
        elif response[0] == b('!split_read'):  # was str '!split_read'
            result = b('')
            while response[0] == b('!split_read'):
                data_part = response[1]
                result += data_part
                if self._need_chain():
                    # Split on the b'!' separator (matches sibling methods).
                    chain = response[2].split(b('!'))
                    self.blocks.append(
                        ReplicaChainClient(self.fs, self.path,
                                           self.client_cache, chain,
                                           FileOps.op_types))
                self.cur_partition += 1
                self._update_last_partition(self.cur_partition)
                self.cur_offset = 0

                # Continue reading from the next partition for the bytes
                # still missing from the requested size (args[2]).
                new_args = [
                    FileOps.read,
                    b(str(self.cur_offset)),
                    b(str(int(args[2]) - len(result)))
                ]
                response = self.blocks[self.cur_partition].run_command(
                    new_args)

                if response[0] == b('!ok'):
                    self.cur_offset += len(response[1])
                    result += response[1]
            response[1] = result
        return response
Exemple #13
0
 def _run_repeated(self, args):
     """Run a command until it yields a terminal response, then validate it.

     Raises KeyError when the terminal status is anything but '!ok'.
     """
     while True:
         raw = self.blocks[self._block_id(args)].run_command(args)
         response = self._handle_redirect(args, raw)
         if response is not None:
             break
     self.redo_times = 0
     status = response[0]
     if status != b('!ok'):
         raise KeyError(bytes_to_str(status))
     return response
Exemple #14
0
    def _handle_redirect(self, args, response):
        """Handle file-write statuses, spilling writes across partitions.

        Bug fixes: the original guard compared response[0] to
        b('split_write') — missing the '!' prefix — so it could never match
        the b('!split_write') status its own inner loop checks for; it also
        split the chain descriptor on str '!' instead of b('!').

        Returns
        -------
        list or None
            The final response, or None on '!redo' to make the caller retry.
        """
        data = args[1]
        if response[0] == b('!ok'):
            self.cur_offset += len(data)
        elif response[0] == b('!redo'):
            return None
        elif response[0] == b('!split_write'):  # was b('split_write')
            while response[0] == b('!split_write'):
                # response[1] tells how many trailing bytes did not fit.
                remaining_data_len = int(response[1])
                remaining_data = data[len(data) - remaining_data_len:len(data)]

                if self._need_chain():
                    # Split on the b'!' separator (matches sibling methods).
                    chain = response[2].split(b('!'))
                    self.blocks.append(
                        ReplicaChainClient(self.fs, self.path,
                                           self.client_cache, chain,
                                           FileOps.op_types))

                self.cur_partition += 1
                self._update_last_partition(self.cur_partition)
                self.cur_offset = 0

                # Write the spill-over into the next partition, retrying
                # through any '!redo' responses.
                while True:
                    new_args = [
                        FileOps.write, remaining_data,
                        b(str(self.cur_offset))
                    ]
                    response = self.blocks[self.cur_partition].run_command(
                        new_args)
                    if response[0] != b('!redo'):
                        break
                self.cur_offset += len(remaining_data)
        return response
Exemple #15
0
 def add_blocks(self, response, args):
     """Append a new replica-chain client when redirected past the last block.

     response[1] carries the new chain's block ids separated by b'!'.

     Raises
     ------
     ValueError
         If a new block is required but auto-scaling is disabled.
         (The original raised a bare, message-less ValueError.)
     """
     if self._block_id(args) >= len(self.blocks) - 1:
         if self.auto_scale:
             block_ids = [
                 bytes_to_str(j) for j in response[1].split(b('!'))
             ]
             chain = ReplicaChain(block_ids, 0, 0,
                                  rpc_storage_mode.rpc_in_memory)
             self.blocks.append(
                 ReplicaChainClient(self.fs, self.path, self.client_cache,
                                    chain, QueueOps.op_types))
         else:
             raise ValueError("Need additional blocks but auto_scale is disabled")
Exemple #16
0
class QueueOps:
    """Command names and command-type table for the queue data structure,
    including logical-stream (_ls) variants and rate/length accessors."""
    enqueue = b('enqueue')
    dequeue = b('dequeue')
    enqueue_ls = b('enqueue_ls')
    dequeue_ls = b('dequeue_ls')
    read_next = b('read_next')
    read_next_ls = b('read_next_ls')
    length = b('length')
    in_rate = b('in_rate')
    out_rate = b('out_rate')

    # Maps each operation to accessor (read-only) or mutator (state-changing).
    op_types = {
        enqueue: CommandType.mutator,
        dequeue: CommandType.mutator,
        enqueue_ls: CommandType.mutator,
        dequeue_ls: CommandType.mutator,
        read_next: CommandType.accessor,
        read_next_ls: CommandType.accessor,
        length: CommandType.accessor,
        in_rate: CommandType.accessor,
        out_rate: CommandType.accessor
    }
Exemple #17
0
class FileOps:
    """Command names and command-type table for the file data structure,
    including logical-stream (_ls) variants and block management ops."""
    write = b('write')
    read = b('read')
    write_ls = b('write_ls')
    read_ls = b('read_ls')
    seek = b('seek')
    add_blocks = b('add_blocks')
    get_storage_capacity = b('get_storage_capacity')

    # Maps each operation to accessor (read-only) or mutator (state-changing).
    # The original listed `seek: CommandType.accessor` twice (a duplicate
    # dict key, silently overwriting itself); the duplicate is removed.
    op_types = {
        read: CommandType.accessor,
        seek: CommandType.accessor,
        write: CommandType.mutator,
        read_ls: CommandType.accessor,
        write_ls: CommandType.mutator,
        add_blocks: CommandType.accessor,
        get_storage_capacity: CommandType.accessor
    }
Exemple #18
0
    def write_ls(self, data):
        """Write *data* at the current offset, auto-scaling with new blocks.

        Computes how many extra replica chains are needed to hold the data
        past the current capacity, allocates them (when auto_scale is on),
        then writes the data chunk-by-chunk through the per-partition cache,
        advancing cur_offset / cur_partition / last_* bookkeeping.

        Parameters
        ----------
        data : bytes
            The payload to write.

        Returns
        -------
        int
            len(data) on success; -1 if new blocks are needed but
            auto-scaling is disabled, or block allocation fails.
        """
        # Current capacity: all partitions up to last_partition, full-sized.
        file_size = (self.last_partition + 1) * self.block_size
        num_chain_needed = 0
        temp_last_offset = -1
        if self.cur_partition * self.block_size + self.cur_offset > file_size:
            # Writing past the allocated range: account for the gap plus
            # however many whole/partial blocks the data itself needs.
            num_chain_needed = int(self.cur_partition - self.last_partition)
            file_size = (self.cur_partition + 1) * self.block_size
            remain_size = file_size - self.cur_partition * self.block_size - self.cur_offset
            num_chain_needed += int(
                (len(data) - remain_size) / self.block_size +
                ((len(data) - remain_size) % self.block_size != 0))
        else:
            remain_size = file_size - self.cur_partition * self.block_size - self.cur_offset
            if remain_size < len(data):
                # Ceiling division: blocks needed for the overflow bytes.
                num_chain_needed = int(
                    (len(data) - remain_size) / self.block_size +
                    ((len(data) - remain_size) % self.block_size != 0))

        if num_chain_needed and not self.auto_scale:
            return -1
        # First allocate new blocks if needed
        while num_chain_needed != 0:
            _return = self.blocks[self.last_partition].run_command([
                FileOps.add_blocks,
                b(str(self.last_partition)),
                b(str(num_chain_needed))
            ])
            if _return[0] == b("!block_allocated"):
                self.last_partition += num_chain_needed
                # One fresh cache per newly allocated partition.
                for i in range(num_chain_needed):
                    self.cache.append(
                        FileCache(max_length=self.cache_size,
                                  block_size=self.cache_block_size,
                                  prefetch_block_num=self.prefetch_size))
                # Remember the old tail offset for the first write below.
                temp_last_offset = self.last_offset
                self.last_offset = 0
                num_chain_needed = 0
                try:
                    # _return[1:] holds one b'!'-separated chain per block.
                    for x in _return[1:]:
                        block_ids = [bytes_to_str(j) for j in x.split(b('!'))]
                        chain = ReplicaChain(block_ids, 0, 0,
                                             rpc_storage_mode.rpc_in_memory)
                        self.blocks.append(
                            ReplicaChainClient(self.fs, self.path,
                                               self.client_cache, chain,
                                               FileOps.op_types))
                except:
                    # NOTE(review): bare except silently maps any parsing or
                    # connection error to the -1 failure code.
                    return -1
        if self.block_size == self.cur_offset:
            # Current partition is exactly full; start at the next one.
            self.cur_offset = 0
            self.cur_partition += 1
        # Parallel write
        remaining_data = len(data)
        start_partition = self._block_id()
        count = 0
        while remaining_data > 0:
            count += 1
            # Chunk bounded by cache-block remainder, bytes left to write,
            # and space left in the current storage partition.
            data_to_write = data[
                len(data) - remaining_data:len(data) - remaining_data + min(
                    self.cache[self.cur_partition].block_size -
                    (self.cur_offset %
                     self.cache[self.cur_partition].block_size
                     ), remaining_data, self.block_size - self.cur_offset)]
            if temp_last_offset >= 0:
                # First write after an allocation reports the pre-allocation
                # tail offset to the server.
                self.blocks[self._block_id()].send_command([
                    FileOps.write, data_to_write,
                    b(str(self.cur_offset)),
                    b(str(self.cache[self.cur_partition].block_size)),
                    b(str(temp_last_offset))
                ])
            else:
                self.blocks[self._block_id()].send_command([
                    FileOps.write, data_to_write,
                    b(str(self.cur_offset)),
                    b(str(self.cache[self.cur_partition].block_size)),
                    b(str(self.last_offset))
                ])
            resp = self.blocks[self.cur_partition].recv_response()
            # Keep the local cache coherent with what was just written.
            self.cache[self.cur_partition].miss_handling(
                self.cur_offset, resp[-1])
            remaining_data -= len(data_to_write)
            self.cur_offset += len(data_to_write)
            if self.last_offset < self.cur_offset and self.cur_partition == self.last_partition:
                self.last_offset = self.cur_offset
            # Roll over to the next partition when this one fills up.
            if self.cur_offset >= self.block_size and self.cur_partition != self.last_partition:
                self.cur_offset = 0
                self.cur_partition += 1
                if self.last_partition < self.cur_partition:
                    self.last_partition = self.cur_partition
                    self.last_offset = self.cur_offset

        return len(data)
Exemple #19
0
 def write(self, data):
     """Write *data* at the current file offset, retrying until accepted."""
     cmd = [FileOps.write, data, b(str(self.cur_offset))]
     self._run_repeated(cmd)
Exemple #20
0
 def read(self, size):
     """Read up to *size* bytes from the current offset; return the payload."""
     cmd = [FileOps.read, b(str(self.cur_offset)), b(str(size))]
     return self._run_repeated(cmd)[1]
Exemple #21
0
class HashTableOps:
    """Command names and command-type table for the hash table structure,
    including logical-stream (_ls) variants."""
    exists = b('exists')
    get = b('get')
    put = b('put')
    remove = b('remove')
    update = b('update')
    upsert = b('upsert')
    exists_ls = b('exists_ls')
    get_ls = b('get_ls')
    put_ls = b('put_ls')
    remove_ls = b('remove_ls')
    update_ls = b('update_ls')
    upsert_ls = b('upsert_ls')

    # Maps each operation to accessor (read-only) or mutator (state-changing).
    # NOTE(review): update/update_ls are classified as accessors although
    # they change values — confirm against the server's command registry.
    op_types = {
        exists: CommandType.accessor,
        get: CommandType.accessor,
        put: CommandType.mutator,
        remove: CommandType.mutator,
        update: CommandType.accessor,
        upsert: CommandType.mutator,
        exists_ls: CommandType.accessor,
        get_ls: CommandType.accessor,
        put_ls: CommandType.mutator,
        remove_ls: CommandType.mutator,
        update_ls: CommandType.accessor,
        upsert_ls: CommandType.mutator
    }
Exemple #22
0
 def _handle_redirect(self, args, response):
     """Resolve hash-table redirect/backoff statuses.

     Follows '!exporting' redirects to the destination chain, applies
     exponential backoff on '!full', and returns None on any status that
     requires the caller to retry.

     Returns
     -------
     list or None
         The terminal response, or None to signal a retry.
     """
     # Keys being exported: re-issue the command against the new chain
     # until it stops reporting '!exporting'.
     while b(response[0]) == b('!exporting'):
         args_copy = copy.deepcopy(args)
         if args[0] == b("update") or args[0] == b("upsert"):
             # These ops need the extra redirect payload fields.
             args_copy += [response[2], response[3]]
         block_ids = [bytes_to_str(x) for x in response[1].split(b('!'))]
         chain = ReplicaChain(block_ids, 0, 0,
                              rpc_storage_mode.rpc_in_memory)
         # Retry on the destination chain through any '!redo' responses.
         while True:
             response = ReplicaChainClient(
                 self.fs, self.path, self.client_cache, chain,
                 HashTableOps.op_types).run_command_redirected(args_copy)
             if b(response[0]) != b("!redo"):
                 break
     if b(response[0]) == b('!block_moved'):
         # Topology changed; refresh metadata and let the caller retry.
         self._refresh()
         return None
     if b(response[0]) == b('!full'):
         # Exponential backoff: 1ms * 2^redo_times.
         time.sleep(0.001 * math.pow(2, self.redo_times))
         self.redo_times += 1
         return None
     if b(response[0]) == b("!redo"):
         return None
     return response
Exemple #23
0
    def write(self, pos, data_, logical_streams):
        """Write *data_* tagged with *logical_streams* at position *pos*.

        Sizes the write (stream headers + payload), allocates extra replica
        chains when capacity would be exceeded (auto-scale), then dispatches
        the write and drains the responses.

        Parameters
        ----------
        pos : int
            Log position for the write.
        data_ : bytes or str
            Payload to append.
        logical_streams : list
            Stream identifiers attached to the entry.

        Returns
        -------
        int
            len(data) on success; -1 if blocks are needed but auto-scaling
            is disabled, or allocation fails.
        """
        file_size = (self.last_partition + 1) * self.block_size
        num_chain_needed = 0

        # Total size = concatenated stream ids + payload, used only for
        # capacity accounting.
        # NOTE(review): starts from str "" while stream ids/payload are
        # likely bytes elsewhere in this codebase — confirm types.
        data = ""
        for ls in logical_streams:
            data += ls
        data += data_

        if self.cur_partition * self.block_size + self.cur_offset > file_size:
            # Writing past the allocated range: gap plus ceiling of the
            # overflow divided by block size.
            num_chain_needed = int(self.cur_partition - self.last_partition)
            file_size = (self.cur_partition + 1) * self.block_size
            remain_size = file_size - self.cur_partition * self.block_size - self.cur_offset
            num_chain_needed += int(
                (len(data) - remain_size) / self.block_size +
                ((len(data) - remain_size) % self.block_size != 0))
        else:
            remain_size = file_size - self.cur_partition * self.block_size - self.cur_offset
            if remain_size < len(data):
                num_chain_needed = int(
                    (len(data) - remain_size) / self.block_size +
                    ((len(data) - remain_size) % self.block_size != 0))

        if num_chain_needed and not self.auto_scale:
            return -1
        # First allocate new blocks if needed
        while num_chain_needed != 0:
            _return = self.blocks[self.last_partition].run_command([
                SharedLogOps.add_blocks,
                b(str(self.last_partition)),
                b(str(num_chain_needed))
            ])
            if _return[0] == b("!block_allocated"):
                self.last_partition += num_chain_needed
                self.last_offset = 0
                num_chain_needed = 0
                try:
                    # _return[1:] holds one b'!'-separated chain per block.
                    for x in _return[1:]:
                        block_ids = [bytes_to_str(j) for j in x.split(b('!'))]
                        chain = ReplicaChain(block_ids, 0, 0,
                                             rpc_storage_mode.rpc_in_memory)
                        self.blocks.append(
                            ReplicaChainClient(self.fs, self.path,
                                               self.client_cache, chain,
                                               SharedLogOps.op_types))
                except:
                    # NOTE(review): bare except maps any failure to -1.
                    return -1
        if self.block_size == self.cur_offset:
            # Current partition exactly full; advance to the next.
            self.cur_offset = 0
            self.cur_partition += 1
        # Parallel write
        remaining_data = len(data)
        start_partition = self._block_id()
        count = 0
        # NOTE(review): remaining_data is decremented by len(data) each
        # iteration, so this loop effectively dispatches once; the full
        # payload (not a chunk) is sent every time — confirm intent.
        while remaining_data > 0:
            count += 1
            if len(data) > self.block_size - self.cur_offset:
                # Data does not fit in the current partition; move on.
                self.cur_offset = 0
                self.cur_partition += 1
                if self.last_partition < self.cur_partition:
                    self.last_partition = self.cur_partition
                    self.last_offset = self.cur_offset
            arg_list = [SharedLogOps.write, b(str(pos)), data_]
            arg_list += logical_streams

            self.blocks[self._block_id()].send_command(arg_list)
            remaining_data -= len(data)
            self.cur_offset += len(data)
            if self.last_offset < self.cur_offset and self.cur_partition == self.last_partition:
                self.last_offset = self.cur_offset

        # Drain one response per dispatched command.
        for i in range(0, count):
            self.blocks[start_partition + i].recv_response()

        return len(data)
 def _run_command_redirected(self, args):
     """Send *args* tagged with the redirected marker and await the reply."""
     redirected_tag = b('!redirected')
     # Tag the command exactly once, even across retries.
     if args[-1] != redirected_tag:
         args.append(redirected_tag)
     self.send_command(args)
     return self._recv_response()