def main(args):
    """Run value iteration on the gridworld with shared tensors and print the result.

    The transition and reward tensors, the initial policy/value tensors and the
    ``gamma``/``theta`` scalars are all fixed-precision encoded and shared
    between the ``bob`` and ``alice`` workers before being handed to
    ``value_iteration``. The returned values and policy are fetched back,
    decoded and printed as square grids.

    Args:
        args: Parsed options; ``args.gamma``, ``args.theta`` and
            ``args.max_iter`` are read.

    Raises:
        AssertionError: If ``theta`` is not strictly positive.
    """
    # Build the gridworld
    transitions, rewards = gridworld()
    print('transitions shape: {}'.format(transitions.shape))
    print('rewards shape: {}'.format(rewards.shape))

    # Read the dimensions while the tensors are still plain local tensors, so
    # this does not depend on what `.shape` reports for a shared tensor.
    num_actions = rewards.shape[0]
    num_states = rewards.shape[1]
    print('Number of actions: {}'.format(num_actions))
    print('Number of states: {}'.format(num_states))

    # Tensors now live on the remote workers. Keep the returned handles, as is
    # done for every other shared tensor below; discarding the result would
    # pass the unshared tensors on if `share` is not an in-place operation.
    transitions = transitions.fix_precision().share(bob, alice)
    rewards = rewards.fix_precision().share(bob, alice)

    # Initialize a policy to hold the optimal policy
    policy = sy.zeros(num_states)
    # Initialize a value function to hold the long-term value of state, s
    values = sy.zeros(num_states)

    policy = policy.fix_precision().share(bob, alice)
    values = values.fix_precision().share(bob, alice)

    # Get theta and gamma from args and check value
    gamma = args.gamma * sy.ones(1)
    theta = args.theta * sy.ones(1)

    # check theta stopping condition
    assert float(theta) > 0, "Theta must be greater than 0."

    # Share theta and gamma for learning
    gamma = gamma.fix_precision().share(bob, alice)
    theta = theta.fix_precision().share(bob, alice)

    # run value iteration
    values, policy = value_iteration(
        values=values,
        policy=policy,
        transitions=transitions,
        rewards=rewards,
        gamma=gamma,
        theta=theta,
        max_iter=args.max_iter,
    )

    # Pull the shared results back to the local worker and decode them
    values = values.get().decode()
    policy = policy.get().decode()

    # print results
    print('\n************************')
    side = int(np.sqrt(num_states))
    d_state = (side, side)
    print('Optimized Values:\n {}'.format(np.reshape(list(values), d_state)))
    print('Optimized Policy:\n {}'.format(np.reshape(list(policy), d_state)))
def init_grad_(self):
    """
    Give this variable a fresh ``grad``.

    A zero tensor of this variable's size, cast to the type of ``self.data``,
    is wrapped in a ``sy.Variable`` and stored as ``self.grad``;
    ``native_set_()`` is then called on it with no arguments, and both the
    grad's child and the grad data's child are assigned this variable's owner.
    """
    data_type = type(self.data)
    blank = sy.zeros(self.size()).type(data_type)
    self.grad = sy.Variable(blank)
    self.grad.native_set_()

    owner = self.owner
    self.grad.child.owner = owner
    self.grad.data.child.owner = owner
def __init__(self, num_states, num_actions, epsilon=0.1, alpha=0.5, gamma=1.0):
    """Set up a zero-initialised Q table and store the hyperparameters.

    Args:
        num_states: Number of rows of the Q table.
        num_actions: Number of columns of the Q table; also defines
            ``self.actions`` as ``range(num_actions)``.
        epsilon: Stored as ``self.epsilon``.
        alpha: Stored as ``self.alpha``.
        gamma: Stored as ``self.gamma``.
    """
    self.Q = sy.zeros(num_states, num_actions)
    self.epsilon, self.alpha, self.gamma = epsilon, alpha, gamma
    self.actions = range(num_actions)
def assign_grad_(self, var_grad): """ Assign to self.grad any type of variable """ # save the var_grad.data var_grad_data = var_grad.data # Transform var_grad into an envelope compatible with .grad assignment if self.size() != var_grad.size(): var_grad.data = sy.zeros(self.data.size()) var_grad.data = var_grad.data.type(type(self.data)) self.grad = var_grad # put back original var_grad.data self.grad.data = var_grad_data
def gridworld():
    """4x4 gridworld example.

    States are numbered 0..15 row by row. The corner states 0 and 15 map
    every action back onto themselves; from any other state an action moves
    one cell up/down/right/left, or leaves the state unchanged when the move
    would leave the grid.

    Returns:
        A ``(T, R)`` pair of tensors of shape (4, 16, 16), indexed as
        ``[action, from_state, to_state]``. ``T`` holds 1.0 for each
        transition described above and 0 elsewhere. ``R`` is -1 everywhere
        except the entries whose from-state is 0 or 15, which are 0.
    """
    side = 4
    n_states = side * side
    n_actions = 4
    # indices of the actions
    up, down, right, left = range(n_actions)
    # states whose every action loops back to the same state
    absorbing = (0, n_states - 1)

    # Transitions.
    T = sy.zeros((n_actions, n_states, n_states))
    for state in range(n_states):
        if state in absorbing:
            targets = dict.fromkeys((up, down, right, left), state)
        else:
            row, col = divmod(state, side)
            targets = {
                up: state - side if row > 0 else state,
                down: state + side if row < side - 1 else state,
                right: state + 1 if col < side - 1 else state,
                left: state - 1 if col > 0 else state,
            }
        for action, next_state in targets.items():
            T[action, state, next_state] = 1.0

    # Rewards.
    R = sy.ones((n_actions, n_states, n_states)).mul(-1)
    for state in absorbing:
        R[:, state, :] = 0

    return T, R
def test_zeros(self):
    """``syft.zeros(5)`` holds the same data as ``np.zeros(5)``."""
    actual = syft.zeros(5).data
    expected = np.zeros(5)
    elementwise_equal = actual == expected
    self.assertTrue(elementwise_equal.all())
def one_hot(index, length):
    """Return a long tensor of ``length`` zeros with position ``index`` set to 1."""
    encoded = sy.zeros(length).long()
    encoded[index] = 1
    return encoded
def zeros(self, dim):
    """Returns an encrypted tensor of zeros.

    A plain ``syft.zeros(dim)`` tensor is built first and then encrypted
    with this object via its ``encrypt`` method.
    """
    plain = syft.zeros(dim)
    return plain.encrypt(self)
def zeros(self, dim):
    """Returns an encrypted tensor of zeros.

    A plain ``syft.zeros(dim)`` tensor is built first and then wrapped,
    together with this object, in a ``PaillierTensor``.
    """
    plain = syft.zeros(dim)
    return PaillierTensor(self, plain)
def handle_call(cls, syft_command, owner):
    """
    Execute a forwarded command on the native tensor with native operations.
    Receive a syft command and an owner, and converts it into command with
    native torch args. Execute native operations and converts it back into
    syft response using _LocalTensors.

    Args:
        syft_command: The command in syft form. After preparation it provides
            the keys 'command', 'args', 'kwargs', 'has_self' and, when
            'has_self' is true, 'self'.
        owner: The worker on whose behalf the command runs; compared against
            ``owner.hook.local_worker`` to decide whether plain Python/numpy
            results may be returned unwrapped.

    Returns:
        - ``None`` when the native call returns ``None``.
        - The raw response when the owner is the local worker and the response
          is an int, float, bool or numpy array.
        - ``syft_command['self']`` for in-place methods.
        - Otherwise a ``sy._LocalTensor`` wrapping the response, or a tuple of
          them (with ``None`` and local scalars passed through unwrapped) when
          the native call returned several values.
    """
    # NOTE(review): the block layout was reconstructed from collapsed source
    # lines - confirm the nesting against the original file.

    # Replace syft tensors/variables in the command by their torch children
    tensor_command, torch_type = torch_utils.prepare_child_command(
        syft_command, replace_tensorvar_with_child=True)
    torch_utils.assert_has_only_torch_tensorvars(tensor_command)

    attr = tensor_command['command']
    args = tensor_command['args']
    kwargs = tensor_command['kwargs']
    has_self = tensor_command['has_self']

    if has_self:
        # Method call: look up the "native_"-prefixed method on the object
        self = tensor_command['self']
        attr = torch._command_guard(attr, torch.tensorvar_methods)
        command = getattr(self, "native_" + attr)
    else:
        # Function call: prefix the last path component with "native_"
        attr = torch._command_guard(attr, torch.torch_modules)
        elems = attr.split('.')
        elems[-1] = 'native_' + elems[-1]
        native_func_name = '.'.join(elems)
        # NOTE(review): eval() on a name taken from the incoming command; it
        # relies entirely on torch._command_guard above having validated
        # `attr` - confirm the guard restricts it to known torch functions.
        command = eval(native_func_name)

    response = command(*args, **kwargs)

    # TODO : control registration process
    if response is None:
        return response

    if owner.id != owner.hook.local_worker.id:
        # Not the local worker: turn plain scalars/arrays into tensors so they
        # can be wrapped below
        if isinstance(response, (int, float, bool)):
            response = sy.zeros(1) + response
        elif isinstance(response, (np.ndarray, )):
            response = sy.FloatTensor(response)
    else:
        # Local worker: plain scalars/arrays are returned as they are
        if isinstance(response, (int, float, bool, np.ndarray)):
            return response

    # If the command is an in-place method, wrap self and return
    if has_self and utils.is_in_place_method(attr):
        # wrap the main element
        torch_utils.wrap_command_with(response, syft_command['self'])

        if torch_utils.is_variable(response):
            # Also wrap the data if it's a variable (don't use wrap_command_with: the chain is not well formed yet)
            syft_command['self'].child.data = response.data
            response.data.parent = syft_command['self'].child.data.parent
            # And wrap the grad if there is one
            if response.grad is not None:
                if response.grad.data.dim() > 0:
                    syft_command['self'].child.grad = response.grad
                else:
                    # Grad data has no dimension: call native_set_() with no
                    # arguments on the existing grad instead of assigning
                    syft_command['self'].child.grad.native_set_()
                response.grad.parent = syft_command[
                    'self'].child.grad.parent
            # Finally, fix the links .data and .grad
            if response.grad is None:
                torch_utils.link_var_chain_to_data_chain(
                    syft_command['self'], response.data.child)
            else:
                torch_utils.link_var_chain_to_data_and_grad_chains(
                    syft_command['self'], response.data.child,
                    response.grad.child)

        return_response = syft_command['self']
    # Else, the response is not self. Iterate over the response(s) and wrap with a syft tensor
    else:
        # Normalise to a tuple so single and multiple results share one loop
        responses = response if isinstance(response, tuple) else (response, )
        syft_responses = []
        for resp in responses:
            if resp is None:  # Don't wrap None
                syft_responses.append(resp)
                continue

            if isinstance(resp, (int, float, bool)):
                # if not final worker, convert into Float Tensor, which comes with a _LocalTensor
                if owner.id != owner.hook.local_worker.id:
                    resp = sy.zeros(1) + resp
                else:  # Else don't wrap it
                    syft_responses.append(resp)
                    continue

            syft_response = sy._LocalTensor(child=resp,
                                            parent=resp,
                                            owner=owner,
                                            torch_type='syft.' + type(resp).__name__)

            if torch_utils.is_variable(resp):
                # Variables also need their .data (and .grad) chains linked
                if resp.grad is None:
                    torch_utils.link_var_chain_to_data_chain(
                        syft_response, resp.data.child)
                else:
                    torch_utils.link_var_chain_to_data_and_grad_chains(
                        syft_response, resp.data.child, resp.grad.child)

            syft_responses.append(syft_response)

        # Unpack a single result so callers get it directly, not a 1-tuple
        return_response = tuple(syft_responses) if len(
            syft_responses) > 1 else syft_responses[0]

    return return_response