Example 1
def main(args):
    """Main run function."""

    # Build the gridworld
    transitions, rewards = gridworld()

    # print('transitions: {}'.format(transitions))
    print('transitions shape: {}'.format(transitions.shape))
    # print('rewards: {}'.format(rewards))
    print('rewards shape: {}'.format(rewards.shape))

    # Tensors now live on the remote workers
    transitions = transitions.fix_precision().share(bob, alice)
    rewards = rewards.fix_precision().share(bob, alice)

    num_actions = rewards.shape[0]
    num_states = rewards.shape[1]
    print('Number of actions: {}'.format(num_actions))
    print('Number of states: {}'.format(num_states))

    # Initialize a policy to hold the optimal policy
    policy = sy.zeros(num_states)
    # Initialize a value function to hold the long-term value of state, s
    values = sy.zeros(num_states)
    policy = policy.fix_precision().share(bob, alice)
    values = values.fix_precision().share(bob, alice)

    # Get gamma and theta from args and validate theta
    gamma = args.gamma * sy.ones(1)
    theta = args.theta * sy.ones(1)
    # check theta stopping condition
    assert float(theta) > 0, "Theta must be greater than 0."

    # Share theta and gamma for learning
    gamma = gamma.fix_precision().share(bob, alice)
    theta = theta.fix_precision().share(bob, alice)

    # run value iteration
    values, policy = value_iteration(
        values=values,
        policy=policy,
        transitions=transitions,
        rewards=rewards,
        gamma=gamma,
        theta=theta,
        max_iter=args.max_iter,
    )
    values = values.get().decode()
    policy = policy.get().decode()

    # print results
    print('\n************************')
    d_state = (int(np.sqrt(num_states)), int(np.sqrt(num_states)))
    print('Optimized Values:\n {}'.format(np.reshape(list(values), d_state)))
    print('Optimized Policy:\n {}'.format(np.reshape(list(policy), d_state)))
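
A hypothetical command-line driver for main() above (the original listing does not show one): a minimal argparse sketch, assuming gamma, theta, and max_iter are the only attributes the function reads from args, as its body implies.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Encrypted value iteration on a 4x4 gridworld')
    parser.add_argument('--gamma', type=float, default=0.9,
                        help='discount factor')
    parser.add_argument('--theta', type=float, default=1e-4,
                        help='stopping threshold; must be greater than 0')
    parser.add_argument('--max_iter', type=int, default=100,
                        help='maximum number of value-iteration sweeps')
    main(parser.parse_args())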
Example 2
    def init_grad_(self):
        """
        Initialise grad as an empty tensor
        """
        self.grad = sy.Variable(sy.zeros(self.size()).type(type(self.data)))
        self.grad.native_set_()
        self.grad.child.owner = self.owner
        self.grad.data.child.owner = self.owner
Example 3
    def __init__(self,
                 num_states,
                 num_actions,
                 epsilon=0.1,
                 alpha=0.5,
                 gamma=1.0):

        self.Q = sy.zeros(num_states, num_actions)
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.actions = range(num_actions)
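
A hypothetical epsilon-greedy companion method for this agent (not part of the original class): a minimal sketch, assuming a row of Q indexes like a torch tensor and that float() works on a single element, as the other examples in this listing suggest.

import random

def choose_action(self, state):
    # Explore: with probability epsilon, pick a uniformly random action
    if random.random() < self.epsilon:
        return random.choice(list(self.actions))
    # Exploit: otherwise take the action with the highest Q-value for this state
    return max(self.actions, key=lambda a: float(self.Q[state][a]))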
Example 4
    def assign_grad_(self, var_grad):
        """
        Assign any type of variable to self.grad
        """
        # save the var_grad.data
        var_grad_data = var_grad.data

        # Transform var_grad into an envelope compatible with .grad assignment
        if self.size() != var_grad.size():
            var_grad.data = sy.zeros(self.data.size())
        var_grad.data = var_grad.data.type(type(self.data))

        self.grad = var_grad

        # put back original var_grad.data
        self.grad.data = var_grad_data
Example 5
def gridworld():
    """4x4 gridworld example."""
    # number of states
    S = 16

    # number of actions
    A = 4
    # indices of the actions
    up, down, right, left = range(A)

    # Transitions.
    T = sy.zeros((A, S, S))

    # Grid transitions.
    grid_transitions = {
        # from_state: ((action, to_state), ...)
        0: ((up, 0), (down, 0), (right, 0), (left, 0)),
        1: ((up, 1), (down, 5), (right, 2), (left, 0)),
        2: ((up, 2), (down, 6), (right, 3), (left, 1)),
        3: ((up, 3), (down, 7), (right, 3), (left, 2)),
        4: ((up, 0), (down, 8), (right, 5), (left, 4)),
        5: ((up, 1), (down, 9), (right, 6), (left, 4)),
        6: ((up, 2), (down, 10), (right, 7), (left, 5)),
        7: ((up, 3), (down, 11), (right, 7), (left, 6)),
        8: ((up, 4), (down, 12), (right, 9), (left, 8)),
        9: ((up, 5), (down, 13), (right, 10), (left, 8)),
        10: ((up, 6), (down, 14), (right, 11), (left, 9)),
        11: ((up, 7), (down, 15), (right, 11), (left, 10)),
        12: ((up, 8), (down, 12), (right, 13), (left, 12)),
        13: ((up, 9), (down, 13), (right, 14), (left, 12)),
        14: ((up, 10), (down, 14), (right, 15), (left, 13)),
        15: ((up, 15), (down, 15), (right, 15), (left, 15))
    }
    for i, moves in grid_transitions.items():
        for a, j in moves:
            T[a, i, j] = 1.0

    # Rewards.
    R = sy.ones((A, S, S)).mul(-1)
    R[:, 0, :] = 0
    R[:, 15, :] = 0

    return T, R
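
A quick sanity check one might run on gridworld(), assuming the returned tensors behave like torch tensors (.shape, indexing, .sum(), float()), as Example 1 implies: since the grid is deterministic, every (action, from_state) slice of T must put probability 1 on exactly one destination state.

T, R = gridworld()
for a in range(T.shape[0]):      # actions
    for s in range(T.shape[1]):  # from-states
        # each row should sum to 1: one deterministic destination
        assert float(T[a, s].sum()) == 1.0, (a, s)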
Example 6
    def test_zeros(self):
        self.assertTrue((syft.zeros(5).data == np.zeros(5)).all())
Example 7
def one_hot(index, length):
    vect = sy.zeros(length).long()
    vect[index] = 1
    return vect
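
Expected behaviour, assuming sy.zeros(length).long() yields a torch-like LongTensor as in the other examples:

v = one_hot(2, 5)
print(v)  # a length-5 LongTensor: 0 0 1 0 0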
Example 8
    def zeros(self, dim):
        """Returns an encrypted tensor of zeros"""
        return syft.zeros(dim).encrypt(self)
Example 9
    def zeros(self, dim):
        """Returns an encrypted tensor of zeros"""
        return PaillierTensor(self, syft.zeros(dim))
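
Examples 8 and 9 expose the same helper in two styles: the first builds plaintext zeros and then calls .encrypt(self), presumably encrypting against the worker's key, while the second constructs a PaillierTensor directly from the worker and the plaintext zeros. A hypothetical call, assuming worker is an instance of the class defining zeros():

enc = worker.zeros(5)  # an encrypted length-5 tensor of zeros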
Example 10
    def handle_call(cls, syft_command, owner):
        """
        Execute a forwarded command on the native tensor with native operations.
        Receives a syft command and an owner, converts it into a command with
        native torch args, executes the native operations, and converts the
        result back into a syft response using _LocalTensors.
        """
        tensor_command, torch_type = torch_utils.prepare_child_command(
            syft_command, replace_tensorvar_with_child=True)
        torch_utils.assert_has_only_torch_tensorvars(tensor_command)

        attr = tensor_command['command']
        args = tensor_command['args']
        kwargs = tensor_command['kwargs']
        has_self = tensor_command['has_self']

        if has_self:
            self = tensor_command['self']
            attr = torch._command_guard(attr, torch.tensorvar_methods)
            command = getattr(self, "native_" + attr)
        else:
            attr = torch._command_guard(attr, torch.torch_modules)
            elems = attr.split('.')
            elems[-1] = 'native_' + elems[-1]
            native_func_name = '.'.join(elems)
            command = eval(native_func_name)

        response = command(*args, **kwargs)

        # TODO : control registration process
        if response is None:
            return response

        if owner.id != owner.hook.local_worker.id:
            if isinstance(response, (int, float, bool)):
                response = sy.zeros(1) + response
            elif isinstance(response, (np.ndarray, )):
                response = sy.FloatTensor(response)
        else:
            if isinstance(response, (int, float, bool, np.ndarray)):
                return response

        # If the command is an in-place method, wrap self and return
        if has_self and utils.is_in_place_method(attr):
            # wrap the main element
            torch_utils.wrap_command_with(response, syft_command['self'])

            if torch_utils.is_variable(response):
                # Also wrap the data if it's a variable (don't use wrap_command_with: the chain is not well formed yet)
                syft_command['self'].child.data = response.data
                response.data.parent = syft_command['self'].child.data.parent
                # And wrap the grad if there is one
                if response.grad is not None:
                    if response.grad.data.dim() > 0:
                        syft_command['self'].child.grad = response.grad
                    else:
                        syft_command['self'].child.grad.native_set_()
                    response.grad.parent = syft_command[
                        'self'].child.grad.parent
                # Finally, fix the links .data and .grad
                if response.grad is None:
                    torch_utils.link_var_chain_to_data_chain(
                        syft_command['self'], response.data.child)
                else:
                    torch_utils.link_var_chain_to_data_and_grad_chains(
                        syft_command['self'], response.data.child,
                        response.grad.child)

            return_response = syft_command['self']
        # Else, the response is not self: iterate over the response(s) and wrap each with a syft tensor
        else:
            responses = response if isinstance(response,
                                               tuple) else (response, )
            syft_responses = []
            for resp in responses:
                if resp is None:  # Don't wrap None
                    syft_responses.append(resp)
                    continue

                if isinstance(resp, (int, float, bool)):
                    # if not final worker, convert into Float Tensor, which comes with a _LocalTensor
                    if owner.id != owner.hook.local_worker.id:
                        resp = sy.zeros(1) + resp
                    else:  # Else don't wrap it
                        syft_responses.append(resp)
                        continue

                syft_response = sy._LocalTensor(child=resp,
                                                parent=resp,
                                                owner=owner,
                                                torch_type='syft.' +
                                                type(resp).__name__)

                if torch_utils.is_variable(resp):
                    if resp.grad is None:
                        torch_utils.link_var_chain_to_data_chain(
                            syft_response, resp.data.child)
                    else:
                        torch_utils.link_var_chain_to_data_and_grad_chains(
                            syft_response, resp.data.child, resp.grad.child)

                syft_responses.append(syft_response)

            return_response = tuple(syft_responses) if len(
                syft_responses) > 1 else syft_responses[0]

        return return_response
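
An illustration of the native-name rewrite performed in the branch without a self above (grounded directly in the code; attr strings are assumed to look like 'torch.add'):

attr = 'torch.add'
elems = attr.split('.')
elems[-1] = 'native_' + elems[-1]
print('.'.join(elems))  # -> torch.native_add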