def main(args):
    """Run value iteration on the gridworld using shared fixed-precision tensors.

    Builds the gridworld model, converts every tensor taking part in the
    computation to fixed precision and shares it between the ``bob`` and
    ``alice`` workers, runs ``value_iteration`` and prints the decoded
    value function and policy as square grids.

    Args:
        args: Parsed command-line arguments. The attributes ``gamma``,
            ``theta`` and ``max_iter`` are read.

    Raises:
        AssertionError: If ``theta`` is not strictly positive.
    """
    # Build the gridworld
    transitions, rewards = gridworld()
    print('transitions shape: {}'.format(transitions.shape))
    print('rewards shape: {}'.format(rewards.shape))

    # Read the dimensions from the plaintext tensor, before it is replaced
    # by its shared counterpart below.
    num_actions = rewards.shape[0]
    num_states = rewards.shape[1]

    # Keep the shared tensors: like policy/values/gamma/theta below, the
    # result of fix_precision().share() must be rebound, otherwise the
    # plaintext tensors would be passed on to value_iteration.
    transitions = transitions.fix_precision().share(bob, alice)
    rewards = rewards.fix_precision().share(bob, alice)

    print('Number of actions: {}'.format(num_actions))
    print('Number of states: {}'.format(num_states))

    # Initialize a policy to hold the optimal policy
    policy = sy.zeros(num_states)
    # Initialize a value function to hold the long-term value of state, s
    values = sy.zeros(num_states)

    policy = policy.fix_precision().share(bob, alice)
    values = values.fix_precision().share(bob, alice)

    # Get theta and gamma from args as one-element tensors
    gamma = args.gamma * sy.ones(1)
    theta = args.theta * sy.ones(1)

    # Check the theta stopping condition while it is still plaintext
    assert float(theta) > 0, "Theta must be greater than 0."

    # Share theta and gamma for learning
    gamma = gamma.fix_precision().share(bob, alice)
    theta = theta.fix_precision().share(bob, alice)

    # Run value iteration
    values, policy = value_iteration(
        values=values,
        policy=policy,
        transitions=transitions,
        rewards=rewards,
        gamma=gamma,
        theta=theta,
        max_iter=args.max_iter,
    )

    # Retrieve the shared results and decode them from fixed precision
    values = values.get().decode()
    policy = policy.get().decode()

    # Print results laid out as a square grid
    print('\n************************')
    side = int(np.sqrt(num_states))
    d_state = (side, side)
    print('Optimized Values:\n {}'.format(np.reshape(list(values), d_state)))
    print('Optimized Policy:\n {}'.format(np.reshape(list(policy), d_state)))
def testDotProduct(self):
    """Dot product of a plaintext and an encrypted ones-vector decrypts to 10.

    The product is checked in both operand orders.
    """
    public_key, secret_key = Paillier()
    encrypted = public_key.ones(10)
    plain = sy.ones(10)

    plain_dot_encrypted = y_dot_x = plain.dot(encrypted).decrypt(secret_key)
    encrypted_dot_plain = encrypted.dot(plain).decrypt(secret_key)

    self.assertEqual(plain_dot_encrypted, 10)
    self.assertEqual(encrypted_dot_plain, 10)
def gridworld():
    """Build the transition and reward tensors of the 4x4 gridworld example.

    States are numbered 0..15 row by row. States 0 and 15 only transition
    to themselves and carry a reward of 0; every other transition has a
    reward of -1. A move that would leave the grid keeps the agent in its
    current state.

    Returns:
        A pair ``(T, R)`` of tensors, both of shape (4, 16, 16) and indexed
        as ``[action, from_state, to_state]``. ``T`` holds 1.0 for each
        permitted move and 0 elsewhere; ``R`` holds the rewards.
    """
    side = 4
    n_states = side * side
    n_actions = 4
    # indices of the actions
    up, down, right, left = range(n_actions)
    # states that only loop back onto themselves
    terminals = (0, n_states - 1)

    # Transitions.
    transition = sy.zeros((n_actions, n_states, n_states))
    for state in range(n_states):
        row, col = divmod(state, side)
        if state in terminals:
            targets = {up: state, down: state, right: state, left: state}
        else:
            targets = {
                up: state - side if row > 0 else state,
                down: state + side if row < side - 1 else state,
                right: state + 1 if col < side - 1 else state,
                left: state - 1 if col > 0 else state,
            }
        for action, next_state in targets.items():
            transition[action, state, next_state] = 1.0

    # Rewards.
    reward = sy.ones((n_actions, n_states, n_states)).mul(-1)
    for state in terminals:
        reward[:, state, :] = 0

    return transition, reward
def test_ones(self):
    """``syft.ones(5)`` holds the same data as ``np.ones(5)``."""
    actual = syft.ones(5).data
    expected = np.ones(5)
    elementwise_equal = actual == expected
    self.assertTrue(elementwise_equal.all())
def ones(self, dim):
    """Return a tensor of ones of dimension ``dim``, encrypted with this key.

    The plaintext tensor is created with ``syft.ones`` and then encrypted
    by passing this object to its ``encrypt`` method.
    """
    plaintext = syft.ones(dim)
    return plaintext.encrypt(self)
def ones(self, dim):
    """Return a ``PaillierTensor`` of ones of dimension ``dim``.

    The tensor is constructed from this object and a plaintext
    ``syft.ones(dim)`` tensor.
    """
    plaintext = syft.ones(dim)
    return PaillierTensor(self, plaintext)