def test_forward(self):
     """Check that mytorch's Tanh forward output equals torch.nn.Tanh's."""
     inputs = random_tensor_2d(10)
     reference_layer, candidate_layer = torch.nn.Tanh(), mytorch.nn.Tanh()
     expected = reference_layer(inputs)
     actual = candidate_layer(inputs)
     tt.assert_equal(expected, actual)
 def test_forward(self):
     """Check that mytorch's ReLU forward output equals torch.nn.ReLU's."""
     inputs = random_tensor_2d(10)
     reference_layer, candidate_layer = torch.nn.ReLU(), mytorch.nn.ReLU()
     expected = reference_layer(inputs)
     actual = candidate_layer(inputs)
     tt.assert_equal(expected, actual)
    def test_simple(self):
        """Check the buffer's advantages against a hand-computed example.

        Two transitions are stored with discount factor 0.5 and lambda 0.5.
        The state values, TD errors and advantages are recomputed step by
        step, each pinned to hard-coded numbers, and the final advantages
        are compared with those returned by the buffer.
        """
        gamma = 0.5
        lam = 0.5
        buffer = GeneralizedAdvantageBuffer(
            self.v, self.features, 2, 1, discount_factor=gamma, lam=lam)
        actions = torch.ones((1))
        states = State(torch.arange(0, 3).unsqueeze(1))
        rewards = torch.tensor([1., 2, 4])
        for step in range(2):
            buffer.store(states[step], actions, rewards[step])

        values = self.v.eval(self.features.eval(states))
        tt.assert_almost_equal(
            values, torch.tensor([0.1826, -0.3476, -0.8777]), decimal=3)

        # One-step TD error: r_t + gamma * V(s_{t+1}) - V(s_t).
        td_errors = torch.zeros(2)
        for step in range(2):
            td_errors[step] = (
                rewards[step] + gamma * values[step + 1] - values[step])
        tt.assert_almost_equal(
            td_errors, torch.tensor([0.6436, 1.909]), decimal=3)

        # The last stored step uses its TD error alone; the first adds the
        # following TD error scaled by gamma * lam (= 0.25).
        advantages = torch.zeros(2)
        advantages[1] = td_errors[1]
        advantages[0] = td_errors[0] + gamma * lam * td_errors[1]
        tt.assert_almost_equal(
            advantages, torch.tensor([1.121, 1.909]), decimal=3)

        _, stored_actions, buffer_advantages = buffer.advantages(states[2])
        tt.assert_almost_equal(buffer_advantages, advantages)
        tt.assert_equal(stored_actions, torch.tensor([1, 1]))
    def test_backward(self):
        """Check mytorch Linear's input gradient against torch.nn.Linear's.

        Both layers receive the same random weight and bias, so the
        gradients with respect to the input can be compared directly.
        """
        n_samples, in_features, out_features = 100, 10, 25
        inputs = torch.rand((n_samples, in_features), requires_grad=True)

        reference = torch.nn.Linear(in_features, out_features)
        candidate = mytorch.nn.Linear(in_features, out_features)

        # Overwrite both layers' parameters with shared random tensors.
        shared_weight = torch.rand(reference.weight.size())
        reference.weight = torch.nn.Parameter(shared_weight)
        candidate.weight = shared_weight

        shared_bias = torch.rand(reference.bias.size())
        reference.bias = torch.nn.Parameter(shared_bias)
        candidate.bias = shared_bias

        # Reference gradient: backward() with an all-ones upstream gradient,
        # read back from the input tensor.
        reference_out = reference(inputs)
        reference_out.backward(torch.ones_like(reference_out))
        expected_grad = inputs.grad

        # Candidate gradient: returned by the layer's own backward().
        candidate_out = candidate(inputs)
        actual_grad = candidate.backward(torch.ones_like(candidate_out))

        tt.assert_equal(expected_grad, actual_grad)
 def test_auto_mask(self):
     """Check the mask of a StateArray built without an explicit mask.

     With done flags [True, False, True] the mask must be [0., 1., 0.].
     """
     observation = torch.randn(3, 4)
     done_flags = torch.tensor([True, False, True])
     data = {'observation': observation, 'done': done_flags}
     state = StateArray(data, (3,))
     expected_mask = torch.tensor([0., 1., 0.])
     tt.assert_equal(state.mask, expected_mask)
 def test_run(self):
     """Store ten transitions, sampling three after each, and check the draws.

     The features of the first element of every sample must match a fixed
     table, and the last element of every sample (collected as weights)
     must be all ones.
     """
     states = torch.arange(0, 20)
     actions = torch.arange(0, 20).view((-1, 1))
     rewards = torch.arange(0, 20)
     expected_samples = torch.tensor([
         [0, 0, 0],
         [1, 1, 0],
         [0, 1, 1],
         [3, 0, 0],
         [1, 4, 4],
         [1, 2, 4],
         [2, 4, 3],
         [4, 7, 4],
         [7, 4, 6],
         [6, 5, 6],
     ])
     expected_weights = np.ones((10, 3))
     sampled_features, sampled_weights = [], []
     for step in range(10):
         current = State(states[step].unsqueeze(0), torch.tensor([1]))
         following = State(states[step + 1].unsqueeze(0), torch.tensor([1]))
         self.replay_buffer.store(
             current, actions[step], rewards[step], following)
         batch = self.replay_buffer.sample(3)
         sampled_features.append(batch[0].features)
         sampled_weights.append(batch[-1])
     stacked = torch.cat(sampled_features).view(expected_samples.shape)
     tt.assert_equal(stacked, expected_samples)
     np.testing.assert_array_equal(
         expected_weights, np.vstack(sampled_weights))
 def test_constructor_defaults(self):
     """Check the default mask, done and reward of a State built from a tensor."""
     observation = torch.randn(3, 4)
     state = State(observation, (3,))
     tt.assert_equal(state.observation, observation)
     # Fields that were not supplied must fall back to these values.
     self.assertEqual(state.mask, 1.)
     self.assertEqual(state.done, False)
     self.assertEqual(state.reward, 0.)
    def test_repeat_actions(self):
        """Drive ParallelRepeatActions(repeats=3) over seven two-state batches.

        Each call to act() is checked against the expected pair of actions;
        afterwards the states and rewards recorded by the wrapped MockAgent
        are checked.
        """
        done = torch.ones(14)
        for index in (3, 5):
            done[index] = 0
        states = State(torch.arange(0, 14), done)
        rewards = torch.ones(2)

        agent = MockAgent(2)
        body = ParallelRepeatActions(agent, repeats=3)

        # Expected actions for the consecutive slices [0:2], [2:4], ... [12:14].
        expected_by_call = [
            [1, 1],
            [1, None],
            [1, None],
            [2, 2],
            [2, 2],
            [2, 2],
            [3, 3],
        ]
        for call_index, expected_actions in enumerate(expected_by_call):
            start = 2 * call_index
            actions = body.act(states[start:start + 2], rewards)
            self.assert_array_equal(actions, expected_actions)

        self.assertEqual(len(agent._states), 3)
        expected_rewards = torch.tensor([[1, 1], [3, 3], [3, 3]])
        tt.assert_equal(torch.cat(agent._rewards), expected_rewards)
Beispiel #9
0
    def test_rollout_with_nones(self):
        """Check NStepBatchBuffer(3, 3) sampling when some states are flagged 0.

        Four batches of three states are stored; positions 5, 7 and 9 carry
        a 0 in the second State argument. The sampled states, next states,
        lengths and returns are compared with fixed expected values.
        """
        buffer = NStepBatchBuffer(3, 3, discount_factor=0.5)
        done = torch.ones(12)
        for index in (5, 7, 9):
            done[index] = 0
        states = State(torch.arange(0, 12), done)
        actions = torch.ones((3))
        reward_batches = [
            torch.zeros(3),
            torch.ones(3),
            2 * torch.ones(3),
            4 * torch.ones(3),
        ]
        for batch_index, batch_rewards in enumerate(reward_batches):
            start = 3 * batch_index
            buffer.store(states[start:start + 3], actions, batch_rewards)
        sampled_states, _, returns, next_states, lengths = buffer.sample(-1)

        expected_states = State(torch.arange(0, 9), done[0:9])
        expected_next_done = torch.zeros(9)
        for index in (5, 7, 8):
            expected_next_done[index] = 1
        expected_next_states = State(
            torch.tensor([9, 7, 5, 9, 7, 11, 9, 10, 11]), expected_next_done)
        expected_returns = torch.tensor([1, 0.5, 0, 2, 1, 2, 2, 2, 2]).float()
        expected_lengths = torch.tensor([3, 2, 1, 2, 1, 2, 1, 1, 1]).float()

        self.assert_states_equal(sampled_states, expected_states)
        self.assert_states_equal(next_states, expected_next_states)
        tt.assert_equal(lengths, expected_lengths)
        tt.assert_allclose(returns, expected_returns)
Beispiel #10
0
    def test_rollout(self):
        """Check NStepBuffer(2) contents after storing four batches of three.

        The buffer must report a length of 6, and sampling 6 items must
        return the expected states, next states, returns and lengths.
        """
        buffer = NStepBuffer(2, discount_factor=0.5)
        actions = torch.ones((3))
        states = State(torch.arange(0, 12))
        reward_batches = [
            torch.zeros(3),
            torch.ones(3),
            2 * torch.ones(3),
            4 * torch.ones(3),
        ]
        for batch_index, batch_rewards in enumerate(reward_batches):
            start = 3 * batch_index
            buffer.store(states[start:start + 3], actions, batch_rewards)
        self.assertEqual(len(buffer), 6)

        sampled_states, _, returns, next_states, lengths = buffer.sample(6)
        expected_states = State(torch.arange(0, 6))
        expected_next_states = State(torch.arange(6, 12))
        expected_returns = torch.tensor([2, 2, 2, 4, 4, 4]).float()
        expected_lengths = torch.tensor([2, 2, 2, 2, 2, 2])
        self.assert_states_equal(sampled_states, expected_states)
        self.assert_states_equal(next_states, expected_next_states)
        tt.assert_allclose(returns, expected_returns)
        tt.assert_equal(lengths, expected_lengths)
    def test_target(self):
        """Check that with FixedTarget(3) the target output changes on step three.

        After each of the first two update steps the target must still
        output zeros; after the third it must match the pinned values.
        """
        self.policy = DeterministicPolicy(
            self.model, self.optimizer, self.space, target=FixedTarget(3))
        state = State(torch.ones(1, STATE_DIM))

        def update_once():
            # One backward pass on the summed policy output, then one step.
            self.policy(state).sum().backward()
            self.policy.step()

        # First and second update: target output must remain all zeros.
        for _ in range(2):
            update_once()
            tt.assert_equal(
                self.policy.target(state), torch.zeros(1, ACTION_DIM))

        # Third update: target output must now match the pinned values.
        update_once()
        expected_target = torch.tensor([[-0.574482, -0.574482, -0.574482]])
        tt.assert_allclose(
            self.policy.target(state),
            expected_target,
            atol=1e-4,
        )
Beispiel #12
0
    def test_pick_one_round(self):
        """Run ModelSanitizer.sanitize once with all noise sources zeroed.

        The three noise distributions and torch.randn_like are replaced by
        mocks that return zeros. After sanitizing a five-element parameter
        [1, 1, 3, 4, 5], the parameter must equal [0, 0, 3, 4, 5].
        """
        def five_zeros():
            # A fresh tensor per call so that no two mocks share storage.
            return torch.tensor([0., 0., 0., 0., 0.])

        def do_nothing():
            return None

        noise_attrs = (
            '_noise1_distributions',
            '_noise2_distributions',
            '_noise3_distributions',
        )
        with patch('torch.randn_like') as randn_like:
            sanitizer = ModelSanitizer(1, 1, 1, 1, 1, 1, 1, 1)
            sanitizer.sanitize_init = Mock(side_effect=do_nothing)
            sanitizer.model_size = 5
            sanitizer.release_size = 1
            sanitizer.epochs = 1
            sanitizer.gamma = 10000
            sanitizer.tau = 2
            for attr in noise_attrs:
                setattr(sanitizer, attr, Mock())
            for attr in noise_attrs:
                getattr(sanitizer, attr).sample.return_value = five_zeros()
            randn_like.return_value = five_zeros()
            sanitizer._base_params = [torch.zeros(5)]

            dest_model = torch.nn.Module()
            dest_model.layer1 = torch.nn.Parameter(
                torch.tensor([1., 1., 3., 4., 5.]))

            sanitizer.sanitize(dest_model)

            first_param = list(dest_model.parameters())[0]
            expected = torch.tensor([0., 0., 3., 4., 5.])
            torch_testing.assert_equal(first_param, expected)
Beispiel #13
0
 def test_get_item(self):
     """Check raw, mask and info of the State obtained by indexing a batch."""
     raw = torch.randn(3, 4)
     batch = State(raw)
     row = 2
     picked = batch[row]
     # The indexed state's raw tensor keeps a leading dimension of size 1.
     tt.assert_equal(picked.raw, raw[row].unsqueeze(0))
     tt.assert_equal(picked.mask, NOT_DONE)
     self.assertEqual(picked.info, [None])
 def test_from_gym_reset(self):
     """Check State.from_gym when given only an observation array."""
     gym_observation = np.array([1, 2, 3])
     state = State.from_gym(gym_observation)
     expected_observation = torch.from_numpy(gym_observation)
     tt.assert_equal(state.observation, expected_observation)
     self.assertEqual(state.mask, 1.)
     self.assertEqual(state.done, False)
     self.assertEqual(state.reward, 0.)
     self.assertEqual(state.shape, ())
 def test_constructor_defaults(self):
     """Check the default mask, done, reward and shape of a State."""
     tensor = torch.randn(3, 4)
     state = State(tensor)
     tt.assert_equal(state.observation, tensor)
     # Fields that were not supplied must fall back to these values.
     self.assertEqual(state.mask, 1.)
     self.assertEqual(state.done, False)
     self.assertEqual(state.reward, 0.)
     self.assertEqual(state.shape, ())
Beispiel #16
0
 def test_from_gym(self):
     """Check State.from_gym with an observation, done=True and an info value."""
     observation = np.array([1, 2, 3])
     state = State.from_gym(observation, True, 'a')
     expected_raw = torch.tensor([[1, 2, 3]])
     tt.assert_equal(state.raw, expected_raw)
     tt.assert_equal(state.mask, DONE)
     self.assertEqual(state.info, ['a'])
Beispiel #17
0
 def test_custom_constructor_args(self):
     """Check that explicit mask and info arguments are kept by State."""
     features = torch.randn(3, 4)
     labels = ['a', 'b', 'c']
     state = State(features, mask=torch.zeros(3), info=labels)
     tt.assert_equal(state.features, features)
     tt.assert_equal(state.mask, torch.zeros(3))
     self.assertEqual(state.info, labels)
 def test_from_gym_step(self):
     """Check State.from_gym on an (observation, reward, done, info) tuple."""
     gym_observation = np.array([1, 2, 3])
     step_result = (gym_observation, 2., True, {'coolInfo': 3.})
     state = State.from_gym(step_result)
     expected_observation = torch.from_numpy(gym_observation)
     tt.assert_equal(state.observation, expected_observation)
     self.assertEqual(state.mask, 0.)
     self.assertEqual(state.done, True)
     self.assertEqual(state.reward, 2.)
     # Entries of the info dict must be reachable by key on the state.
     self.assertEqual(state['coolInfo'], 3.)
     self.assertEqual(state.shape, ())
Beispiel #19
0
 def test_from_list(self):
     """Check that State.from_list concatenates raw, mask and info in order."""
     first = State(torch.randn(1, 4), mask=DONE, info=['a'])
     second = State(torch.randn(1, 4), mask=NOT_DONE, info=['b'])
     third = State(torch.randn(1, 4))
     combined = State.from_list([first, second, third])
     expected_raw = torch.cat((first.raw, second.raw, third.raw))
     tt.assert_equal(combined.raw, expected_raw)
     tt.assert_equal(combined.mask, torch.tensor([0, 1, 1]))
     self.assertEqual(combined.info, ['a', 'b', None])
Beispiel #20
0
 def test_list(self):
     """Sample actions for three states under seed 1 and run reinforce.

     The sampled actions are pinned to [1, 2, 1]; a weighted
     negative-log-probability loss is then passed to policy.reinforce.
     """
     torch.manual_seed(1)
     features = torch.randn(3, STATE_DIM)
     states = State(features, torch.tensor([1, 0, 1]))
     distribution = self.policy(states)
     sampled = distribution.sample()
     log_probs = distribution.log_prob(sampled)
     tt.assert_equal(sampled, torch.tensor([1, 2, 1]))
     weights = torch.tensor([[1, 2, 3]])
     loss = -(weights * log_probs).mean()
     self.policy.reinforce(loss)
Beispiel #21
0
 def test_build(self):
     """Check the masks _build_pruning_mask returns for two layers at 0.3."""
     weights_by_layer = {
         1: torch.Tensor([[-1, 5, -7], [2, -3, 1], [0, 0, 0]]),
         2: torch.Tensor([2, -3, 4, -5, -1, 7, 2, -4, 3, -2, 8]),
     }
     # Entries equal to 0, 1 or -1 are the ones expected to be masked out.
     mask = _build_pruning_mask(weights_by_layer, .3)
     expected_by_layer = {
         1: torch.Tensor([[0, 1, 1], [1, 1, 0], [0, 0, 0]]),
         2: torch.Tensor([1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1]),
     }
     for layer in (1, 2):
         tt.assert_equal(mask[layer].int(), expected_by_layer[layer])
 def test_eval(self):
     """Check policy.no_grad probabilities and policy.eval samples.

     Both are compared with pinned values for a batch of three states.
     """
     features = torch.randn(3, STATE_DIM)
     states = State(features, torch.tensor([1, 1, 1]))
     distribution = self.policy.no_grad(states)
     expected_probs = torch.tensor([
         [0.352, 0.216, 0.432],
         [0.266, 0.196, 0.538],
         [0.469, 0.227, 0.304],
     ])
     tt.assert_almost_equal(distribution.probs, expected_probs, decimal=3)
     sampled = self.policy.eval(states).sample()
     tt.assert_equal(sampled, torch.tensor([2, 2, 0]))
Beispiel #23
0
 def test_with_equal_2_dimensional_tensors(self):
     """Check that assert_equal accepts two 2-D tensors with equal values."""
     rows = [
         [23.65, 9.3, 5.2],
         [8.2, 1.1, 9],
     ]
     # Two separately constructed tensors holding the same values.
     first = torch.tensor(rows)
     second = torch.tensor(rows)
     tt.assert_equal(first, second)
 def test_pool(self):
     """Check PHOCNet.pool against max_pool2d with the net's pooling settings."""
     image = np.zeros((1, 1, 500, 500))
     # Set one rectangular region (rows 200 onward, first 200 columns) to 1.
     image[:, :, 200:, :200] = 1
     image_tensor = torch.from_numpy(image)
     net = PHOCNet(n_out=1)
     actual = net.pool(image_tensor)
     expected = torch.nn.functional.max_pool2d(
         image_tensor,
         kernel_size=net.kernel_pooling,
         stride=net.stride_pooling,
         padding=net.padding_pooling,
     )
     tt.assert_equal(actual, expected)
 def test_deflicker(self):
     """Feed five frames through the body and check the agent's final features.

     The frames hold the values 1, 1, 2, 1, 1; the expected features are
     2x2 planes with the values 1, 2, 2, 1 along the second dimension.
     """
     ones_frame_a = State(torch.ones((1, 3, 4, 4)))
     ones_frame_b = State(torch.ones((1, 3, 4, 4)))
     twos_frame = State(torch.ones((1, 3, 4, 4)) * 2)
     sequence = (
         ones_frame_a,
         ones_frame_b,
         twos_frame,
         ones_frame_b,
         ones_frame_b,
     )
     for frame in sequence:
         self.body.act(frame, 0)
     planes = (
         torch.ones(1, 2, 2),
         torch.ones(2, 2, 2) * 2,
         torch.ones(1, 2, 2),
     )
     expected = torch.cat(planes).unsqueeze(0)
     tt.assert_equal(self.agent.state.features, expected)
 def test_from_dict(self):
     """Check that a State built from a dict exposes each entry as an attribute."""
     observation = torch.randn(3, 4)
     data = {
         'observation': observation,
         'done': True,
         'mask': 1,
         'reward': 5.,
     }
     state = State(data)
     tt.assert_equal(state.observation, observation)
     self.assertEqual(state.done, True)
     self.assertEqual(state.mask, 1.)
     self.assertEqual(state.reward, 5.)
Beispiel #27
0
    def test_with_unequal_2_dimensional_tensors(self):
        """Check that assert_equal raises when one element differs."""
        first = torch.tensor([
            [23.65, 9.3, 5.2],
            [8.2, 1.1, 9],
        ])
        # Identical to `first` except for the middle element of row two.
        second = torch.tensor([
            [23.65, 9.3, 5.2],
            [8.2, 1.2, 9],
        ])

        expected_message = 'Arrays are not equal'
        with self.assertRaisesRegex(AssertionError, expected_message):
            tt.assert_equal(first, second)
Beispiel #28
0
    def testFFTCentralFreqBatch(self):
        """Check element [0, 0, 0] of every batch item after an in-place FFT.

        A (4, 10, 10, 2) float tensor is filled with ones at index 0 of the
        last dimension and zeros at index 1. After ``sl.Fft()`` is applied
        in place, the sum of ``x[:, 0, 0, 0]`` over the batch must equal the
        sum of the whole tensor taken before the transform.

        NOTE(review): the last dimension presumably holds (real, imaginary)
        parts and [0, 0] is the zero-frequency bin -- confirm against sl.Fft.

        The CUDA pass runs only when ``torch.cuda.is_available()`` is true.
        """
        # Same for batches
        # Calling .cuda() without a usable device raises, so the GPU pass is
        # skipped on CPU-only hosts instead of failing the whole test.
        gpu_options = [True, False] if torch.cuda.is_available() else [False]
        for gpu in gpu_options:
            x = torch.zeros(4, 10, 10, 2, dtype=torch.float32)
            x.narrow(3, 0, 1).fill_(1)
            if gpu:
                x = x.cuda()

            total_before = x.sum()
            fft = sl.Fft()
            fft(x, inplace=True)
            first_bin_sum = x[:, 0, 0, 0].sum()
            tt.assert_equal(total_before.cpu(), first_bin_sum.cpu())
 def test_terminal_state(self):
     """Step the environment twelve times, then act once on a masked state.

     Afterwards the last action taken in the loop, the shape of the
     agent's state features and the agent's reward are checked.
     """
     self.env.reset()
     first_action = self.body.act(self.env.state, 0)
     self.env.step(first_action)
     clipped_reward = -5  # should be clipped
     for _ in range(11):
         action = self.body.act(self.env.state, clipped_reward)
         self.env.step(action)
     # pylint: disable=protected-access
     self.env.state._mask = torch.tensor([0])
     # NOTE(review): the return value of this final act() call is discarded;
     # the assertion below checks the last action from the loop -- confirm
     # that this is intended.
     self.body.act(self.env.state, -1)
     tt.assert_equal(action, ACT_ACTION)
     self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
     self.assertEqual(self.agent.reward, -4)
    def test_normalize(self):
        """Check __normalize_batch__ on a two-element batch of (x, y) pairs.

        The returned offsets, factors and every normalized x and y tensor
        are compared with hand-written expected values.
        """
        first_x = torch.FloatTensor([[1.0], [0.75], [0.5], [0.5]])
        first_y = torch.FloatTensor([[2]])
        second_x = torch.FloatTensor([[1.5], [2], [1.5], [0], [-2]])
        second_y = torch.FloatTensor([[6]])
        batch = [(first_x, first_y), (second_x, second_y)]

        expected_offsets = torch.FloatTensor([-0.5, 2])
        expected_factors = torch.FloatTensor([2, 0.25])
        expected_normalized_batch = [
            (
                torch.FloatTensor([[1], [0.5], [0], [0]]),
                torch.FloatTensor([[3]]),
            ),
            (
                torch.FloatTensor([[3.5 / 4], [1], [3.5 / 4], [0.5], [0]]),
                torch.FloatTensor([[2]]),
            ),
        ]

        result = self.interface.__normalize_batch__(batch)
        normalized_batch, factors, offsets = result
        tt.assert_equal(offsets, expected_offsets)
        tt.assert_equal(factors, expected_factors)
        for index, (actual_x, actual_y) in enumerate(normalized_batch):
            wanted_x, wanted_y = expected_normalized_batch[index]
            tt.assert_equal(actual_x, wanted_x)
            tt.assert_equal(actual_y, wanted_y)