def test_forward(self):
    """Check that mytorch's Tanh forward pass matches torch.nn.Tanh."""
    inputs = random_tensor_2d(10)
    reference_layer = torch.nn.Tanh()
    candidate_layer = mytorch.nn.Tanh()
    # Evaluate the reference first, then the implementation under test.
    expected = reference_layer(inputs)
    actual = candidate_layer(inputs)
    tt.assert_equal(expected, actual)
def test_forward(self):
    """Check that mytorch's ReLU forward pass matches torch.nn.ReLU."""
    inputs = random_tensor_2d(10)
    reference_layer = torch.nn.ReLU()
    candidate_layer = mytorch.nn.ReLU()
    # Evaluate the reference first, then the implementation under test.
    expected = reference_layer(inputs)
    actual = candidate_layer(inputs)
    tt.assert_equal(expected, actual)
def test_simple(self):
    """Check GeneralizedAdvantageBuffer advantages against hand-computed values.

    Two transitions are stored in a buffer built with discount_factor=0.5
    and lam=0.5. The TD errors and advantages are recomputed here from the
    value estimates and compared with what the buffer returns.
    """
    buffer = GeneralizedAdvantageBuffer(
        self.v, self.features, 2, 1, discount_factor=0.5, lam=0.5
    )
    actions = torch.ones((1))
    states = State(torch.arange(0, 3).unsqueeze(1))
    rewards = torch.tensor([1., 2, 4])
    for step in range(2):
        buffer.store(states[step], actions, rewards[step])

    # Pin the value estimates so a change in the fixture is caught early.
    values = self.v.eval(self.features.eval(states))
    expected_values = torch.tensor([0.1826, -0.3476, -0.8777])
    tt.assert_almost_equal(values, expected_values, decimal=3)

    # One-step TD error: r_t + 0.5 * V(s_{t+1}) - V(s_t).
    td_errors = torch.zeros(2)
    for step in range(2):
        td_errors[step] = rewards[step] + 0.5 * values[step + 1] - values[step]
    tt.assert_almost_equal(td_errors, torch.tensor([0.6436, 1.909]), decimal=3)

    # 0.25 equals discount_factor * lam (0.5 * 0.5) as configured above.
    advantages = torch.zeros(2)
    advantages[0] = td_errors[0] + 0.25 * td_errors[1]
    advantages[1] = td_errors[1]
    tt.assert_almost_equal(advantages, torch.tensor([1.121, 1.909]), decimal=3)

    _, stored_actions, stored_advantages = buffer.advantages(states[2])
    tt.assert_almost_equal(stored_advantages, advantages)
    tt.assert_equal(stored_actions, torch.tensor([1, 1]))
def test_backward(self):
    """Check that mytorch's Linear input gradient matches torch.nn.Linear."""
    batch_size, n_in, n_out = 100, 10, 25
    inputs = torch.rand((batch_size, n_in), requires_grad=True)
    reference = torch.nn.Linear(n_in, n_out)
    candidate = mytorch.nn.Linear(n_in, n_out)

    # Give both layers the same random weights and biases.
    shared_weight = torch.rand(reference.weight.size())
    reference.weight = torch.nn.Parameter(shared_weight)
    candidate.weight = shared_weight
    shared_bias = torch.rand(reference.bias.size())
    reference.bias = torch.nn.Parameter(shared_bias)
    candidate.bias = shared_bias

    # Reference gradient w.r.t. the input comes from autograd.
    reference_out = reference(inputs)
    reference_out.backward(torch.ones_like(reference_out))
    expected_grad = inputs.grad

    # The implementation under test returns the gradient from backward().
    candidate_out = candidate(inputs)
    actual_grad = candidate.backward(torch.ones_like(candidate_out))

    tt.assert_equal(expected_grad, actual_grad)
def test_auto_mask(self):
    """Check the mask of a StateArray built with 'done' flags but no mask."""
    fields = {
        'observation': torch.randn(3, 4),
        'done': torch.tensor([True, False, True]),
    }
    state = StateArray(fields, (3,))
    # Entries flagged done are expected to be masked out (0), others kept (1).
    expected_mask = torch.tensor([0., 1., 0.])
    tt.assert_equal(state.mask, expected_mask)
def test_run(self):
    """Store ten transitions, sampling three after each, and check the draws.

    The features of every sampled batch and the last element of each sample
    tuple (compared against all-ones weights) are collected and checked
    against fixed expectations.
    """
    states = torch.arange(0, 20)
    actions = torch.arange(0, 20).view((-1, 1))
    rewards = torch.arange(0, 20)
    expected_samples = torch.tensor([
        [0, 0, 0],
        [1, 1, 0],
        [0, 1, 1],
        [3, 0, 0],
        [1, 4, 4],
        [1, 2, 4],
        [2, 4, 3],
        [4, 7, 4],
        [7, 4, 6],
        [6, 5, 6],
    ])
    expected_weights = np.ones((10, 3))

    sampled_features = []
    sampled_weights = []
    for step in range(10):
        current = State(states[step].unsqueeze(0), torch.tensor([1]))
        following = State(states[step + 1].unsqueeze(0), torch.tensor([1]))
        self.replay_buffer.store(current, actions[step], rewards[step], following)
        batch = self.replay_buffer.sample(3)
        sampled_features.append(batch[0].features)
        sampled_weights.append(batch[-1])

    stacked_samples = torch.cat(sampled_features).view(expected_samples.shape)
    tt.assert_equal(stacked_samples, expected_samples)
    np.testing.assert_array_equal(expected_weights, np.vstack(sampled_weights))
def test_constructor_defaults(self):
    """Check the default mask, done and reward of a State built from a tensor."""
    raw = torch.randn(3, 4)
    state = State(raw, (3,))
    tt.assert_equal(state.observation, raw)
    # Fields that were not supplied should fall back to these defaults.
    expected_defaults = (
        ('mask', 1.),
        ('done', False),
        ('reward', 0.),
    )
    for field, default in expected_defaults:
        self.assertEqual(getattr(state, field), default)
def test_repeat_actions(self):
    """Feed seven state pairs through ParallelRepeatActions(repeats=3).

    Checks the actions returned at every step, then how many state batches
    and which rewards reached the wrapped MockAgent.
    """
    done = torch.ones(14)
    for index in (3, 5):
        done[index] = 0
    states = State(torch.arange(0, 14), done)
    rewards = torch.ones(2)
    agent = MockAgent(2)
    body = ParallelRepeatActions(agent, repeats=3)

    # Expected actions for the state slices [0:2], [2:4], ..., [12:14].
    expected_per_step = [
        [1, 1],
        [1, None],
        [1, None],
        [2, 2],
        [2, 2],
        [2, 2],
        [3, 3],
    ]
    for step, expected in enumerate(expected_per_step):
        start = 2 * step
        actions = body.act(states[start:start + 2], rewards)
        self.assert_array_equal(actions, expected)

    self.assertEqual(len(agent._states), 3)
    expected_rewards = torch.tensor([[1, 1], [3, 3], [3, 3]])
    tt.assert_equal(torch.cat(agent._rewards), expected_rewards)
def test_rollout_with_nones(self):
    """Check NStepBatchBuffer rollouts when some states carry a zero flag.

    Four batches of three transitions are stored; the flag tensor passed to
    State is zero at indices 5, 7 and 9. The sampled states, next states,
    returns and lengths are compared with fixed expectations.
    """
    buffer = NStepBatchBuffer(3, 3, discount_factor=0.5)
    done = torch.ones(12)
    for index in (5, 7, 9):
        done[index] = 0
    states = State(torch.arange(0, 12), done)
    actions = torch.ones((3))

    reward_batches = [
        torch.zeros(3),
        torch.ones(3),
        2 * torch.ones(3),
        4 * torch.ones(3),
    ]
    for step, reward_batch in enumerate(reward_batches):
        start = 3 * step
        buffer.store(states[start:start + 3], actions, reward_batch)

    sampled_states, _, returns, next_states, lengths = buffer.sample(-1)

    expected_states = State(torch.arange(0, 9), done[0:9])
    expected_next_done = torch.zeros(9)
    for index in (5, 7, 8):
        expected_next_done[index] = 1
    expected_next_states = State(
        torch.tensor([9, 7, 5, 9, 7, 11, 9, 10, 11]),
        expected_next_done,
    )
    expected_returns = torch.tensor([1, 0.5, 0, 2, 1, 2, 2, 2, 2]).float()
    expected_lengths = torch.tensor([3, 2, 1, 2, 1, 2, 1, 1, 1]).float()

    self.assert_states_equal(sampled_states, expected_states)
    self.assert_states_equal(next_states, expected_next_states)
    tt.assert_equal(lengths, expected_lengths)
    tt.assert_allclose(returns, expected_returns)
def test_rollout(self):
    """Check NStepBuffer(2, discount_factor=0.5) rollouts over four stored batches.

    After storing four batches of three transitions the buffer should hold
    six samples; their states, next states, returns and lengths are compared
    with fixed expectations.
    """
    buffer = NStepBuffer(2, discount_factor=0.5)
    actions = torch.ones((3))
    states = State(torch.arange(0, 12))

    reward_batches = [
        torch.zeros(3),
        torch.ones(3),
        2 * torch.ones(3),
        4 * torch.ones(3),
    ]
    for step, reward_batch in enumerate(reward_batches):
        start = 3 * step
        buffer.store(states[start:start + 3], actions, reward_batch)

    self.assertEqual(len(buffer), 6)
    sampled_states, _, returns, next_states, lengths = buffer.sample(6)

    expected_states = State(torch.arange(0, 6))
    expected_next_states = State(torch.arange(6, 12))
    expected_returns = torch.tensor([2, 2, 2, 4, 4, 4]).float()
    expected_lengths = torch.tensor([2, 2, 2, 2, 2, 2])

    self.assert_states_equal(sampled_states, expected_states)
    self.assert_states_equal(next_states, expected_next_states)
    tt.assert_allclose(returns, expected_returns)
    tt.assert_equal(lengths, expected_lengths)
def test_target(self):
    """Check that a FixedTarget(3) policy target only changes on the third step."""
    self.policy = DeterministicPolicy(
        self.model,
        self.optimizer,
        self.space,
        target=FixedTarget(3),
    )
    state = State(torch.ones(1, STATE_DIM))

    def run_update():
        # One backward pass on the summed policy output followed by a step.
        self.policy(state).sum().backward()
        self.policy.step()

    # First two updates: the target network output must stay at zero.
    for _ in range(2):
        run_update()
        tt.assert_equal(self.policy.target(state), torch.zeros(1, ACTION_DIM))

    # Third update: the target should now be updated.
    run_update()
    expected_target = torch.tensor([[-0.574482, -0.574482, -0.574482]])
    tt.assert_allclose(self.policy.target(state), expected_target, atol=1e-4)
def test_pick_one_round(self):
    """Run one sanitize round with every noise source forced to zero.

    torch.randn_like is patched and the three noise distributions are mocked
    so that all sampled noise is zero. The destination model's parameter is
    then compared with a fixed expected tensor after sanitize().
    """
    def do_nothing():
        return None

    noise_attrs = (
        '_noise1_distributions',
        '_noise2_distributions',
        '_noise3_distributions',
    )
    with patch('torch.randn_like') as randn_like:
        sanitizer = ModelSanitizer(1, 1, 1, 1, 1, 1, 1, 1)
        # Replace sanitize_init with a mock whose side effect does nothing.
        sanitizer.sanitize_init = Mock(side_effect=do_nothing)
        sanitizer.model_size = 5
        sanitizer.release_size = 1
        sanitizer.epochs = 1
        sanitizer.gamma = 10000
        sanitizer.tau = 2

        for attr in noise_attrs:
            setattr(sanitizer, attr, Mock())
        for attr in noise_attrs:
            zero_sample = torch.tensor([0., 0., 0., 0., 0.])
            getattr(sanitizer, attr).sample.return_value = zero_sample
        randn_like.return_value = torch.tensor([0., 0., 0., 0., 0.])

        sanitizer._base_params = [torch.zeros(5)]
        dest_model = torch.nn.Module()
        initial_values = torch.tensor([1., 1., 3., 4., 5.])
        dest_model.layer1 = torch.nn.Parameter(initial_values)

        sanitizer.sanitize(dest_model)

        result_params = list(dest_model.parameters())[0]
        expected_params = torch.tensor([0., 0., 3., 4., 5.])
        torch_testing.assert_equal(result_params, expected_params)
def test_get_item(self):
    """Check that indexing a batched State yields a single-row State."""
    raw = torch.randn(3, 4)
    batch = State(raw)
    picked = batch[2]
    # The selected row keeps a leading batch dimension of size one.
    expected_raw = raw[2].unsqueeze(0)
    tt.assert_equal(picked.raw, expected_raw)
    tt.assert_equal(picked.mask, NOT_DONE)
    self.assertEqual(picked.info, [None])
def test_from_gym_reset(self):
    """Check State.from_gym when given only an observation array."""
    observation = np.array([1, 2, 3])
    state = State.from_gym(observation)
    expected_observation = torch.from_numpy(observation)
    tt.assert_equal(state.observation, expected_observation)
    # With nothing but an observation, the remaining fields take defaults.
    self.assertEqual(state.mask, 1.)
    self.assertEqual(state.done, False)
    self.assertEqual(state.reward, 0.)
    self.assertEqual(state.shape, ())
def test_constructor_defaults(self):
    """Check the default fields of a State built from an observation alone."""
    observation = torch.randn(3, 4)
    state = State(observation)
    tt.assert_equal(state.observation, observation)
    # Fields that were not supplied should fall back to these defaults.
    expected_defaults = (
        ('mask', 1.),
        ('done', False),
        ('reward', 0.),
        ('shape', ()),
    )
    for field, default in expected_defaults:
        self.assertEqual(getattr(state, field), default)
def test_from_gym(self):
    """Check State.from_gym with an observation, a done flag and an info value."""
    gym_obs = np.array([1, 2, 3])
    state = State.from_gym(gym_obs, True, 'a')
    # The observation is expected as a single-row batch.
    expected_raw = torch.tensor([[1, 2, 3]])
    tt.assert_equal(state.raw, expected_raw)
    tt.assert_equal(state.mask, DONE)
    self.assertEqual(state.info, ['a'])
def test_custom_constructor_args(self):
    """Check that an explicit mask and info passed to State are kept."""
    raw = torch.randn(3, 4)
    custom_mask = torch.zeros(3)
    custom_info = ['a', 'b', 'c']
    state = State(raw, mask=custom_mask, info=custom_info)
    tt.assert_equal(state.features, raw)
    tt.assert_equal(state.mask, torch.zeros(3))
    self.assertEqual(state.info, custom_info)
def test_from_gym_step(self):
    """Check State.from_gym on an (observation, reward, done, info) tuple."""
    observation = np.array([1, 2, 3])
    step_result = (observation, 2., True, {'coolInfo': 3.})
    state = State.from_gym(step_result)
    expected_observation = torch.from_numpy(observation)
    tt.assert_equal(state.observation, expected_observation)
    self.assertEqual(state.mask, 0.)
    self.assertEqual(state.done, True)
    self.assertEqual(state.reward, 2.)
    # Entries of the info dict are readable by key on the state.
    self.assertEqual(state['coolInfo'], 3.)
    self.assertEqual(state.shape, ())
def test_from_list(self):
    """Check that State.from_list concatenates raw data, masks and info."""
    members = [
        State(torch.randn(1, 4), mask=DONE, info=['a']),
        State(torch.randn(1, 4), mask=NOT_DONE, info=['b']),
        State(torch.randn(1, 4)),
    ]
    combined = State.from_list(members)
    expected_raw = torch.cat(tuple(member.raw for member in members))
    tt.assert_equal(combined.raw, expected_raw)
    tt.assert_equal(combined.mask, torch.tensor([0, 1, 1]))
    self.assertEqual(combined.info, ['a', 'b', None])
def test_list(self):
    """Sample actions for a batch of three states and run a reinforce step."""
    torch.manual_seed(1)
    observations = torch.randn(3, STATE_DIM)
    states = State(observations, torch.tensor([1, 0, 1]))
    dist = self.policy(states)
    actions = dist.sample()
    log_probs = dist.log_prob(actions)
    # With the seed fixed above, the sampled actions are deterministic.
    tt.assert_equal(actions, torch.tensor([1, 2, 1]))
    weights = torch.tensor([[1, 2, 3]])
    weighted_log_probs = weights * log_probs
    loss = -weighted_log_probs.mean()
    self.policy.reinforce(loss)
def test_build(self):
    """Check the masks _build_pruning_mask returns for two layers at rate .3."""
    layers_dict = {
        1: torch.Tensor([[-1, 5, -7], [2, -3, 1], [0, 0, 0]]),
        2: torch.Tensor([2, -3, 4, -5, -1, 7, 2, -4, 3, -2, 8]),
    }
    # should prune 0s and 1s (also -1s)
    mask = _build_pruning_mask(layers_dict, .3)
    expected_masks = {
        1: torch.Tensor([[0, 1, 1], [1, 1, 0], [0, 0, 0]]),
        2: torch.Tensor([1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1]),
    }
    for layer in (1, 2):
        tt.assert_equal(mask[layer].int(), expected_masks[layer])
def test_eval(self):
    """Check the policy's no_grad probabilities and the actions sampled via eval."""
    observations = torch.randn(3, STATE_DIM)
    states = State(observations, torch.tensor([1, 1, 1]))
    dist = self.policy.no_grad(states)
    expected_probs = torch.tensor([
        [0.352, 0.216, 0.432],
        [0.266, 0.196, 0.538],
        [0.469, 0.227, 0.304],
    ])
    tt.assert_almost_equal(dist.probs, expected_probs, decimal=3)
    eval_dist = self.policy.eval(states)
    best = eval_dist.sample()
    tt.assert_equal(best, torch.tensor([2, 2, 0]))
def test_with_equal_2_dimensional_tensors(self):
    """Check that assert_equal accepts two identical 2-D tensors."""
    rows = [
        [23.65, 9.3, 5.2],
        [8.2, 1.1, 9],
    ]
    # Two separate tensors built from the same data.
    first = torch.tensor(rows)
    second = torch.tensor(rows)
    tt.assert_equal(first, second)
def test_pool(self):
    """Check that PHOCNet.pool matches max_pool2d with the net's own settings."""
    image = np.zeros(shape=(1, 1, 500, 500))
    # Set one rectangular region to 1 so the input is not uniform.
    image[:, :, 200:, :200] = 1
    image_tensor = torch.from_numpy(image)
    phoc_net = PHOCNet(n_out=1)
    pooled = phoc_net.pool(image_tensor)
    pooling_settings = {
        'kernel_size': phoc_net.kernel_pooling,
        'stride': phoc_net.stride_pooling,
        'padding': phoc_net.padding_pooling,
    }
    reference = torch.nn.functional.max_pool2d(image_tensor, **pooling_settings)
    tt.assert_equal(pooled, reference)
def test_deflicker(self):
    """Feed five frames through the body and check the agent's stacked features."""
    ones_frame_a = State(torch.ones((1, 3, 4, 4)))
    ones_frame_b = State(torch.ones((1, 3, 4, 4)))
    twos_frame = State(torch.ones((1, 3, 4, 4)) * 2)
    frame_sequence = (
        ones_frame_a,
        ones_frame_b,
        twos_frame,
        ones_frame_b,
        ones_frame_b,
    )
    for frame in frame_sequence:
        self.body.act(frame, 0)
    expected_planes = (
        torch.ones(1, 2, 2),
        torch.ones(2, 2, 2) * 2,
        torch.ones(1, 2, 2),
    )
    expected = torch.cat(expected_planes).unsqueeze(0)
    tt.assert_equal(self.agent.state.features, expected)
def test_from_dict(self):
    """Check that a State built from a dict exposes each entry as an attribute."""
    observation = torch.randn(3, 4)
    fields = {
        'observation': observation,
        'done': True,
        'mask': 1,
        'reward': 5.
    }
    state = State(fields)
    tt.assert_equal(state.observation, observation)
    self.assertEqual(state.done, True)
    self.assertEqual(state.mask, 1.)
    self.assertEqual(state.reward, 5.)
def test_with_unequal_2_dimensional_tensors(self):
    """Check that assert_equal rejects 2-D tensors differing in one element."""
    first = torch.tensor([
        [23.65, 9.3, 5.2],
        [8.2, 1.1, 9],
    ])
    # Differs from the first tensor only at position [1, 1] (1.1 vs 1.2).
    second = torch.tensor([
        [23.65, 9.3, 5.2],
        [8.2, 1.2, 9],
    ])
    expected_message = 'Arrays are not equal'
    with self.assertRaisesRegex(AssertionError, expected_message):
        tt.assert_equal(first, second)
def testFFTCentralFreqBatch(self):
    """Check the [0, 0, 0] FFT entry of each batch item against the input sum.

    A (4, 10, 10, 2) tensor is zero-filled, then index 0 of its last
    dimension is set to 1. After an in-place ``sl.Fft`` call, the sum over
    the batch of entry ``[:, 0, 0, 0]`` must equal the sum of the original
    input. The check runs on CPU, and on GPU when CUDA is available.
    """
    # Same for batches
    for gpu in [True, False]:
        if gpu and not torch.cuda.is_available():
            # x.cuda() would raise on a host without CUDA; only the CPU
            # variant can run there.
            continue
        x = torch.FloatTensor(4, 10, 10, 2).fill_(0)
        x.narrow(3, 0, 1).fill_(1)
        if gpu:
            x = x.cuda()
        # Sum of the input, taken before the in-place transform overwrites x.
        a = x.sum()
        fft = sl.Fft()
        fft(x, inplace=True)
        c = x[:, 0, 0, 0].sum()
        # Move both results to the CPU so the comparison works on either device.
        tt.assert_equal(a.cpu(), c.cpu())
def test_terminal_state(self):
    """Step the environment, zero the state's mask, and check the final act.

    After one initial step and eleven further steps with a reward of -5, the
    state's mask is set to zero and the body acts once more. The last loop
    action, the agent's feature shape and the agent's reward are checked.
    """
    self.env.reset()
    first_action = self.body.act(self.env.state, 0)
    self.env.step(first_action)
    for _ in range(11):
        reward = -5  # should be clipped
        action = self.body.act(self.env.state, reward)
        self.env.step(action)
    # pylint: disable=protected-access
    self.env.state._mask = torch.tensor([0])
    self.body.act(self.env.state, -1)
    tt.assert_equal(action, ACT_ACTION)
    expected_shape = (1, 4, 105, 80)
    self.assertEqual(self.agent.state.features.shape, expected_shape)
    self.assertEqual(self.agent.reward, -4)
def test_normalize(self):
    """Check __normalize_batch__ output, factors and offsets on a two-item batch."""
    batch = [
        (
            torch.FloatTensor([[1.0], [0.75], [0.5], [0.5]]),
            torch.FloatTensor([[2]]),
        ),
        (
            torch.FloatTensor([[1.5], [2], [1.5], [0], [-2]]),
            torch.FloatTensor([[6]]),
        ),
    ]
    expected_offsets = torch.FloatTensor([-0.5, 2])
    expected_factors = torch.FloatTensor([2, 0.25])
    expected_normalized_batch = [
        (
            torch.FloatTensor([[1], [0.5], [0], [0]]),
            torch.FloatTensor([[3]]),
        ),
        (
            torch.FloatTensor([[3.5 / 4], [1], [3.5 / 4], [0.5], [0]]),
            torch.FloatTensor([[2]]),
        ),
    ]

    result = self.interface.__normalize_batch__(batch)
    normalized_batch, factors, offsets = result

    tt.assert_equal(offsets, expected_offsets)
    tt.assert_equal(factors, expected_factors)
    # Compare each normalized (x, y) pair with the expectation at its index.
    for index, (actual_x, actual_y) in enumerate(normalized_batch):
        wanted_x, wanted_y = expected_normalized_batch[index]
        tt.assert_equal(actual_x, wanted_x)
        tt.assert_equal(actual_y, wanted_y)