def test_rollout_preprocess(self, mock_parameters):
    """Check the recorded trajectory when SlidingWindow preprocessing is on.

    The mocked parameters select the ``SlidingWindow`` preprocessor with
    arguments ``(2, "float32")``. After ``start`` plus two ``step`` calls
    the test expects two rewards, three actions and three states, where
    each expected state is a 2x1 array holding the previous and the
    current observation (0 stands in for the missing predecessor of the
    first one). After ``end`` only the reward list is expected to grow.
    """
    params = mock_parameters.return_value
    self._setup_parameters(params)
    params.preprocessing = (
        'cntk.contrib.deeprl.agent.shared.preprocessing.SlidingWindow')
    params.preprocessing_args = '(2, "float32")'

    discrete_actions = spaces.Discrete(2)
    unit_box = spaces.Box(0, 1, (1, ))
    agent = ActorCritic('', unit_box, discrete_actions)
    # Scripted (action, debug-string) pairs, one per start/step call.
    agent._choose_action = Mock(side_effect=[(0, ''), (1, ''), (1, '')])

    # The same windowed states are expected both before and after end().
    expected_states = [
        np.array([[0], [0.1]], np.float32),
        np.array([[0.1], [0.2]], np.float32),
        np.array([[0.2], [0.3]], np.float32),
    ]
    expected_actions = [0, 1, 1]

    agent.start(np.array([0.1], np.float32))
    agent.step(0.1, np.array([0.2], np.float32))
    agent.step(0.2, np.array([0.3], np.float32))

    self.assertEqual(agent._trajectory_rewards, [0.1, 0.2])
    self.assertEqual(agent._trajectory_actions, expected_actions)
    np.testing.assert_array_equal(agent._trajectory_states, expected_states)

    agent.end(0.3, np.array([0.4], np.float32))

    self.assertEqual(agent._trajectory_rewards, [0.1, 0.2, 0.3])
    self.assertEqual(agent._trajectory_actions, expected_actions)
    np.testing.assert_array_equal(agent._trajectory_states, expected_states)
def test_rollout_with_update(self, mock_parameters):
    """Check trajectory buffers and update calls with update_frequency 2.

    ``_update_networks`` is replaced by a ``MagicMock`` so that only its
    call count is observed. After every agent call (except the very first
    ``start``) the test compares the buffered rewards, actions and states
    and the number of update calls against the expected values.
    """
    params = mock_parameters.return_value
    self._setup_parameters(params)
    params.update_frequency = 2

    discrete_actions = spaces.Discrete(2)
    unit_box = spaces.Box(0, 1, (1, ))
    agent = ActorCritic('', unit_box, discrete_actions)
    agent._update_networks = MagicMock()
    # Scripted (action, debug-string) pairs returned by successive calls.
    agent._choose_action = Mock(
        side_effect=[(0, ''), (1, ''), (1, ''), (0, ''), (1, ''), (0, '')])

    def observation(value):
        # Wrap a scalar into the 1-element float32 array the agent is fed.
        return np.array([value], np.float32)

    def expect(rewards, actions, states, update_calls):
        # Same four assertions, in the same order, after every agent call.
        self.assertEqual(agent._trajectory_rewards, rewards)
        self.assertEqual(agent._trajectory_actions, actions)
        self.assertEqual(agent._trajectory_states, states)
        self.assertEqual(agent._update_networks.call_count, update_calls)

    agent.start(observation(0.1))
    agent.step(0.1, observation(0.2))
    expect([0.1], [0, 1], [0.1, 0.2], 0)

    agent.step(0.2, observation(0.3))
    expect([], [1], [0.3], 1)

    agent.step(0.3, observation(0.4))
    expect([0.3], [1, 0], [0.3, 0.4], 1)

    # Starting a new episode.
    agent.start(observation(0.5))
    expect([], [1], [0.5], 1)

    agent.step(0.4, observation(0.6))
    expect([], [0], [0.6], 2)

    agent.end(0.5, observation(0.7))
    expect([0.5], [0], [0.6], 2)
def test_rollout_with_update(self, mock_parameters):
    """Drive an agent with update_frequency 2 and verify its bookkeeping.

    The test replaces ``_update_networks`` with a ``MagicMock`` and
    ``_choose_action`` with a scripted ``Mock``. It then runs a fixed
    sequence of ``start``/``step``/``end`` calls and, after each one
    following the initial ``start``, asserts the contents of the three
    trajectory lists and the update call count.
    """
    parameters = mock_parameters.return_value
    self._setup_parameters(parameters)
    parameters.update_frequency = 2

    action_kinds = spaces.Discrete(2)
    observation_range = spaces.Box(0, 1, (1,))
    critic = ActorCritic('', observation_range, action_kinds)
    critic._update_networks = MagicMock()
    critic._choose_action = Mock(side_effect=[
        (0, ''), (1, ''), (1, ''), (0, ''), (1, ''), (0, '')])

    def as_state(value):
        # Single-element float32 observation.
        return np.array([value], np.float32)

    critic.start(as_state(0.1))

    # Each row: (method name, scalar arguments with the observation last,
    #            expected rewards, actions, states, update call count).
    script = (
        ('step', (0.1, 0.2), [0.1], [0, 1], [0.1, 0.2], 0),
        ('step', (0.2, 0.3), [], [1], [0.3], 1),
        ('step', (0.3, 0.4), [0.3], [1, 0], [0.3, 0.4], 1),
        ('start', (0.5,), [], [1], [0.5], 1),
        ('step', (0.4, 0.6), [], [0], [0.6], 2),
        ('end', (0.5, 0.7), [0.5], [0], [0.6], 2),
    )
    for method, scalars, rewards, actions, states, update_calls in script:
        # The trailing scalar is the observation; anything before it is
        # the reward, which is passed through unchanged.
        *leading, observed = scalars
        getattr(critic, method)(*leading, as_state(observed))

        self.assertEqual(critic._trajectory_rewards, rewards)
        self.assertEqual(critic._trajectory_actions, actions)
        self.assertEqual(critic._trajectory_states, states)
        self.assertEqual(critic._update_networks.call_count, update_calls)
def test_rollout(self):
    """Check the trajectory lists built up over one short episode.

    With ``_choose_action`` scripted to return actions 0, 1, 1, the test
    calls ``start`` and two ``step``s, then expects two rewards, three
    actions and three states. After ``end`` it expects the final reward
    to be appended while actions and states stay as they were.
    """
    discrete_actions = spaces.Discrete(2)
    unit_box = spaces.Box(0, 1, (1, ))
    agent = ActorCritic('', unit_box, discrete_actions)
    agent._choose_action = Mock(side_effect=[(0, ''), (1, ''), (1, '')])

    # These two expectations are the same before and after end().
    expected_actions = [0, 1, 1]
    expected_states = [0.1, 0.2, 0.3]

    agent.start(np.array([0.1], np.float32))
    for reward, next_value in ((0.1, 0.2), (0.2, 0.3)):
        agent.step(reward, np.array([next_value], np.float32))

    self.assertEqual(agent._trajectory_rewards, [0.1, 0.2])
    self.assertEqual(agent._trajectory_actions, expected_actions)
    self.assertEqual(agent._trajectory_states, expected_states)

    agent.end(0.3, np.array([0.4], np.float32))

    self.assertEqual(agent._trajectory_rewards, [0.1, 0.2, 0.3])
    self.assertEqual(agent._trajectory_actions, expected_actions)
    self.assertEqual(agent._trajectory_states, expected_states)
def test_rollout(self):
    """Run start, two steps and end, asserting the recorded trajectory.

    Actions come from a scripted ``Mock`` (0, 1, 1). The trajectory is
    checked once after the second ``step`` and once more after ``end``;
    between the two checks only the expected reward list differs.
    """
    action_kinds = spaces.Discrete(2)
    observation_range = spaces.Box(0, 1, (1,))
    critic = ActorCritic('', observation_range, action_kinds)
    critic._choose_action = Mock(side_effect=[(0, ''), (1, ''), (1, '')])

    def as_state(value):
        # Single-element float32 observation.
        return np.array([value], np.float32)

    def check_trajectory(rewards):
        # Actions and states are expected to be identical at both checks.
        self.assertEqual(critic._trajectory_rewards, rewards)
        self.assertEqual(critic._trajectory_actions, [0, 1, 1])
        self.assertEqual(critic._trajectory_states, [0.1, 0.2, 0.3])

    critic.start(as_state(0.1))
    critic.step(0.1, as_state(0.2))
    critic.step(0.2, as_state(0.3))
    check_trajectory([0.1, 0.2])

    critic.end(0.3, as_state(0.4))
    check_trajectory([0.1, 0.2, 0.3])
def test_rollout_preprocess(self, mock_parameters):
    """Verify trajectory states when a SlidingWindow preprocessor is set.

    ``preprocessing`` and ``preprocessing_args`` on the mocked parameters
    are set to the ``SlidingWindow`` class path and ``(2, "float32")``.
    The expected states are 2x1 arrays pairing each observation with the
    one before it, the first being paired with 0. The trajectory is
    checked after the second ``step`` and again after ``end``, where only
    the expected rewards differ.
    """
    parameters = mock_parameters.return_value
    self._setup_parameters(parameters)
    parameters.preprocessing = (
        'cntk.contrib.deeprl.agent.shared.preprocessing.SlidingWindow')
    parameters.preprocessing_args = '(2, "float32")'

    action_kinds = spaces.Discrete(2)
    observation_range = spaces.Box(0, 1, (1,))
    critic = ActorCritic('', observation_range, action_kinds)
    critic._choose_action = Mock(side_effect=[(0, ''), (1, ''), (1, '')])

    def window(previous, current):
        # Expected windowed state: previous observation above current one.
        return np.array([[previous], [current]], np.float32)

    def check_trajectory(rewards):
        # Actions and windowed states are expected to match at both checks.
        self.assertEqual(critic._trajectory_rewards, rewards)
        self.assertEqual(critic._trajectory_actions, [0, 1, 1])
        np.testing.assert_array_equal(
            critic._trajectory_states,
            [window(0, 0.1), window(0.1, 0.2), window(0.2, 0.3)])

    critic.start(np.array([0.1], np.float32))
    critic.step(0.1, np.array([0.2], np.float32))
    critic.step(0.2, np.array([0.3], np.float32))
    check_trajectory([0.1, 0.2])

    critic.end(0.3, np.array([0.4], np.float32))
    check_trajectory([0.1, 0.2, 0.3])