def test_param_network(self, batch_size=1):
    """Check ParamNetwork output shapes in non-parallel and parallel modes."""
    input_spec = TensorSpec((3, 32, 32), torch.float32)
    conv_layer_params = ((16, (2, 2), 1, (1, 0)), (15, 2, (1, 2), 1, 2))
    fc_layer_params = ((128, True), )
    last_layer_size = 10
    last_activation = math_ops.identity
    network = ParamNetwork(input_spec,
                           conv_layer_params=conv_layer_params,
                           fc_layer_params=fc_layer_params,
                           last_layer_param=(last_layer_size, True),
                           last_activation=last_activation)
    # one hidden fc layer plus the last layer
    self.assertLen(network._fc_layers, 2)

    # test non-parallel forward
    image = input_spec.zeros(outer_dims=(batch_size, ))
    output, _ = network(image)
    output_shape = (batch_size, last_layer_size)
    self.assertEqual(output_shape[1:], network.output_spec.shape)
    self.assertEqual(output_shape, tuple(output.size()))

    # test parallel forward
    replica = 2
    image = input_spec.zeros(outer_dims=(batch_size, ))
    replica_image = input_spec.zeros(outer_dims=(batch_size, replica))
    params = torch.randn(replica, network.param_length)
    network.set_parameters(params)
    # shared input (one image broadcast to all replicas) and per-replica
    # input must produce the same output shape
    output, _ = network(image)
    replica_output, _ = network(replica_image)
    self.assertEqual(output.shape, replica_output.shape)
    output_shape = (batch_size, replica, last_layer_size)
    self.assertEqual(output_shape[1:], network.output_spec.shape)
    self.assertEqual(output_shape, tuple(output.size()))
def test_value_distribution(self, lstm_hidden_size):
    """Value net with mixed (image, vector) inputs and an input preprocessor.

    The image input is embedded to ``embedding_dim`` and concatenated with
    the raw vector input before the value head.
    """
    input_spec1 = TensorSpec((3, 20, 20))
    input_spec2 = TensorSpec((100, ))
    conv_layer_params = ((8, 3, 1), (16, 3, 2, 1))
    embedding_dim = 100
    image = input_spec1.zeros(outer_dims=(1, ))
    vector = input_spec2.zeros(outer_dims=(1, ))
    network_ctor, state = self._init(lstm_hidden_size)
    value_net = network_ctor(
        input_tensor_spec=[input_spec1, input_spec2],
        input_preprocessors=[
            EmbeddingPreprocessor(
                input_spec1,
                embedding_dim=embedding_dim,
                conv_layer_params=conv_layer_params),
            None  # vector input is passed through unchanged
        ],
        preprocessing_combiner=NestConcat())
    value, state = value_net([image, vector], state)
    # 100 (image embedding) + 100 (raw vector) after concatenation
    self.assertEqual(value_net._processed_input_tensor_spec.shape[0], 200)
    self.assertEqual(value_net.output_spec, TensorSpec(()))
    # (batch_size,)
    self.assertEqual(value.shape, (1, ))
def test_parallel_image_encoding_network(self, same_padding, flatten_output):
    """ParallelImageEncodingNetwork shapes for shared/non-shared inputs.

    The shared case feeds one image to all replicas; the non-shared case
    feeds a separate image per replica. Both must yield identical output
    shapes, so the (previously duplicated) checks are shared via a helper.
    """
    input_spec = TensorSpec((3, 80, 80), torch.float32)
    replica = 2
    network = ParallelImageEncodingNetwork(
        input_channels=input_spec.shape[0],
        input_size=input_spec.shape[1:3],
        n=replica,
        conv_layer_params=((16, (5, 3), 2, (1, 1)), (15, 3, (2, 2), 0)),
        same_padding=same_padding,
        flatten_output=flatten_output)
    # two conv layers, each with one weight and one bias parameter
    self.assertLen(list(network.parameters()), 4)
    batch_size = 3
    if same_padding:
        output_shape = (batch_size, replica, 15, 20, 20)
    else:
        output_shape = (batch_size, replica, 15, 19, 19)

    def _check_output(output):
        # Verify output tensor and output_spec shapes, accounting for
        # the optional flattening of the spatial dimensions.
        if flatten_output:
            self.assertEqual(
                (*output_shape[1:2], np.prod(output_shape[2:])),
                network.output_spec.shape)
            self.assertEqual(
                (*output_shape[0:2], np.prod(output_shape[2:])),
                tuple(output.size()))
        else:
            self.assertEqual(output_shape[1:], network.output_spec.shape)
            self.assertEqual(output_shape, tuple(output.size()))

    # 1) shared input case
    img = input_spec.zeros(outer_dims=(batch_size, ))
    output, _ = network(img)
    _check_output(output)

    # 2) non-shared input case
    img = input_spec.zeros(outer_dims=(batch_size, replica))
    output, _ = network(img)
    _check_output(output)
class ICMAlgorithmTest(alf.test.TestCase):
    """Tests for the Intrinsic Curiosity Module (ICM) algorithm."""

    def setUp(self):
        # A fixed 10-dim observation spec and a single MID time step shared
        # by every test in this class.
        self._input_tensor_spec = TensorSpec((10, ))
        self._time_step = TimeStep(
            step_type=StepType.MID,
            reward=0,
            discount=1,
            observation=self._input_tensor_spec.zeros(outer_dims=(1, )),
            prev_action=None,
            env_id=None)
        self._hidden_size = 100

    def test_discrete_action(self):
        """With all-zero inputs the inverse net should be maximally unsure.

        The expected cross-entropy loss for a uniform prediction over the
        4 actions is log(num_actions).
        """
        action_spec = BoundedTensorSpec((),
                                        dtype=torch.int64,
                                        minimum=0,
                                        maximum=3)
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))
        alg_step = alg.train_step(
            self._time_step._replace(prev_action=action_spec.zeros(
                outer_dims=(1, ))), state)
        # the inverse net should predict a uniform distribution
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(
                math.log(action_spec.maximum - action_spec.minimum + 1)),
            epsilon=1e-4)

    def test_continuous_action(self):
        """With all-zero inputs the inverse loss for a zero action is zero."""
        action_spec = TensorSpec((4, ))
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))
        alg_step = alg.train_step(
            self._time_step._replace(prev_action=action_spec.zeros(
                outer_dims=(1, ))), state)
        # the inverse net should predict a zero action vector
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(0))
def test_param_convnet(self,
                       batch_size=1,
                       same_padding=False,
                       use_bias=True,
                       flatten_output=False):
    """Check ParamConvNet output shapes in non-parallel and parallel modes."""
    # NOTE(review): ``use_bias`` is accepted but never forwarded to
    # ParamConvNet — confirm whether it should be passed through.
    input_spec = TensorSpec((3, 32, 32), torch.float32)
    network = ParamConvNet(input_channels=input_spec.shape[0],
                           input_size=input_spec.shape[1:],
                           conv_layer_params=((16, (2, 2), 1, (1, 0)),
                                              (15, 2, (1, 2), 1, 2)),
                           same_padding=same_padding,
                           activation=torch.tanh,
                           flatten_output=flatten_output)
    self.assertLen(network._conv_layers, 2)

    # test non-parallel forward
    image = input_spec.zeros(outer_dims=(batch_size, ))
    output, _ = network(image)
    if same_padding:
        output_shape = (batch_size, 15, 15, 7)
    else:
        output_shape = (batch_size, 15, 17, 8)
    if flatten_output:
        output_shape = (batch_size, np.prod(output_shape[1:]))
    self.assertEqual(output_shape[1:], network.output_spec.shape)
    self.assertEqual(output_shape, tuple(output.size()))

    # test parallel forward
    replica = 2
    image = input_spec.zeros(outer_dims=(batch_size, ))
    replica_image = input_spec.zeros(outer_dims=(batch_size, replica))
    params = torch.randn(replica, network.param_length)
    network.set_parameters(params)
    # shared input and per-replica input must yield the same output shape
    output, _ = network(image)
    replica_output, _ = network(replica_image)
    self.assertEqual(output.shape, replica_output.shape)
    if same_padding:
        output_shape = (batch_size, replica, 15, 15, 7)
    else:
        output_shape = (batch_size, replica, 15, 17, 8)
    if flatten_output:
        output_shape = (*output_shape[0:2], np.prod(output_shape[2:]))
    self.assertEqual(output_shape[1:], network.output_spec.shape)
    self.assertEqual(output_shape, tuple(output.size()))
def setUp(self):
    """Create a shared MID time step and a plain encoding network."""
    input_tensor_spec = TensorSpec((10, ))
    self._time_step = TimeStep(
        step_type=torch.tensor(StepType.MID, dtype=torch.int32),
        reward=0,
        discount=1,
        observation=input_tensor_spec.zeros(outer_dims=(1, )),
        prev_action=None,
        env_id=None)
    self._encoding_net = EncodingNetwork(
        input_tensor_spec=input_tensor_spec)
def test_parallel_image_decoding_network(self, preprocessing_fc_layers,
                                         same_padding):
    """ParallelImageDecodingNetwork shapes for shared/non-shared inputs."""
    input_spec = TensorSpec((100, ), torch.float32)
    replica = 2
    network = ParallelImageDecodingNetwork(
        input_size=input_spec.shape[0],
        n=replica,
        transconv_layer_params=((16, (2, 2), 1, (1, 0)), (64, 3, (1, 2),
                                                          0)),
        start_decoding_size=(20, 31),
        start_decoding_channels=8,
        same_padding=same_padding,
        preprocess_fc_layer_params=preprocessing_fc_layers)
    # 3 layers without preprocessing fc layers, 5 with; each layer has a
    # weight and a bias parameter.
    num_layers = 3 if preprocessing_fc_layers is None else 5
    self.assertLen(list(network.parameters()), num_layers * 2)
    batch_size = 3

    # 1) shared input case: one embedding broadcast to all replicas
    embedding = input_spec.zeros(outer_dims=(batch_size, ))
    output, _ = network(embedding)
    if same_padding:
        output_shape = (batch_size, replica, 64, 21, 63)
    else:
        output_shape = (batch_size, replica, 64, 21, 65)
    self.assertEqual(output_shape[1:], network.output_spec.shape)
    self.assertEqual(output_shape, tuple(output.size()))

    # 2) non-shared input case: a separate embedding per replica
    embedding = input_spec.zeros(outer_dims=(
        batch_size,
        replica,
    ))
    output, _ = network(embedding)
    if same_padding:
        output_shape = (batch_size, replica, 64, 21, 63)
    else:
        output_shape = (batch_size, replica, 64, 21, 65)
    self.assertEqual(output_shape[1:], network.output_spec.shape)
    self.assertEqual(output_shape, tuple(output.size()))
def test_encoding_network_img(self):
    """Image inputs are encoded and flattened by the internal conv net."""
    spec = TensorSpec((3, 80, 80), torch.float32)
    net = EncodingNetwork(input_tensor_spec=spec,
                          conv_layer_params=((16, (5, 3), 2, (1, 1)),
                                             (15, 3, (2, 2), 0)))
    # Two conv layers, each contributing a weight and a bias.
    self.assertLen(list(net.parameters()), 4)
    image = spec.zeros(outer_dims=(1, ))
    output, _ = net(image)
    conv_output_spec = net._img_encoding_net.output_spec
    self.assertEqual(output.shape[-1], np.prod(conv_output_spec.shape))
def test_continuous_skill_loss(self):
    """DIAYN with a continuous skill: an all-zero skill yields zero loss."""
    spec = TensorSpec((4, ))
    algorithm = DIAYNAlgorithm(skill_spec=spec,
                               encoding_net=self._encoding_net)
    zero_skill = spec.zeros(outer_dims=(1, ))
    state = zero_skill
    time_step = self._time_step._replace(
        observation=[self._time_step.observation, zero_skill])
    alg_step = algorithm.train_step(time_step, state)
    # The discriminator should predict a zero skill vector.
    self.assertTensorClose(
        torch.sum(alg_step.info.loss), torch.as_tensor(0))
def test_critic(self, lstm_hidden_size):
    """CriticNetwork forward pass, output specs, and make_parallel."""
    obs_spec = TensorSpec((3, 20, 20), torch.float32)
    action_spec = TensorSpec((5, ), torch.float32)
    input_spec = (obs_spec, action_spec)
    observation_conv_layer_params = ((8, 3, 1), (16, 3, 2, 1))
    action_fc_layer_params = (10, 8)
    joint_fc_layer_params = (6, 4)
    image = obs_spec.zeros(outer_dims=(1, ))
    action = action_spec.randn(outer_dims=(1, ))
    network_input = (image, action)
    network_ctor, state = self._init(lstm_hidden_size)
    critic_net = network_ctor(
        input_spec,
        observation_conv_layer_params=observation_conv_layer_params,
        action_fc_layer_params=action_fc_layer_params,
        joint_fc_layer_params=joint_fc_layer_params)
    value, state = critic_net._test_forward()
    self.assertEqual(value.shape, (1, ))
    if lstm_hidden_size is None:
        # stateless (non-RNN) critic has an empty state
        self.assertEqual(state, ())
    value, state = critic_net(network_input, state)
    self.assertEqual(critic_net.output_spec, TensorSpec(()))
    # (batch_size,)
    self.assertEqual(value.shape, (1, ))

    # test make_parallel
    pnet = critic_net.make_parallel(6)
    if lstm_hidden_size is not None:
        # shape of state should be [B, n, ...]; the non-expanded state
        # must be rejected
        self.assertRaises(AssertionError, pnet, network_input, state)
        state = alf.nest.map_structure(
            lambda x: x.unsqueeze(1).expand(x.shape[0], 6, x.shape[1]),
            state)
    if lstm_hidden_size is None:
        self.assertTrue(isinstance(pnet, ParallelCriticNetwork))
    else:
        self.assertTrue(isinstance(pnet, NaiveParallelNetwork))
    value, state = pnet(network_input, state)
    self.assertEqual(pnet.output_spec, TensorSpec((6, )))
    self.assertEqual(value.shape, (1, 6))
def test_parallel_q_network(self):
    """make_parallel on a QNetwork yields (B, n, num_actions) q-values."""
    obs_spec = TensorSpec([10])
    observations = obs_spec.zeros(outer_dims=(1, ))
    network_ctor, state = self._init(None)
    base_net = network_ctor(obs_spec, self._action_spec)
    num_replicas = 5
    parallel_net = base_net.make_parallel(num_replicas)
    q_values, _ = parallel_net(observations, state)
    # (batch_size, n, num_actions)
    self.assertEqual(q_values.shape, (1, num_replicas, self._num_actions))
def test_continuous_action(self):
    """ICM inverse loss is zero for an all-zero continuous action."""
    action_spec = TensorSpec((4, ))
    algorithm = ICMAlgorithm(action_spec=action_spec,
                             observation_spec=self._input_tensor_spec,
                             hidden_size=self._hidden_size)
    initial_state = self._input_tensor_spec.zeros(outer_dims=(1, ))
    zero_action = action_spec.zeros(outer_dims=(1, ))
    time_step = self._time_step._replace(prev_action=zero_action)
    alg_step = algorithm.train_step(time_step, initial_state)
    # The inverse net should predict a zero action vector.
    inverse_loss = alg_step.info.loss.extra['inverse_loss']
    self.assertTensorClose(torch.sum(inverse_loss), torch.as_tensor(0))
def test_encoding_network_nonimg(self, last_layer_size, last_activation,
                                 output_tensor_spec):
    """EncodingNetwork with fc layers: last-layer options and output spec.

    ``last_layer_size`` and ``last_activation`` must be provided together;
    supplying only one of them raises an ``AssertionError``.
    """
    input_spec = TensorSpec((100, ), torch.float32)
    embedding = input_spec.zeros(outer_dims=(1, ))
    if (last_layer_size is None and last_activation is not None) or (
            last_activation is None and last_layer_size is not None):
        with self.assertRaises(AssertionError):
            network = EncodingNetwork(
                input_tensor_spec=input_spec,
                output_tensor_spec=output_tensor_spec,
                fc_layer_params=(30, 40, 50),
                activation=torch.tanh,
                last_layer_size=last_layer_size,
                last_activation=last_activation)
    else:
        network = EncodingNetwork(input_tensor_spec=input_spec,
                                  output_tensor_spec=output_tensor_spec,
                                  fc_layer_params=(30, 40, 50),
                                  activation=torch.tanh,
                                  last_layer_size=last_layer_size,
                                  last_activation=last_activation)
        # 3 hidden fc layers, plus one more when a last layer is
        # requested; each layer has a weight and a bias.
        num_layers = 3 if last_layer_size is None else 4
        self.assertLen(list(network.parameters()), num_layers * 2)
        if last_activation is None:
            # without a dedicated last layer, the final fc layer keeps the
            # shared activation
            self.assertEqual(network._fc_layers[-1]._activation, torch.tanh)
        else:
            self.assertEqual(network._fc_layers[-1]._activation,
                             last_activation)
        output, _ = network(embedding)
        if output_tensor_spec is None:
            if last_layer_size is None:
                self.assertEqual(output.size()[1], 50)
            else:
                self.assertEqual(output.size()[1], last_layer_size)
            self.assertEqual(network.output_spec.shape,
                             tuple(output.size()[1:]))
        else:
            # an explicit output spec overrides the inferred output shape
            self.assertEqual(tuple(output.size()[1:]),
                             output_tensor_spec.shape)
            self.assertEqual(network.output_spec.shape,
                             output_tensor_spec.shape)
def test_non_rnn(self):
    """NaiveParallelNetwork replicates a feedforward net's parameters."""
    spec = TensorSpec((100, ), torch.float32)
    inputs = spec.zeros(outer_dims=(6, ))
    base_net = EncodingNetwork(input_tensor_spec=spec,
                               fc_layer_params=(30, 40, 50),
                               activation=torch.tanh)
    n_replicas = 4
    n_layers = 3
    parallel_net = NaiveParallelNetwork(base_net, n_replicas)
    # Each replica holds its own copy of every (weight, bias) pair.
    self.assertEqual(
        len(list(parallel_net.parameters())), n_layers * 2 * n_replicas)
    output, _ = parallel_net(inputs)
    self.assertEqual(output.shape, (6, n_replicas, 50))
    self.assertEqual(parallel_net.output_spec.shape, (n_replicas, 50))
def test_actor_networks(self, lstm_hidden_size):
    """Actor network maps an image observation to a (batch, 5) action."""
    obs_spec = TensorSpec((3, 20, 20), torch.float32)
    action_spec = BoundedTensorSpec((5, ), torch.float32, 2., 3.)
    observation = obs_spec.zeros(outer_dims=(1, ))
    network_ctor, state = self._init(lstm_hidden_size)
    actor_net = network_ctor(obs_spec,
                             action_spec,
                             conv_layer_params=((8, 3, 1), (16, 3, 2, 1)),
                             fc_layer_params=(10, 8))
    action, state = actor_net(observation, state)
    # (batch_size, num_actions)
    self.assertEqual(action.shape, (1, 5))
def test_agent_steps(self):
    """Agent predict/rollout/train steps and their ICM state handling.

    The intrinsic reward module (ICM) state should be empty during
    prediction and non-empty (and consistent) during rollout and training.
    """
    batch_size = 1
    observation_spec = TensorSpec((10, ))
    action_spec = BoundedTensorSpec((), dtype='int64')
    time_step = TimeStep(
        observation=observation_spec.zeros(outer_dims=(batch_size, )),
        prev_action=action_spec.zeros(outer_dims=(batch_size, )))
    actor_net = functools.partial(ActorDistributionNetwork,
                                  fc_layer_params=(100, ))
    value_net = functools.partial(ValueNetwork, fc_layer_params=(100, ))
    # TODO: add a goal generator and an entropy target algorithm once they
    # are implemented.
    agent = Agent(observation_spec=observation_spec,
                  action_spec=action_spec,
                  rl_algorithm_cls=functools.partial(
                      ActorCriticAlgorithm,
                      actor_network_ctor=actor_net,
                      value_network_ctor=value_net),
                  intrinsic_reward_module=ICMAlgorithm(
                      action_spec=action_spec,
                      observation_spec=observation_spec))
    predict_state = agent.get_initial_predict_state(batch_size)
    rollout_state = agent.get_initial_rollout_state(batch_size)
    train_state = agent.get_initial_train_state(batch_size)
    pred_step = agent.predict_step(time_step,
                                   predict_state,
                                   epsilon_greedy=0.1)
    # ICM is not used during prediction, so its state stays empty.
    self.assertEqual(pred_step.state.irm, ())
    rollout_step = agent.rollout_step(time_step, rollout_state)
    self.assertNotEqual(rollout_step.state.irm, ())
    exp = make_experience(time_step, rollout_step, rollout_state)
    train_step = agent.train_step(exp, train_state)
    self.assertNotEqual(train_step.state.irm, ())
    # Rollout and train should propagate the same ICM state.
    self.assertTensorEqual(rollout_step.state.irm, train_step.state.irm)
def test_rnn(self):
    """NaiveParallelNetwork over an LSTM net: state specs gain a replica dim."""
    input_spec = TensorSpec((100, ), torch.float32)
    embedding = input_spec.zeros(outer_dims=(6, ))
    network = LSTMEncodingNetwork(input_tensor_spec=input_spec,
                                  hidden_size=(30, 40))
    replicas = 4
    pnet = NaiveParallelNetwork(network, replicas)
    # Each LSTM layer contributes an (h, c) pair with a leading replica
    # dimension of 4.
    self.assertEqual(pnet.state_spec,
                     [(TensorSpec((4, 30)), TensorSpec((4, 30))),
                      (TensorSpec((4, 40)), TensorSpec((4, 40)))])
    state = alf.utils.common.zero_tensor_from_nested_spec(
        pnet.state_spec, 6)
    output, state = pnet(embedding, state)
    self.assertEqual(output.shape, (6, replicas, 40))
    self.assertEqual(pnet.output_spec.shape, (replicas, 40))
    # The returned state keeps the same nested spec structure.
    self.assertEqual(alf.utils.dist_utils.extract_spec(state),
                     [(TensorSpec((4, 30)), TensorSpec((4, 30))),
                      (TensorSpec((4, 40)), TensorSpec((4, 40)))])
def test_image_encoding_network(self, flatten_output, same_padding):
    """ImageEncodingNetwork conv stack produces the documented shapes."""
    spec = TensorSpec((3, 32, 32), torch.float32)
    image = spec.zeros(outer_dims=(1, ))
    net = ImageEncodingNetwork(input_channels=spec.shape[0],
                               input_size=spec.shape[1:],
                               conv_layer_params=((16, (2, 2), 1, (1, 0)),
                                                  (15, 2, (1, 2), 1)),
                               same_padding=same_padding,
                               activation=torch.tanh,
                               flatten_output=flatten_output)
    # two conv2d layers -> 2 weights + 2 biases
    self.assertLen(list(net.parameters()), 4)
    output, _ = net(image)
    expected = (15, 30, 15) if same_padding else (15, 34, 16)
    if flatten_output:
        expected = (np.prod(expected), )
    self.assertEqual(expected, net.output_spec.shape)
    self.assertEqual(expected, tuple(output.size()[1:]))
def test_image_decoding_network(self, preprocessing_fc_layers,
                                same_padding):
    """ImageDecodingNetwork upsamples an embedding to the expected image."""
    input_spec = TensorSpec((100, ), torch.float32)
    embedding = input_spec.zeros(outer_dims=(1, ))
    network = ImageDecodingNetwork(
        input_size=input_spec.shape[0],
        transconv_layer_params=((16, (2, 2), 1, (1, 0)), (64, 3, (1, 2),
                                                          0)),
        start_decoding_size=(20, 31),
        start_decoding_channels=8,
        same_padding=same_padding,
        preprocess_fc_layer_params=preprocessing_fc_layers)
    # 3 layers without preprocessing fc layers, 5 with; each layer has a
    # weight and a bias parameter.
    num_layers = 3 if preprocessing_fc_layers is None else 5
    self.assertLen(list(network.parameters()), num_layers * 2)
    output, _ = network(embedding)
    if same_padding:
        output_shape = (64, 21, 63)
    else:
        output_shape = (64, 21, 65)
    self.assertEqual(output_shape, network.output_spec.shape)
    self.assertEqual(output_shape, tuple(output.size()[1:]))
class EntropyTargetAlgorithmTest(parameterized.TestCase, alf.test.TestCase):
    """Smoke tests for EntropyTargetAlgorithm across projection networks."""

    def setUp(self):
        # A fixed 10-dim observation spec and a single MID time step shared
        # by every test in this class.
        self._input_tensor_spec = TensorSpec((10, ))
        self._time_step = TimeStep(
            step_type=torch.as_tensor(StepType.MID),
            reward=0,
            discount=1,
            observation=self._input_tensor_spec.zeros(outer_dims=(1, )),
            prev_action=None,
            env_id=None)
        self._hidden_size = 100

    @parameterized.parameters((NormalProjectionNetwork, False),
                              (NormalProjectionNetwork, True),
                              (StableNormalProjectionNetwork, False),
                              (StableNormalProjectionNetwork, True))
    def test_run_entropy_target_algorithm(self, network_ctor, scaled):
        """Run train_step and calc_loss through every internal stage."""
        action_spec = BoundedTensorSpec((1, ), minimum=0, maximum=3)
        alg = EntropyTargetAlgorithm(action_spec=action_spec)
        net = network_ctor(self._input_tensor_spec.shape[0],
                           action_spec,
                           projection_output_init_gain=1.0,
                           squash_mean=True,
                           scale_distribution=scaled)
        embedding = 10 * torch.rand(
            (100, ) + self._input_tensor_spec.shape, dtype=torch.float32)
        dist, _ = net(embedding)
        alg_step = alg.train_step(dist, self._time_step.step_type)
        info = EntropyTargetInfo(loss=alg_step.info.loss)
        # Exercise calc_loss in each stage value from -3 through 0.
        for i in range(-3, 1):
            alg._stage = torch.tensor(i, dtype=torch.int32)
            alg.calc_loss(self._time_step, info)
def testTensorSpecZero(self, dtype):
    """spec.zeros prepends outer_dims and fills the tensor with zeros."""
    spec = TensorSpec(self._shape, dtype)
    zeros = spec.zeros(outer_dims=(3, 10))
    self.assertEqual(zeros.shape, (3, 10) + self._shape)
    self.assertTrue(torch.all(zeros == 0))