def test_forward(self, num_states, num_actions, batch_size):
    """A tabular Q-function maps (state, action) to one scalar per batch element."""
    q_fn = TabularQFunction(num_states=num_states, num_actions=num_actions)
    s = random_tensor(True, num_states, batch_size)
    a = random_tensor(True, num_actions, batch_size)
    out = q_fn(s, a)
    # Batched input -> (batch_size,); unbatched -> scalar (empty shape).
    expected = torch.Size([batch_size]) if batch_size else torch.Size([])
    assert out.shape == expected
    assert out.dtype is torch.get_default_dtype()
def test_forward(
    self, discrete_state, discrete_action, dim_state, dim_action, batch_size
):
    """NN Q-function forward pass returns one scalar value per batch element."""
    # Discrete states with continuous actions is an unsupported combination; skip it.
    if discrete_state and not discrete_action:
        return
    self.init(discrete_state, discrete_action, dim_state, dim_action)
    s = random_tensor(discrete_state, dim_state, batch_size)
    a = random_tensor(discrete_action, dim_action, batch_size)
    out = self.q_function(s, a)
    expected = torch.Size([batch_size]) if batch_size else torch.Size([])
    assert out.shape == expected
    assert out.dtype is torch.get_default_dtype()
def test_input_transform(self, batch_size):
    """A Q-function with an input transform still yields per-element scalar values."""
    q_fn = NNQFunction(
        dim_state=(2,),
        dim_action=(1,),
        layers=[64, 64],
        non_linearity="Tanh",
        input_transform=StateTransform(),
    )
    s = random_tensor(False, 2, batch_size)
    a = random_tensor(False, 1, batch_size)
    out = q_fn(s, a)
    expected = torch.Size([batch_size]) if batch_size else torch.Size([])
    assert out.shape == expected
    assert out.dtype is torch.get_default_dtype()
def test_goal(self, batch_size):
    """Changing the goal via set_goal must change the policy's output distribution."""
    goal = random_tensor(False, 3, None)
    self.init(False, False, 4, 2, goal=goal)
    state = random_tensor(False, 4, batch_size)

    dist = tensor_to_distribution(self.policy(state))
    drawn = dist.sample()
    expected = torch.Size([batch_size, 2] if batch_size else [2])
    assert drawn.shape == expected
    assert drawn.dtype is torch.get_default_dtype()

    # A different goal on the same state should move every mean entry.
    self.policy.set_goal(random_tensor(False, 3, None))
    new_dist = tensor_to_distribution(self.policy(state))
    assert not torch.any(new_dist.mean == dist.mean)
def test_call(self, discrete_state, discrete_action, dim_state, dim_action, batch_size):
    """Calling the policy yields the right distribution type, parameter shapes, and sample shape."""
    self.init(discrete_state, discrete_action, dim_state, dim_action)
    state = random_tensor(discrete_state, dim_state, batch_size)
    dist = tensor_to_distribution(self.policy(state))
    drawn = dist.sample()

    if dist.has_enumerate_support:
        # Discrete action space: a Categorical over num_actions logits.
        assert isinstance(dist, Categorical)
        if batch_size:
            assert dist.logits.shape == (batch_size, self.num_actions)
            assert drawn.shape == (batch_size,)
        else:
            assert dist.logits.shape == (self.num_actions,)
            assert drawn.shape == ()
        return

    # Continuous action space: a full-covariance Gaussian.
    assert isinstance(dist, MultivariateNormal)
    d = self.dim_action[0]
    if batch_size:
        assert dist.mean.shape == (batch_size,) + self.dim_action
        assert dist.covariance_matrix.shape == (batch_size, d, d)
        assert drawn.shape == (batch_size, dim_action)
    else:
        assert dist.mean.shape == self.dim_action
        assert dist.covariance_matrix.shape == (d, d)
        assert drawn.shape == (dim_action,)
def test_forward(self, dim_state, dim_action, batch_size, deterministic):
    """Deterministic policies return a Delta; stochastic ones a MultivariateNormal."""
    self.init(False, False, dim_state, dim_action, deterministic)
    state = random_tensor(False, dim_state, batch_size)
    dist = tensor_to_distribution(self.policy(state))
    drawn = dist.sample()

    expected_type = Delta if deterministic else MultivariateNormal
    assert isinstance(dist, expected_type)

    d = self.dim_action[0]
    if batch_size:
        assert dist.mean.shape == (batch_size,) + self.dim_action
        # Delta distributions carry no covariance matrix to check.
        if not deterministic:
            assert dist.covariance_matrix.shape == (batch_size, d, d)
        assert drawn.shape == (batch_size, dim_action)
    else:
        assert dist.mean.shape == self.dim_action
        if not deterministic:
            assert dist.covariance_matrix.shape == (d, d)
        assert drawn.shape == torch.Size((dim_action,))
def test_from_nn(self, discrete_state, dim_state, dim_action, batch_size):
    """A policy built from an existing NN samples actions and exposes embeddings of the right shape."""
    self.init(discrete_state, False, dim_state, dim_action)
    backbone = HomoGaussianNN(
        self.policy.nn.kwargs["in_dim"],
        self.policy.nn.kwargs["out_dim"],
        layers=[20, 20],
        biased_head=False,
    )
    policy = NNPolicy.from_nn(
        backbone,
        self.dim_state,
        self.dim_action,
        num_states=self.num_states,
        num_actions=self.num_actions,
    )

    state = random_tensor(discrete_state, dim_state, batch_size)
    drawn = tensor_to_distribution(policy(state)).sample()
    feats = policy.embeddings(state)

    # Last hidden layer has width 20, hence embedding dim 20.
    assert drawn.shape == torch.Size([batch_size, dim_action] if batch_size else [dim_action])
    assert feats.shape == torch.Size([batch_size, 20] if batch_size else [20])
    assert drawn.dtype is torch.get_default_dtype()
    assert feats.dtype is torch.get_default_dtype()
def test_embeddings(self, discrete_state, dim_state, batch_size):
    """Value-function embeddings have the fixed feature width (33) per batch element."""
    self.init(discrete_state, dim_state)
    state = random_tensor(discrete_state, dim_state, batch_size)
    feats = self.value_function.embeddings(state)
    expected = torch.Size([batch_size, 33] if batch_size else [33])
    assert feats.shape == expected
    assert feats.dtype is torch.get_default_dtype()
def test_forward(self, discrete_state, dim_state, num_heads, batch_size):
    """The ensemble value function emits one value per head per batch element."""
    self.init(discrete_state, dim_state, num_heads)
    state = random_tensor(discrete_state, dim_state, batch_size)
    out = self.value_function(state)
    expected = torch.Size([batch_size, num_heads] if batch_size else [num_heads])
    assert out.shape == expected
    assert out.dtype is torch.get_default_dtype()
def test_partial_q_function(self, num_states, num_actions, batch_size):
    """Calling a tabular Q-function with state only yields values for every action."""
    q_fn = TabularQFunction(num_states=num_states, num_actions=num_actions)
    s = random_tensor(True, num_states, batch_size)
    all_action_values = q_fn(s)
    expected = torch.Size([batch_size, num_actions] if batch_size else [num_actions])
    assert all_action_values.shape == expected
    assert all_action_values.dtype is torch.get_default_dtype()
def test_goal(self, batch_size):
    """A goal-conditioned NNPolicy changes its output when the goal is replaced."""
    goal = random_tensor(False, 3, None)
    policy = NNPolicy(dim_state=(4,), dim_action=(2,), layers=[32, 32], goal=goal)
    state = random_tensor(False, 4, batch_size)

    dist = tensor_to_distribution(policy(state))
    drawn = dist.sample()
    assert drawn.shape == torch.Size([batch_size, 2] if batch_size else [2])
    assert drawn.dtype is torch.get_default_dtype()

    # Swapping the goal must move every entry of the mean on the same state.
    policy.set_goal(random_tensor(False, 3, None))
    new_dist = tensor_to_distribution(policy(state))
    assert not torch.any(new_dist.mean == dist.mean)
def test_from_nn(
    self, discrete_state, discrete_action, dim_state, dim_action, batch_size
):
    """A Q-function wrapped around a plain linear module still produces scalar values."""
    # Discrete states with continuous actions is an unsupported combination; skip it.
    if discrete_state and not discrete_action:
        return
    self.init(discrete_state, discrete_action, dim_state, dim_action)
    linear = nn.Linear(
        self.q_function.nn.kwargs["in_dim"][0],
        self.q_function.nn.kwargs["out_dim"][0],
    )
    q_fn = NNQFunction.from_nn(
        linear,
        self.dim_state,
        self.dim_action,
        num_states=self.num_states,
        num_actions=self.num_actions,
    )
    s = random_tensor(discrete_state, dim_state, batch_size)
    a = random_tensor(discrete_action, dim_action, batch_size)
    out = q_fn(s, a)
    expected = torch.Size([batch_size]) if batch_size else torch.Size([])
    assert out.shape == expected
    assert out.dtype is torch.get_default_dtype()
def test_input_transform(self, batch_size):
    """An NNPolicy with an input transform still samples actions of the declared dimension."""
    policy = NNPolicy(
        dim_state=(2,),
        dim_action=(4,),
        layers=[64, 64],
        input_transform=StateTransform(),
    )
    state = random_tensor(False, 2, batch_size)
    dist = tensor_to_distribution(policy(state))
    drawn = dist.sample()
    assert drawn.shape == torch.Size([batch_size, 4] if batch_size else [4])
    assert drawn.dtype is torch.get_default_dtype()
def test_input_transform(self, num_heads, batch_size):
    """An ensemble value function with an input transform keeps per-head output shapes."""
    vf = NNEnsembleValueFunction(
        dim_state=(2,),
        num_heads=num_heads,
        layers=[64, 64],
        non_linearity="Tanh",
        input_transform=StateTransform(),
    )
    out = vf(random_tensor(False, 2, batch_size))
    expected = torch.Size([batch_size, num_heads] if batch_size else [num_heads])
    assert out.shape == expected
    assert out.dtype is torch.get_default_dtype()
def test_embeddings(self, discrete_state, dim_state, num_heads, batch_size, biased_head):
    """Embedding width equals the last layer size, plus one extra feature when the head is biased."""
    hidden = [64, 64]
    self.init(discrete_state, dim_state, num_heads, layers=hidden, biased_head=biased_head)
    # A biased head contributes one constant feature on top of the last hidden layer.
    feat_dim = hidden[-1] + (1 if biased_head else 0)
    state = random_tensor(discrete_state, dim_state, batch_size)
    feats = self.value_function.embeddings(state)
    expected = torch.Size(
        [batch_size, feat_dim, num_heads] if batch_size else [feat_dim, num_heads]
    )
    assert feats.shape == expected
    assert feats.dtype is torch.get_default_dtype()
def test_partial_q_function(
    self, discrete_state, discrete_action, dim_state, dim_action, batch_size
):
    """State-only evaluation works for discrete actions and raises otherwise."""
    # Discrete states with continuous actions is an unsupported combination; skip it.
    if discrete_state and not discrete_action:
        return
    self.init(discrete_state, discrete_action, dim_state, dim_action)
    state = random_tensor(discrete_state, dim_state, batch_size)

    if discrete_action:
        all_action_values = self.q_function(state)
        expected = torch.Size(
            [batch_size, self.num_actions] if batch_size else [self.num_actions]
        )
        assert all_action_values.shape == expected
        assert all_action_values.dtype is torch.get_default_dtype()
    else:
        # Continuous actions cannot be enumerated, so the partial call must raise.
        with pytest.raises(NotImplementedError):
            self.q_function(state)
def test_call(self, dim_state, dim_action, batch_size):
    """The policy returns a MultivariateNormal with consistent mean/covariance/sample shapes."""
    self.init(dim_state, dim_action)
    state = random_tensor(False, dim_state, batch_size)
    dist = tensor_to_distribution(self.policy(state))
    drawn = dist.sample()
    assert isinstance(dist, MultivariateNormal)

    prefix = (batch_size,) if batch_size else ()
    assert dist.mean.shape == prefix + (dim_action,)
    assert dist.covariance_matrix.shape == prefix + (dim_action, dim_action)
    assert drawn.shape == prefix + (dim_action,)
def test_from_nn(self, discrete_state, dim_state, batch_size):
    """A TorchScript-compiled value function built from a raw NN keeps value and embedding shapes."""
    self.init(discrete_state, dim_state)
    backbone = DeterministicNN(
        self.value_function.nn.kwargs["in_dim"],
        self.value_function.nn.kwargs["out_dim"],
        layers=[20, 20],
        biased_head=False,
    )
    # Scripting exercises the TorchScript compatibility of the wrapper.
    vf = torch.jit.script(
        NNValueFunction.from_nn(backbone, self.dim_state, num_states=self.num_states)
    )

    state = random_tensor(discrete_state, dim_state, batch_size)
    out = vf(state)
    feats = vf.embeddings(state)

    assert out.shape == torch.Size([batch_size] if batch_size else [])
    assert feats.shape == torch.Size([batch_size, 20] if batch_size else [20])
    assert out.dtype is torch.get_default_dtype()
    assert feats.dtype is torch.get_default_dtype()