def _get_sac_trainer_params(env, sac_model_params, use_gpu):
    """Build the positional args and kwargs for constructing a SACTrainer.

    Args:
        env: environment wrapper exposing `normalization`,
            `normalization_action`, and a gym-style `action_space`
            (presumably an OpenAIGymEnvironment — TODO confirm).
        sac_model_params: parameter bundle with `q_network`, `value_network`,
            `actor_network`, and `training` sections.
        use_gpu: if True, move networks and action-range tensors to CUDA.

    Returns:
        (trainer_args, trainer_kwargs) suitable for `SACTrainer(*args, **kwargs)`.
    """
    state_dim = get_num_output_features(env.normalization)
    action_dim = get_num_output_features(env.normalization_action)
    q1_network = FullyConnectedParametricDQN(
        state_dim,
        action_dim,
        sac_model_params.q_network.layers,
        sac_model_params.q_network.activations,
    )
    q2_network = None
    if sac_model_params.training.use_2_q_functions:
        q2_network = FullyConnectedParametricDQN(
            state_dim,
            action_dim,
            sac_model_params.q_network.layers,
            sac_model_params.q_network.activations,
        )
    value_network = FullyConnectedNetwork(
        [state_dim] + sac_model_params.value_network.layers + [1],
        sac_model_params.value_network.activations + ["linear"],
    )
    actor_network = GaussianFullyConnectedActor(
        state_dim,
        action_dim,
        sac_model_params.actor_network.layers,
        sac_model_params.actor_network.activations,
    )

    # Training-time range is kept strictly inside (-1, 1); serving range is
    # the environment's real action-space bounds.
    min_action_range_tensor_training = torch.full((1, action_dim), -1 + 1e-6)
    max_action_range_tensor_training = torch.full((1, action_dim), 1 - 1e-6)
    action_range_low = env.action_space.low.astype(np.float32)
    action_range_high = env.action_space.high.astype(np.float32)
    min_action_range_tensor_serving = torch.from_numpy(action_range_low).unsqueeze(
        dim=0
    )
    max_action_range_tensor_serving = torch.from_numpy(action_range_high).unsqueeze(
        dim=0
    )

    if use_gpu:
        q1_network.cuda()
        if q2_network:
            q2_network.cuda()
        value_network.cuda()
        actor_network.cuda()
        # Fix: the range tensors must live on the same device as the networks
        # (the original left them on CPU; the sibling typed implementation of
        # _get_sac_trainer_params in this file moves them to CUDA).
        min_action_range_tensor_training = min_action_range_tensor_training.cuda()
        max_action_range_tensor_training = max_action_range_tensor_training.cuda()
        min_action_range_tensor_serving = min_action_range_tensor_serving.cuda()
        max_action_range_tensor_serving = max_action_range_tensor_serving.cuda()

    # Copy the target AFTER the optional .cuda() so it inherits the device.
    value_network_target = deepcopy(value_network)

    trainer_args = [
        q1_network,
        value_network,
        value_network_target,
        actor_network,
        sac_model_params,
    ]
    trainer_kwargs = {
        "q2_network": q2_network,
        "min_action_range_tensor_training": min_action_range_tensor_training,
        "max_action_range_tensor_training": max_action_range_tensor_training,
        "min_action_range_tensor_serving": min_action_range_tensor_serving,
        "max_action_range_tensor_serving": max_action_range_tensor_serving,
    }
    return trainer_args, trainer_kwargs
def test_basic(self):
    """Smoke test: a prototype input yields an action of the expected shape."""
    state_dim, action_dim = 8, 4
    actor = GaussianFullyConnectedActor(
        state_dim,
        action_dim,
        sizes=[7, 6],
        activations=["relu", "relu"],
        use_batch_norm=True,
    )
    proto = actor.input_prototype()
    self.assertEqual((1, state_dim), proto.state.float_features.shape)
    # Batch norm needs more than one example in training mode, so eval() first.
    actor.eval()
    result = actor(proto)
    self.assertEqual((1, action_dim), result.action.shape)
def test_get_log_prob(self):
    """The log-prob returned at sampling time must match get_log_prob()."""
    torch.manual_seed(0)
    state_dim, action_dim = 8, 4
    actor = GaussianFullyConnectedActor(
        state_dim,
        action_dim,
        sizes=[7, 6],
        activations=["relu", "relu"],
        use_batch_norm=False,
    )
    proto = actor.input_prototype()
    self.assertEqual((1, state_dim), proto.state.float_features.shape)
    sampled = actor(proto)
    squashed = sampled.action.detach()
    recomputed_log_prob = actor.get_log_prob(proto.state, squashed)
    npt.assert_allclose(sampled.log_prob.detach(), recomputed_log_prob, rtol=1e-6)
def get_sac_trainer(self, env, parameters, use_gpu):
    """Construct a SACTrainer for `env` from the given parameter bundle."""
    state_dim = get_num_output_features(env.normalization)
    action_dim = get_num_output_features(env.normalization_continuous_action)

    def _make_q_network():
        # Both critics share the same topology.
        return FullyConnectedParametricDQN(
            state_dim,
            action_dim,
            parameters.q_network.layers,
            parameters.q_network.activations,
        )

    q1_network = _make_q_network()
    q2_network = _make_q_network() if parameters.training.use_2_q_functions else None

    # Dirichlet actor when actions must sum to a constant, Gaussian otherwise.
    actor_cls = (
        DirichletFullyConnectedActor
        if parameters.constrain_action_sum
        else GaussianFullyConnectedActor
    )
    actor_network = actor_cls(
        state_dim,
        action_dim,
        parameters.actor_network.layers,
        parameters.actor_network.activations,
    )

    value_network = None
    if parameters.training.use_value_network:
        value_network = FullyConnectedNetwork(
            [state_dim] + parameters.value_network.layers + [1],
            parameters.value_network.activations + ["linear"],
        )

    if use_gpu:
        for net in (q1_network, q2_network, value_network, actor_network):
            if net is not None:
                net.cuda()

    return SACTrainer(
        q1_network,
        actor_network,
        parameters,
        use_gpu=use_gpu,
        value_network=value_network,
        q2_network=q2_network,
    )
def get_sac_trainer(self, env, parameters, use_gpu):
    """Assemble SAC networks for `env` and wrap them in a SACTrainer."""
    state_dim = get_num_output_features(env.normalization)
    action_dim = get_num_output_features(env.normalization_action)

    q1_network = FullyConnectedParametricDQN(
        state_dim,
        action_dim,
        parameters.q_network.layers,
        parameters.q_network.activations,
    )
    q2_network = (
        FullyConnectedParametricDQN(
            state_dim,
            action_dim,
            parameters.q_network.layers,
            parameters.q_network.activations,
        )
        if parameters.training.use_2_q_functions
        else None
    )
    value_network = FullyConnectedNetwork(
        [state_dim] + parameters.value_network.layers + [1],
        parameters.value_network.activations + ["linear"],
    )
    actor_network = GaussianFullyConnectedActor(
        state_dim,
        action_dim,
        parameters.actor_network.layers,
        parameters.actor_network.activations,
    )

    if use_gpu:
        for net in (q1_network, q2_network, value_network, actor_network):
            if net is not None:
                net.cuda()

    # Copy AFTER the optional device move so the target shares the device.
    value_network_target = deepcopy(value_network)

    return SACTrainer(
        q1_network,
        value_network,
        value_network_target,
        actor_network,
        parameters,
        q2_network=q2_network,
    )
def test_exported_actor_randomness(self):
    """Two runs of the exported Caffe2 net should sample different actions."""
    state_dim, action_dim = 8, 4
    model = GaussianFullyConnectedActor(
        state_dim,
        action_dim,
        sizes=[7, 6],
        activations=["relu", "relu"],
        use_batch_norm=False,
    )
    net = save_pytorch_model_and_load_c2_net(model)

    proto = model.input_prototype()
    for blob_name, tensor in zip(
        model.input_blob_names(), _flatten_named_tuple(proto)
    ):
        workspace.FeedBlob(blob_name, tensor.numpy())

    output_blob = model.output_blob_names()[0]
    workspace.RunNet(net)
    first_action = workspace.FetchBlob(output_blob)
    workspace.RunNet(net)
    second_action = workspace.FetchBlob(output_blob)

    # The actor is stochastic, so consecutive samples must differ noticeably.
    difference = sum(abs(first_action[0] - second_action[0]))
    self.assertGreater(difference, 0.01)
def test_save_load(self):
    """The actor round-trips through save/load with the expected blob counts."""
    model = GaussianFullyConnectedActor(
        8,  # state_dim
        4,  # action_dim
        sizes=[7, 6],
        activations=["relu", "relu"],
        use_batch_norm=False,
    )
    check_save_load(
        self,
        model,
        8,  # expected_num_params
        1,  # expected_num_inputs
        1,  # expected_num_outputs
    )
def build_actor(
    self,
    state_normalization_data: NormalizationData,
    action_normalization_data: NormalizationData,
) -> ModelBase:
    """Create a Gaussian fully-connected actor sized from the normalization data."""
    state_dim, action_dim = (
        get_num_output_features(data.dense_normalization_parameters)
        for data in (state_normalization_data, action_normalization_data)
    )
    return GaussianFullyConnectedActor(
        state_dim=state_dim,
        action_dim=action_dim,
        sizes=self.sizes,
        activations=self.activations,
        use_batch_norm=self.use_batch_norm,
        use_layer_norm=self.use_layer_norm,
    )
def test_save_load(self):
    """Save/load round-trip; output equality is skipped because sampling is stochastic."""
    model = GaussianFullyConnectedActor(
        8,  # state_dim
        4,  # action_dim
        sizes=[7, 6],
        activations=["relu", "relu"],
        use_batch_norm=False,
    )
    # Actor output is stochastic and won't match between PyTorch & Caffe2,
    # so only the blob counts are verified.
    check_save_load(
        self,
        model,
        6,  # expected_num_params
        1,  # expected_num_inputs
        1,  # expected_num_outputs
        check_equality=False,
    )
def get_sac_trainer(
    self,
    env,
    use_gpu,
    use_2_q_functions=False,
    logged_action_uniform_prior=True,
    constrain_action_sum=False,
    use_value_network=True,
):
    """Build a SACTrainer for `env` using fixed [128, 64] relu networks."""

    def _ff_params():
        # All networks in this helper share the same hidden topology.
        return FeedForwardParameters(layers=[128, 64], activations=["relu", "relu"])

    q_network_params = _ff_params()
    value_network_params = _ff_params()
    actor_network_params = _ff_params()

    state_dim = get_num_output_features(env.normalization)
    action_dim = get_num_output_features(env.normalization_continuous_action)

    q1_network = FullyConnectedParametricDQN(
        state_dim, action_dim, q_network_params.layers, q_network_params.activations
    )
    q2_network = None
    if use_2_q_functions:
        q2_network = FullyConnectedParametricDQN(
            state_dim,
            action_dim,
            q_network_params.layers,
            q_network_params.activations,
        )

    # Dirichlet actor when actions must sum to a constant, Gaussian otherwise.
    actor_cls = (
        DirichletFullyConnectedActor
        if constrain_action_sum
        else GaussianFullyConnectedActor
    )
    actor_network = actor_cls(
        state_dim,
        action_dim,
        actor_network_params.layers,
        actor_network_params.activations,
    )

    value_network = None
    if use_value_network:
        value_network = FullyConnectedNetwork(
            [state_dim] + value_network_params.layers + [1],
            value_network_params.activations + ["linear"],
        )

    if use_gpu:
        for net in (q1_network, q2_network, value_network, actor_network):
            if net is not None:
                net.cuda()

    parameters = SACTrainerParameters(
        rl=RLParameters(gamma=DISCOUNT, target_update_rate=0.5),
        minibatch_size=self.minibatch_size,
        q_network_optimizer=OptimizerParameters(),
        value_network_optimizer=OptimizerParameters(),
        actor_network_optimizer=OptimizerParameters(),
        alpha_optimizer=OptimizerParameters(),
        logged_action_uniform_prior=logged_action_uniform_prior,
    )
    return SACTrainer(
        q1_network,
        actor_network,
        parameters,
        use_gpu=use_gpu,
        value_network=value_network,
        q2_network=q2_network,
    )
def _get_sac_trainer_params(
    env: OpenAIGymEnvironment,
    sac_model_params: SACModelParameters,
    use_gpu: bool,
):
    """Build (trainer_args, trainer_kwargs) for constructing a SACTrainer.

    Networks are built from `sac_model_params`; action-range tensors come from
    the env's action space. Everything is moved to CUDA when `use_gpu` is set.
    """
    state_dim = get_num_output_features(env.normalization)
    action_dim = get_num_output_features(env.normalization_action)

    def _make_q_network():
        # Both critics share the same topology.
        return FullyConnectedParametricDQN(
            state_dim,
            action_dim,
            sac_model_params.q_network.layers,
            sac_model_params.q_network.activations,
        )

    q1_network = _make_q_network()
    q2_network = (
        _make_q_network() if sac_model_params.training.use_2_q_functions else None
    )

    value_network = None
    if sac_model_params.training.use_value_network:
        assert sac_model_params.value_network is not None
        value_network = FullyConnectedNetwork(
            [state_dim] + sac_model_params.value_network.layers + [1],
            sac_model_params.value_network.activations + ["linear"],
        )

    actor_network = GaussianFullyConnectedActor(
        state_dim,
        action_dim,
        sac_model_params.actor_network.layers,
        sac_model_params.actor_network.activations,
    )

    # Training range is kept strictly inside (-1, 1); serving range uses the
    # raw action-space bounds.
    min_action_range_tensor_training = torch.full((1, action_dim), -1 + 1e-6)
    max_action_range_tensor_training = torch.full((1, action_dim), 1 - 1e-6)
    min_action_range_tensor_serving = (
        torch.from_numpy(env.action_space.low).float().unsqueeze(dim=0)  # type: ignore
    )
    max_action_range_tensor_serving = (
        torch.from_numpy(env.action_space.high).float().unsqueeze(dim=0)  # type: ignore
    )

    if use_gpu:
        q1_network.cuda()
        if q2_network:
            q2_network.cuda()
        if value_network:
            value_network.cuda()
        actor_network.cuda()
        # Range tensors must live on the same device as the networks.
        min_action_range_tensor_training = min_action_range_tensor_training.cuda()
        max_action_range_tensor_training = max_action_range_tensor_training.cuda()
        min_action_range_tensor_serving = min_action_range_tensor_serving.cuda()
        max_action_range_tensor_serving = max_action_range_tensor_serving.cuda()

    trainer_args = [q1_network, actor_network, sac_model_params]
    trainer_kwargs = {
        "value_network": value_network,
        "q2_network": q2_network,
        "min_action_range_tensor_training": min_action_range_tensor_training,
        "max_action_range_tensor_training": max_action_range_tensor_training,
        "min_action_range_tensor_serving": min_action_range_tensor_serving,
        "max_action_range_tensor_serving": max_action_range_tensor_serving,
    }
    return trainer_args, trainer_kwargs
def get_sac_trainer(
    env: OpenAIGymEnvironment,
    rl_parameters: RLParameters,
    trainer_parameters: SACTrainerParameters,
    critic_training: FeedForwardParameters,
    actor_training: FeedForwardParameters,
    sac_value_training: Optional[FeedForwardParameters],
    use_gpu: bool,
) -> SACTrainer:
    """Construct a SACTrainer for a gym environment from the given configs."""
    # The standalone RL parameters must agree with those embedded in the
    # trainer parameters.
    assert rl_parameters == trainer_parameters.rl

    state_dim = get_num_output_features(env.normalization)
    action_dim = get_num_output_features(env.normalization_action)

    q1_network = FullyConnectedParametricDQN(
        state_dim, action_dim, critic_training.layers, critic_training.activations
    )
    q2_network = None
    # TODO:
    # if trainer_parameters.use_2_q_functions:
    #     q2_network = FullyConnectedParametricDQN(
    #         state_dim,
    #         action_dim,
    #         critic_training.layers,
    #         critic_training.activations,
    #     )

    value_network = None
    if sac_value_training:
        value_network = FullyConnectedNetwork(
            [state_dim] + sac_value_training.layers + [1],
            sac_value_training.activations + ["linear"],
        )

    actor_network = GaussianFullyConnectedActor(
        state_dim, action_dim, actor_training.layers, actor_training.activations
    )

    # Training bounds stay strictly inside (-1, 1); serving bounds come from
    # the raw action space.
    min_action_range_tensor_training = torch.full((1, action_dim), -1 + 1e-6)
    max_action_range_tensor_training = torch.full((1, action_dim), 1 - 1e-6)
    min_action_range_tensor_serving = (
        torch.from_numpy(env.action_space.low).float().unsqueeze(dim=0)  # type: ignore
    )
    max_action_range_tensor_serving = (
        torch.from_numpy(env.action_space.high).float().unsqueeze(dim=0)  # type: ignore
    )

    if use_gpu:
        q1_network.cuda()
        if q2_network:
            q2_network.cuda()
        if value_network:
            value_network.cuda()
        actor_network.cuda()
        # Range tensors must live on the same device as the networks.
        min_action_range_tensor_training = min_action_range_tensor_training.cuda()
        max_action_range_tensor_training = max_action_range_tensor_training.cuda()
        min_action_range_tensor_serving = min_action_range_tensor_serving.cuda()
        max_action_range_tensor_serving = max_action_range_tensor_serving.cuda()

    return SACTrainer(
        q1_network,
        actor_network,
        trainer_parameters,
        use_gpu=use_gpu,
        value_network=value_network,
        q2_network=q2_network,
        min_action_range_tensor_training=min_action_range_tensor_training,
        max_action_range_tensor_training=max_action_range_tensor_training,
        min_action_range_tensor_serving=min_action_range_tensor_serving,
        max_action_range_tensor_serving=max_action_range_tensor_serving,
    )