def get_sac_parameters(
    self,
    use_2_q_functions=False,
    logged_action_uniform_prior=True,
    constrain_action_sum=False,
):
    return SACModelParameters(
        rl=RLParameters(gamma=DISCOUNT, target_update_rate=0.5),
        training=SACTrainingParameters(
            minibatch_size=self.minibatch_size,
            use_2_q_functions=use_2_q_functions,
            q_network_optimizer=OptimizerParameters(),
            value_network_optimizer=OptimizerParameters(),
            actor_network_optimizer=OptimizerParameters(),
            alpha_optimizer=OptimizerParameters(),
            logged_action_uniform_prior=logged_action_uniform_prior,
        ),
        q_network=FeedForwardParameters(
            layers=[128, 64], activations=["relu", "relu"]
        ),
        value_network=FeedForwardParameters(
            layers=[128, 64], activations=["relu", "relu"]
        ),
        actor_network=FeedForwardParameters(
            layers=[128, 64], activations=["relu", "relu"]
        ),
        constrain_action_sum=constrain_action_sum,
    )
def get_sac_parameters(self, use_2_q_functions=False):
    return SACModelParameters(
        rl=RLParameters(gamma=DISCOUNT, target_update_rate=0.5, reward_burnin=100),
        training=SACTrainingParameters(
            minibatch_size=self.minibatch_size,
            use_2_q_functions=use_2_q_functions,
            q_network_optimizer=OptimizerParameters(),
            value_network_optimizer=OptimizerParameters(),
            actor_network_optimizer=OptimizerParameters(),
        ),
        q_network=FeedForwardParameters(
            layers=[128, 64], activations=["relu", "relu"]
        ),
        value_network=FeedForwardParameters(
            layers=[128, 64], activations=["relu", "relu"]
        ),
        actor_network=FeedForwardParameters(
            layers=[128, 64], activations=["relu", "relu"]
        ),
    )
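# Usage sketch (assumed wiring, not taken from the source): the helpers above only
# assemble the SACModelParameters bundle; a trainer is then built by a factory such
# as get_sac_trainer, which is referenced in create_trainer below. `test_suite`
# stands in for whatever object defines get_sac_parameters, and `env` for the gym
# wrapper that create_trainer receives; both names are illustrative.
def build_sac_trainer(test_suite, env, use_gpu=False):
    sac_params = test_suite.get_sac_parameters(use_2_q_functions=True)
    return get_sac_trainer(env, sac_params, use_gpu)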
def create_trainer(model_type, params, rl_parameters, use_gpu, env):
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        # Discrete-action DQN
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            if isinstance(training_parameters.cnn_parameters, dict):
                training_parameters.cnn_parameters = CNNParameters(
                    **training_parameters.cnn_parameters
                )
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = DQNTrainer(trainer_params, env.normalization, use_gpu)
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        # Parametric-action DQN
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters, training=training_parameters, rainbow=rainbow_parameters
        )
        trainer = ParametricDQNTrainer(
            trainer_params, env.normalization, env.normalization_action, use_gpu
        )
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        # DDPG for continuous actions
        training_parameters = params["shared_training"]
        if isinstance(training_parameters, dict):
            training_parameters = DDPGTrainingParameters(**training_parameters)
        actor_parameters = params["actor_training"]
        if isinstance(actor_parameters, dict):
            actor_parameters = DDPGNetworkParameters(**actor_parameters)
        critic_parameters = params["critic_training"]
        if isinstance(critic_parameters, dict):
            critic_parameters = DDPGNetworkParameters(**critic_parameters)
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=training_parameters,
            actor_training=actor_parameters,
            critic_training=critic_parameters,
        )
        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)
        trainer = DDPGTrainer(
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )
    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        # Soft Actor-Critic
        trainer_params = SACModelParameters(
            rl=rl_parameters,
            training=SACTrainingParameters(
                minibatch_size=params["sac_training"]["minibatch_size"],
                use_2_q_functions=params["sac_training"]["use_2_q_functions"],
                q_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["q_network_optimizer"]
                ),
                value_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["value_network_optimizer"]
                ),
                actor_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["actor_network_optimizer"]
                ),
                entropy_temperature=params["sac_training"]["entropy_temperature"],
            ),
            q_network=FeedForwardParameters(**params["sac_q_training"]),
            value_network=FeedForwardParameters(**params["sac_value_training"]),
            actor_network=FeedForwardParameters(**params["sac_actor_training"]),
        )
        trainer = get_sac_trainer(env, trainer_params, use_gpu)
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))
    return trainer
def create_trainer(model_type, params, rl_parameters, use_gpu, env):
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        # Discrete-action DQN
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            if isinstance(training_parameters.cnn_parameters, dict):
                training_parameters.cnn_parameters = CNNParameters(
                    **training_parameters.cnn_parameters
                )
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = create_dqn_trainer_from_params(
            trainer_params, env.normalization, use_gpu
        )
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        # Parametric-action DQN
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters, training=training_parameters, rainbow=rainbow_parameters
        )
        trainer = create_parametric_dqn_trainer_from_params(
            trainer_params, env.normalization, env.normalization_action, use_gpu
        )
    elif model_type == ModelType.TD3.value:
        # Twin Delayed DDPG (TD3)
        trainer_params = TD3ModelParameters(
            rl=rl_parameters,
            training=TD3TrainingParameters(
                minibatch_size=params["td3_training"]["minibatch_size"],
                q_network_optimizer=OptimizerParameters(
                    **params["td3_training"]["q_network_optimizer"]
                ),
                actor_network_optimizer=OptimizerParameters(
                    **params["td3_training"]["actor_network_optimizer"]
                ),
                use_2_q_functions=params["td3_training"]["use_2_q_functions"],
                exploration_noise=params["td3_training"]["exploration_noise"],
                initial_exploration_ts=params["td3_training"]["initial_exploration_ts"],
                target_policy_smoothing=params["td3_training"][
                    "target_policy_smoothing"
                ],
                noise_clip=params["td3_training"]["noise_clip"],
                delayed_policy_update=params["td3_training"]["delayed_policy_update"],
            ),
            q_network=FeedForwardParameters(**params["td3_q_training"]),
            actor_network=FeedForwardParameters(**params["td3_actor_training"]),
        )
        trainer = get_td3_trainer(env, trainer_params, use_gpu)
    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        # Soft Actor-Critic; the value network and alpha optimizer are optional
        value_network = None
        value_network_optimizer = None
        alpha_optimizer = None
        if params["sac_training"]["use_value_network"]:
            value_network = FeedForwardParameters(**params["sac_value_training"])
            value_network_optimizer = OptimizerParameters(
                **params["sac_training"]["value_network_optimizer"]
            )
        if "alpha_optimizer" in params["sac_training"]:
            alpha_optimizer = OptimizerParameters(
                **params["sac_training"]["alpha_optimizer"]
            )
        entropy_temperature = params["sac_training"].get("entropy_temperature", None)
        target_entropy = params["sac_training"].get("target_entropy", None)
        trainer_params = SACModelParameters(
            rl=rl_parameters,
            training=SACTrainingParameters(
                minibatch_size=params["sac_training"]["minibatch_size"],
                use_2_q_functions=params["sac_training"]["use_2_q_functions"],
                use_value_network=params["sac_training"]["use_value_network"],
                q_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["q_network_optimizer"]
                ),
                value_network_optimizer=value_network_optimizer,
                actor_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["actor_network_optimizer"]
                ),
                entropy_temperature=entropy_temperature,
                target_entropy=target_entropy,
                alpha_optimizer=alpha_optimizer,
            ),
            q_network=FeedForwardParameters(**params["sac_q_training"]),
            value_network=value_network,
            actor_network=FeedForwardParameters(**params["sac_actor_training"]),
        )
        trainer = get_sac_trainer(env, trainer_params, use_gpu)
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))
    return trainer
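# Illustrative input, not taken from the source: create_trainer expects `params` to be
# the raw run-config dict. The key layout below is inferred from the lookups in the
# SOFT_ACTOR_CRITIC branch above; the numeric values and the optimizer dict fields
# (e.g. "learning_rate") are placeholders/assumptions, not values from the repo.
example_sac_params = {
    "sac_training": {
        "minibatch_size": 256,
        "use_2_q_functions": True,
        "use_value_network": True,
        "q_network_optimizer": {"learning_rate": 3e-4},
        "value_network_optimizer": {"learning_rate": 3e-4},
        "actor_network_optimizer": {"learning_rate": 3e-4},
    },
    "sac_q_training": {"layers": [256, 256], "activations": ["relu", "relu"]},
    "sac_value_training": {"layers": [256, 256], "activations": ["relu", "relu"]},
    "sac_actor_training": {"layers": [256, 256], "activations": ["relu", "relu"]},
}
# trainer = create_trainer(
#     ModelType.SOFT_ACTOR_CRITIC.value, example_sac_params, rl_parameters, False, env
# )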
def create_park_trainer(model_type, params, rl_parameters, use_gpu, env):
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        # Discrete-action DQN
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            if isinstance(training_parameters.cnn_parameters, dict):
                training_parameters.cnn_parameters = CNNParameters(
                    **training_parameters.cnn_parameters
                )
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = create_park_dqn_trainer_from_params(
            model=trainer_params,
            normalization_parameters=env.normalization,
            use_gpu=use_gpu,
            env=env.env,
        )
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        # Parametric-action DQN
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters, training=training_parameters, rainbow=rainbow_parameters
        )
        trainer = create_parametric_dqn_trainer_from_params(
            trainer_params,
            env.normalization,
            env.normalization_action,
            use_gpu,
            env=env.env,
        )
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        # DDPG for continuous actions
        training_parameters = params["shared_training"]
        if isinstance(training_parameters, dict):
            training_parameters = DDPGTrainingParameters(**training_parameters)
        actor_parameters = params["actor_training"]
        if isinstance(actor_parameters, dict):
            actor_parameters = DDPGNetworkParameters(**actor_parameters)
        critic_parameters = params["critic_training"]
        if isinstance(critic_parameters, dict):
            critic_parameters = DDPGNetworkParameters(**critic_parameters)
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=training_parameters,
            actor_training=actor_parameters,
            critic_training=critic_parameters,
        )
        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)
        state_dim = get_num_output_features(env.normalization)
        action_dim = get_num_output_features(env.normalization_action)
        # Build Actor Network
        actor_network = ActorNetModel(
            layers=(
                [state_dim] + trainer_params.actor_training.layers[1:-1] + [action_dim]
            ),
            activations=trainer_params.actor_training.activations,
            fl_init=trainer_params.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=False,
        )
        # Build Critic Network; ensure dims match input state and scalar output
        critic_network = CriticNetModel(
            layers=[state_dim] + trainer_params.critic_training.layers[1:-1] + [1],
            activations=trainer_params.critic_training.activations,
            fl_init=trainer_params.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=False,
        )
        trainer = DDPGTrainer(
            actor_network,
            critic_network,
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )
    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        # Soft Actor-Critic; the value network is optional
        value_network = None
        value_network_optimizer = None
        if params["sac_training"]["use_value_network"]:
            value_network = FeedForwardParameters(**params["sac_value_training"])
            value_network_optimizer = OptimizerParameters(
                **params["sac_training"]["value_network_optimizer"]
            )
        trainer_params = SACModelParameters(
            rl=rl_parameters,
            training=SACTrainingParameters(
                minibatch_size=params["sac_training"]["minibatch_size"],
                use_2_q_functions=params["sac_training"]["use_2_q_functions"],
                use_value_network=params["sac_training"]["use_value_network"],
                q_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["q_network_optimizer"]
                ),
                value_network_optimizer=value_network_optimizer,
                actor_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["actor_network_optimizer"]
                ),
                entropy_temperature=params["sac_training"]["entropy_temperature"],
            ),
            q_network=FeedForwardParameters(**params["sac_q_training"]),
            value_network=value_network,
            actor_network=FeedForwardParameters(**params["sac_actor_training"]),
        )
        trainer = horizon_runner.get_sac_trainer(env, trainer_params, use_gpu)
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))
    return trainer
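# Illustrative input, not taken from the source: for the CONTINUOUS_ACTION (DDPG)
# branch, create_park_trainer expects separate shared/actor/critic sections. The
# section names follow the lookups above; the numeric values and the field names
# inside each section (e.g. "minibatch_size", "final_layer_init") are placeholders
# and assumptions. The first/last layer sizes are stripped via layers[1:-1] above,
# so -1 is used as a placeholder for them here.
example_ddpg_params = {
    "shared_training": {"minibatch_size": 128, "final_layer_init": 0.003},
    "actor_training": {
        "layers": [-1, 256, 128, -1],
        "activations": ["relu", "relu", "tanh"],
    },
    "critic_training": {
        "layers": [-1, 256, 128, -1],
        "activations": ["relu", "relu", "linear"],
    },
}
# trainer = create_park_trainer(
#     ModelType.CONTINUOUS_ACTION.value, example_ddpg_params, rl_parameters, False, env
# )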