Code example #1
def create_park_trainer(model_type, params, rl_parameters, use_gpu, env):
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert training_parameters.cnn_parameters is not None, \
                "Missing CNN parameters for image input"
            if isinstance(training_parameters.cnn_parameters, dict):
                training_parameters.cnn_parameters = CNNParameters(
                    **training_parameters.cnn_parameters)
            training_parameters.cnn_parameters.conv_dims[0] = (
                env.num_input_channels)
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels)
        else:
            assert training_parameters.cnn_parameters is None, \
                "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = create_park_dqn_trainer_from_params(
            model=trainer_params,
            normalization_parameters=env.normalization,
            use_gpu=use_gpu,
            env=env.env)
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert training_parameters.cnn_parameters is not None, \
                "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = (
                env.num_input_channels)
        else:
            assert training_parameters.cnn_parameters is None, \
                "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters)
        trainer = create_parametric_dqn_trainer_from_params(
            trainer_params,
            env.normalization,
            env.normalization_action,
            use_gpu,
            env=env.env)
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_parameters = params["shared_training"]
        if isinstance(training_parameters, dict):
            training_parameters = DDPGTrainingParameters(**training_parameters)

        actor_parameters = params["actor_training"]
        if isinstance(actor_parameters, dict):
            actor_parameters = DDPGNetworkParameters(**actor_parameters)

        critic_parameters = params["critic_training"]
        if isinstance(critic_parameters, dict):
            critic_parameters = DDPGNetworkParameters(**critic_parameters)

        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=training_parameters,
            actor_training=actor_parameters,
            critic_training=critic_parameters,
        )

        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)

        state_dim = get_num_output_features(env.normalization)
        action_dim = get_num_output_features(env.normalization_action)

        # Build Actor Network
        actor_network = ActorNetModel(
            layers=([state_dim] + trainer_params.actor_training.layers[1:-1] +
                    [action_dim]),
            activations=trainer_params.actor_training.activations,
            fl_init=trainer_params.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=False,
        )

        # Build Critic Network
        critic_network = CriticNetModel(
            # Ensure dims match input state and scalar output
            layers=([state_dim] +
                    trainer_params.critic_training.layers[1:-1] + [1]),
            activations=trainer_params.critic_training.activations,
            fl_init=trainer_params.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=False,
        )

        trainer = DDPGTrainer(
            actor_network,
            critic_network,
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )

    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        value_network = None
        value_network_optimizer = None
        if params["sac_training"]["use_value_network"]:
            value_network = FeedForwardParameters(
                **params["sac_value_training"])
            value_network_optimizer = OptimizerParameters(
                **params["sac_training"]["value_network_optimizer"])

        trainer_params = SACModelParameters(
            rl=rl_parameters,
            training=SACTrainingParameters(
                minibatch_size=params["sac_training"]["minibatch_size"],
                use_2_q_functions=params["sac_training"]["use_2_q_functions"],
                use_value_network=params["sac_training"]["use_value_network"],
                q_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["q_network_optimizer"]),
                value_network_optimizer=value_network_optimizer,
                actor_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["actor_network_optimizer"]),
                entropy_temperature=params["sac_training"]["entropy_temperature"],
            ),
            q_network=FeedForwardParameters(**params["sac_q_training"]),
            value_network=value_network,
            actor_network=FeedForwardParameters(
                **params["sac_actor_training"]),
        )
        trainer = horizon_runner.get_sac_trainer(env, trainer_params, use_gpu)

    else:
        raise NotImplementedError(
            "Model of type {} not supported".format(model_type))

    return trainer
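
For reference, the SOFT_ACTOR_CRITIC branch above consumes a nested params dict. The sketch below shows a hypothetical shape for it: the top-level keys mirror exactly what the branch reads, while the nested field names (layers, activations, optimizer, learning_rate) and all values are assumptions about FeedForwardParameters and OptimizerParameters, not values from a real Horizon config.

# Hypothetical params layout for the SOFT_ACTOR_CRITIC branch above.
# Top-level keys match what the code reads; nested field names and all
# values are illustrative assumptions, not taken from a Horizon config.
params = {
    "sac_training": {
        "minibatch_size": 1024,
        "use_2_q_functions": True,
        "use_value_network": True,
        "q_network_optimizer": {"optimizer": "ADAM", "learning_rate": 3e-4},
        "value_network_optimizer": {"optimizer": "ADAM", "learning_rate": 3e-4},
        "actor_network_optimizer": {"optimizer": "ADAM", "learning_rate": 3e-4},
        "entropy_temperature": 0.1,
    },
    "sac_q_training": {"layers": [256, 256], "activations": ["relu", "relu"]},
    "sac_value_training": {"layers": [256, 256], "activations": ["relu", "relu"]},
    "sac_actor_training": {"layers": [256, 256], "activations": ["relu", "relu"]},
}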
Code example #2
    def _test_ddpg_trainer(self, use_gpu=False, use_all_avail_gpus=False):
        # FIXME: this test is not really working
        self.run_pre_training_eval = False
        self.check_tolerance = False
        environment = GridworldContinuous()

        parameters = self.get_ddpg_parameters()

        state_dim = get_num_output_features(environment.normalization)
        action_dim = get_num_output_features(environment.normalization_action)

        # Build Actor Network
        actor_network = ActorNetModel(
            layers=([state_dim] + parameters.actor_training.layers[1:-1] +
                    [action_dim]),
            activations=parameters.actor_training.activations,
            fl_init=parameters.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )

        # Build Critic Network
        critic_network = CriticNetModel(
            # Ensure dims match input state and scalar output
            layers=[state_dim] + parameters.critic_training.layers[1:-1] + [1],
            activations=parameters.critic_training.activations,
            fl_init=parameters.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )

        trainer = DDPGTrainer(
            actor_network,
            critic_network,
            parameters,
            environment.normalization,
            environment.normalization_action,
            environment.min_action_range,
            environment.max_action_range,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )

        exporter = ParametricDQNExporter.from_state_action_normalization(
            trainer.critic,
            state_normalization=environment.normalization,
            action_normalization=environment.normalization_action,
        )

        evaluator = GridworldDDPGEvaluator(environment, DISCOUNT)
        self.evaluate_gridworld(environment, evaluator, trainer, exporter,
                                use_gpu)

        # Make sure actor predictor works
        actor = ActorExporter.from_state_action_normalization(
            trainer.actor,
            state_normalization=environment.normalization,
            action_normalization=environment.normalization_action,
        ).export()

        # Make sure all actions are optimal
        error = evaluator.evaluate_actor(actor, thres=0.2)
        print("gridworld optimal action match MAE: {0:.3f}".format(error))
Code example #3
File: ddpg_workflow.py  Project: data-mining/Horizon
    def __init__(
        self,
        model_params: ContinuousActionModelParameters,
        preprocess_handler: PreprocessHandler,
        state_normalization: Dict[int, NormalizationParameters],
        action_normalization: Dict[int, NormalizationParameters],
        use_gpu: bool,
        use_all_avail_gpus: bool,
    ):
        logger.info("Running continuous workflow with params:")
        logger.info(model_params)

        min_action_range_tensor_serving, max_action_range_tensor_serving = construct_action_scale_tensor(
            action_normalization, model_params.action_rescale_map
        )

        state_dim = get_num_output_features(state_normalization)
        action_dim = get_num_output_features(action_normalization)

        # Build Actor Network
        actor_network = ActorNetModel(
            layers=(
                [state_dim] + model_params.actor_training.layers[1:-1] + [action_dim]
            ),
            activations=model_params.actor_training.activations,
            fl_init=model_params.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
        )

        # Build Critic Network
        critic_network = CriticNetModel(
            # Ensure dims match input state and scalar output
            layers=[state_dim] + model_params.critic_training.layers[1:-1] + [1],
            activations=model_params.critic_training.activations,
            fl_init=model_params.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
        )

        trainer = DDPGTrainer(
            actor_network,
            critic_network,
            model_params,
            state_normalization,
            action_normalization,
            min_action_range_tensor_serving,
            max_action_range_tensor_serving,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )
        trainer = update_model_for_warm_start(trainer)
        assert type(trainer) == DDPGTrainer, "Warm started wrong model type: " + str(
            type(trainer)
        )

        evaluator = Evaluator(
            None,
            model_params.rl.gamma,
            trainer,
            metrics_to_score=trainer.metrics_to_score,
        )

        super().__init__(
            preprocess_handler,
            trainer,
            evaluator,
            model_params.shared_training.minibatch_size,
        )
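
Example #1 turns the Gym action-space bounds into (1, action_dim) tensors before passing them to DDPGTrainer, while example #3 obtains its serving range tensors from construct_action_scale_tensor. Below is a minimal sketch of the shaping done in example #1, with made-up bounds (and the assumption, not verified here, that construct_action_scale_tensor yields tensors of the same shape):

import numpy as np
import torch

# Illustrative bounds standing in for env.action_space.low / .high.
action_range_low = np.array([-1.0, -2.0], dtype=np.float32)
action_range_high = np.array([1.0, 2.0], dtype=np.float32)

# Same shaping as example #1: add a leading batch dimension so the trainer
# receives (1, action_dim) range tensors.
min_action_range = torch.from_numpy(action_range_low).unsqueeze(dim=0)
max_action_range = torch.from_numpy(action_range_high).unsqueeze(dim=0)

print(min_action_range.shape)  # torch.Size([1, 2])
print(max_action_range)        # tensor([[1., 2.]])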