Code example #1
    def __init__(
        self,
        model_params: DiscreteActionModelParameters,
        state_normalization: Dict[int, NormalizationParameters],
        use_gpu: bool,
        use_all_avail_gpus: bool,
    ):
        logger.info("Running DQN workflow with params:")
        logger.info(model_params)
        self.model_params = model_params

        trainer = create_dqn_trainer_from_params(
            model_params,
            state_normalization,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )
        trainer = update_model_for_warm_start(trainer)
        assert type(trainer) == DQNTrainer, "Warm started wrong model type: " + str(
            type(trainer)
        )

        evaluator = Evaluator(
            model_params.actions,
            model_params.rl.gamma,
            trainer,
            metrics_to_score=trainer.metrics_to_score,
        )

        super().__init__(
            DiscreteDqnBatchPreprocessor(Preprocessor(state_normalization, use_gpu)),
            trainer,
            evaluator,
            model_params.training.minibatch_size,
        )
Code example #2
    def test_trainer_maxq(self):
        env = Env(self.state_dims, self.action_dims)
        env.seed(42)
        maxq_parameters = DiscreteActionModelParameters(
            actions=env.actions,
            rl=RLParameters(gamma=0.95,
                            target_update_rate=0.9,
                            maxq_learning=True),
            rainbow=RainbowDQNParameters(double_q_learning=True,
                                         dueling_architecture=False),
            training=TrainingParameters(
                layers=self.layers,
                activations=self.activations,
                minibatch_size=self.minibatch_size,
                learning_rate=0.25,
                optimizer="ADAM",
            ),
        )
        maxq_trainer = create_dqn_trainer_from_params(maxq_parameters,
                                                      env.normalization)

        logger.info("Generating constant_reward MDPs..")

        (
            states,
            actions,
            rewards,
            next_states,
            next_actions,
            is_terminal,
            possible_actions,
            possible_next_actions,
        ) = env.generate_samples_discrete(self.num_samples)

        logger.info("Preprocessing constant_reward MDPs..")

        for epoch in range(self.epochs):
            tdps = env.preprocess_samples_discrete(
                states,
                actions,
                rewards,
                next_states,
                next_actions,
                is_terminal,
                possible_actions,
                possible_next_actions,
                self.minibatch_size,
            )
            logger.info("Training.. " + str(epoch))
            for tdp in tdps:
                maxq_trainer.train(tdp)
            logger.info(" ".join([
                "Training epoch",
                str(epoch),
                "average q values",
                str(torch.mean(maxq_trainer.all_action_scores)),
            ]))

        # Q value should converge to very close to 20
        avg_q_value_after_training = torch.mean(maxq_trainer.all_action_scores)

        self.assertLess(avg_q_value_after_training, 22)
        self.assertGreater(avg_q_value_after_training, 18)
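The 18-22 band in the assertions follows directly from the discount factor: assuming the constant_reward MDPs emit a reward of 1 on every step (an assumption about env.generate_samples_discrete, which is not shown here), the optimal Q value is the geometric sum 1 / (1 - gamma) = 1 / 0.05 = 20. A quick standalone check:

# Geometric-series check: with gamma = 0.95 and a constant per-step reward of 1
# (assumed), the discounted return converges to 1 / (1 - gamma) = 20.
gamma = 0.95
print(sum(gamma ** k for k in range(1000)))  # ~20.0 (truncated geometric series)
print(1 / (1 - gamma))                       # 20.0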
Code example #3
    def _train(self, model_params, env):
        np.random.seed(0)
        random.seed(0)
        torch.manual_seed(0)
        env.seed(42)
        trainer = create_dqn_trainer_from_params(model_params,
                                                 env.normalization)
        logger.info("Generating constant_reward MDPs..")

        (
            states,
            actions,
            rewards,
            next_states,
            next_actions,
            is_terminal,
            possible_actions,
            possible_next_actions,
        ) = env.generate_samples_discrete(self.num_samples)

        logger.info("Preprocessing constant_reward MDPs..")

        for epoch in range(self.epochs):
            tdps = env.preprocess_samples_discrete(
                states,
                actions,
                rewards,
                next_states,
                next_actions,
                is_terminal,
                possible_actions,
                possible_next_actions,
                model_params.training.minibatch_size,
            )
            logger.info("Training.. " + str(epoch))
            for tdp in tdps:
                trainer.train(tdp)
            logger.info(" ".join([
                "Training epoch",
                str(epoch),
                "average q values",
                str(torch.mean(trainer.all_action_scores)),
            ]))
        return trainer
Code example #4
File: run_gym.py, Project: zzs4026/ReAgent
def create_trainer(params: OpenAiGymParameters, env: OpenAIGymEnvironment):
    use_gpu = params.use_gpu
    model_type = params.model_type
    assert params.rl is not None
    rl_parameters = params.rl

    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        assert params.training is not None
        training_parameters = params.training
        assert params.rainbow is not None
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            # _replace returns a new object; re-assign to keep the update.
            training_parameters = training_parameters._replace(
                cnn_parameters=training_parameters.cnn_parameters._replace(
                    input_height=env.height,
                    input_width=env.width,
                    num_input_channels=env.num_input_channels,
                )
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        discrete_trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=params.rainbow,
            evaluation=params.evaluation,
        )
        trainer = create_dqn_trainer_from_params(
            discrete_trainer_params, env.normalization, use_gpu
        )

    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        assert params.training is not None
        training_parameters = params.training
        assert params.rainbow is not None
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        continuous_trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters, training=training_parameters, rainbow=params.rainbow
        )
        trainer = create_parametric_dqn_trainer_from_params(
            continuous_trainer_params,
            env.normalization,
            env.normalization_action,
            use_gpu,
        )

    elif model_type == ModelType.TD3.value:
        assert params.td3_training is not None
        assert params.critic_training is not None
        assert params.actor_training is not None
        td3_trainer_params = TD3ModelParameters(
            rl=rl_parameters,
            training=params.td3_training,
            q_network=params.critic_training,
            actor_network=params.actor_training,
        )
        trainer = get_td3_trainer(env, td3_trainer_params, use_gpu)

    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        assert params.sac_training is not None
        assert params.critic_training is not None
        assert params.actor_training is not None
        trainer = get_sac_trainer(
            env,
            rl_parameters,
            params.sac_training,
            params.critic_training,
            params.actor_training,
            params.sac_value_training,
            use_gpu,
        )
    elif model_type == ModelType.CEM.value:
        assert params.cem is not None
        cem_trainer_params = params.cem._replace(rl=params.rl)
        trainer = get_cem_trainer(env, cem_trainer_params, use_gpu)
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))

    return trainer
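One detail worth calling out in Example #4: the parameter containers expose `_replace`, which (as with `collections.namedtuple`) returns a new object rather than mutating in place, so the result has to be re-assigned as done above. A minimal standalone sketch with plain namedtuples (the `CNN` and `Training` types here are illustrative stand-ins, not the real ReAgent classes):

from collections import namedtuple

CNN = namedtuple("CNN", ["input_height", "input_width"])
Training = namedtuple("Training", ["cnn_parameters", "minibatch_size"])

training = Training(cnn_parameters=CNN(input_height=0, input_width=0), minibatch_size=32)

# Calling _replace without re-assignment silently discards the updated copy.
training._replace(cnn_parameters=CNN(input_height=84, input_width=84))
print(training.cnn_parameters.input_height)  # still 0

# Re-assigning keeps the update, mirroring the pattern in create_trainer above.
training = training._replace(cnn_parameters=CNN(input_height=84, input_width=84))
print(training.cnn_parameters.input_height)  # 84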
Code example #5
def create_trainer(model_type, params, rl_parameters, use_gpu, env):
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            if isinstance(training_parameters.cnn_parameters, dict):
                training_parameters.cnn_parameters = CNNParameters(
                    **training_parameters.cnn_parameters
                )
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = create_dqn_trainer_from_params(
            trainer_params, env.normalization, use_gpu
        )

    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters, training=training_parameters, rainbow=rainbow_parameters
        )
        trainer = create_parametric_dqn_trainer_from_params(
            trainer_params, env.normalization, env.normalization_action, use_gpu
        )

    elif model_type == ModelType.TD3.value:
        trainer_params = TD3ModelParameters(
            rl=rl_parameters,
            training=TD3TrainingParameters(
                minibatch_size=params["td3_training"]["minibatch_size"],
                q_network_optimizer=OptimizerParameters(
                    **params["td3_training"]["q_network_optimizer"]
                ),
                actor_network_optimizer=OptimizerParameters(
                    **params["td3_training"]["actor_network_optimizer"]
                ),
                use_2_q_functions=params["td3_training"]["use_2_q_functions"],
                exploration_noise=params["td3_training"]["exploration_noise"],
                initial_exploration_ts=params["td3_training"]["initial_exploration_ts"],
                target_policy_smoothing=params["td3_training"][
                    "target_policy_smoothing"
                ],
                noise_clip=params["td3_training"]["noise_clip"],
                delayed_policy_update=params["td3_training"]["delayed_policy_update"],
            ),
            q_network=FeedForwardParameters(**params["td3_q_training"]),
            actor_network=FeedForwardParameters(**params["td3_actor_training"]),
        )
        trainer = get_td3_trainer(env, trainer_params, use_gpu)

    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        value_network = None
        value_network_optimizer = None
        alpha_optimizer = None
        if params["sac_training"]["use_value_network"]:
            value_network = FeedForwardParameters(**params["sac_value_training"])
            value_network_optimizer = OptimizerParameters(
                **params["sac_training"]["value_network_optimizer"]
            )
        if "alpha_optimizer" in params["sac_training"]:
            alpha_optimizer = OptimizerParameters(
                **params["sac_training"]["alpha_optimizer"]
            )
        entropy_temperature = params["sac_training"].get("entropy_temperature", None)
        target_entropy = params["sac_training"].get("target_entropy", None)

        trainer_params = SACModelParameters(
            rl=rl_parameters,
            training=SACTrainingParameters(
                minibatch_size=params["sac_training"]["minibatch_size"],
                use_2_q_functions=params["sac_training"]["use_2_q_functions"],
                use_value_network=params["sac_training"]["use_value_network"],
                q_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["q_network_optimizer"]
                ),
                value_network_optimizer=value_network_optimizer,
                actor_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["actor_network_optimizer"]
                ),
                entropy_temperature=entropy_temperature,
                target_entropy=target_entropy,
                alpha_optimizer=alpha_optimizer,
            ),
            q_network=FeedForwardParameters(**params["sac_q_training"]),
            value_network=value_network,
            actor_network=FeedForwardParameters(**params["sac_actor_training"]),
        )
        trainer = get_sac_trainer(env, trainer_params, use_gpu)

    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))

    return trainer
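Example #5 takes its configuration as a plain dict. Going only by the keys the ModelType.TD3 branch reads, the params argument could be shaped like the sketch below; the top-level keys mirror the code exactly, while the nested field names (learning_rate, layers, activations, ...) are assumptions about OptimizerParameters and FeedForwardParameters and may not match the real dataclasses:

# Hypothetical params dict for the TD3 branch of create_trainer above.
td3_params = {
    "td3_training": {
        "minibatch_size": 64,
        "q_network_optimizer": {"optimizer": "ADAM", "learning_rate": 1e-3},
        "actor_network_optimizer": {"optimizer": "ADAM", "learning_rate": 1e-3},
        "use_2_q_functions": True,
        "exploration_noise": 0.1,
        "initial_exploration_ts": 1000,
        "target_policy_smoothing": 0.2,
        "noise_clip": 0.5,
        "delayed_policy_update": 2,
    },
    "td3_q_training": {"layers": [256, 256], "activations": ["relu", "relu"]},
    "td3_actor_training": {"layers": [256, 256], "activations": ["relu", "relu"]},
}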
Code example #6
def create_trainer(model_type, params, rl_parameters, use_gpu, env):
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (training_parameters.cnn_parameters
                    is not None), "Missing CNN parameters for image input"
            if isinstance(training_parameters.cnn_parameters, dict):
                training_parameters.cnn_parameters = CNNParameters(
                    **training_parameters.cnn_parameters)
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels)
        else:
            assert (training_parameters.cnn_parameters
                    is None), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = create_dqn_trainer_from_params(trainer_params,
                                                 env.normalization, use_gpu)

    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (training_parameters.cnn_parameters
                    is not None), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (training_parameters.cnn_parameters
                    is None), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters)
        trainer = create_parametric_dqn_trainer_from_params(
            trainer_params, env.normalization, env.normalization_action,
            use_gpu)
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_parameters = params["shared_training"]
        if isinstance(training_parameters, dict):
            training_parameters = DDPGTrainingParameters(**training_parameters)

        actor_parameters = params["actor_training"]
        if isinstance(actor_parameters, dict):
            actor_parameters = DDPGNetworkParameters(**actor_parameters)

        critic_parameters = params["critic_training"]
        if isinstance(critic_parameters, dict):
            critic_parameters = DDPGNetworkParameters(**critic_parameters)

        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=training_parameters,
            actor_training=actor_parameters,
            critic_training=critic_parameters,
        )

        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)

        trainer = DDPGTrainer(
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )

    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        trainer_params = SACModelParameters(
            rl=rl_parameters,
            training=SACTrainingParameters(
                minibatch_size=params["sac_training"]["minibatch_size"],
                use_2_q_functions=params["sac_training"]["use_2_q_functions"],
                q_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["q_network_optimizer"]),
                value_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["value_network_optimizer"]),
                actor_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["actor_network_optimizer"]),
                entropy_temperature=params["sac_training"]["entropy_temperature"],
            ),
            q_network=FeedForwardParameters(**params["sac_q_training"]),
            value_network=FeedForwardParameters(
                **params["sac_value_training"]),
            actor_network=FeedForwardParameters(
                **params["sac_actor_training"]),
        )
        trainer = get_sac_trainer(env, trainer_params, use_gpu)

    else:
        raise NotImplementedError(
            "Model of type {} not supported".format(model_type))

    return trainer
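A small standalone note on the DDPG branch in Example #6: env.action_space.low and high are float32 numpy arrays of shape (action_dim,), and unsqueeze(dim=0) adds a leading batch dimension so the bounds broadcast against batched action tensors. The array values below are stand-ins for an arbitrary two-dimensional action space:

import numpy as np
import torch

# Stand-ins for env.action_space.low / env.action_space.high.
action_range_low = np.array([-1.0, -2.0], dtype=np.float32)
action_range_high = np.array([1.0, 2.0], dtype=np.float32)

# unsqueeze(dim=0) turns shape (2,) into (1, 2), i.e. a batch of one bound vector.
low_t = torch.from_numpy(action_range_low).unsqueeze(dim=0)
high_t = torch.from_numpy(action_range_high).unsqueeze(dim=0)
print(low_t.shape, high_t.shape)  # torch.Size([1, 2]) torch.Size([1, 2])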