Beispiel #1
0
    def build_trainer(self) -> C51Trainer:
        net_builder = self.net_builder.value
        q_network = net_builder.build_q_network(
            self.state_normalization_parameters, len(self.action_names)
        )

        if self.use_gpu:
            q_network = q_network.cuda()

        q_network_target = q_network.get_target_network()

        self._q_network = q_network

        return C51Trainer(
            q_network,
            q_network_target,
            self.trainer_param,
            self.use_gpu,
            metrics_to_score=self.metrics_to_score,
            loss_reporter=NoOpLossReporter(),
        )
Beispiel #2
0
def create_dqn_trainer_from_params(
    model: DiscreteActionModelParameters,
    normalization_parameters: Dict[int, NormalizationParameters],
    use_gpu: bool = False,
    use_all_avail_gpus: bool = False,
    metrics_to_score=None,
):
    metrics_to_score = metrics_to_score or []

    if model.rainbow.quantile:
        q_network = QuantileDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=len(model.actions),
            num_atoms=model.rainbow.num_atoms,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )
    elif model.rainbow.categorical:
        q_network = CategoricalDQN(  # type: ignore
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=len(model.actions),
            num_atoms=model.rainbow.num_atoms,
            qmin=model.rainbow.qmin,
            qmax=model.rainbow.qmax,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
            use_gpu=use_gpu,
        )
    elif model.rainbow.dueling_architecture:
        q_network = DuelingQNetwork(  # type: ignore
            layers=[get_num_output_features(normalization_parameters)] +
            model.training.layers[1:-1] + [len(model.actions)],
            activations=model.training.activations,
        )
    else:
        q_network = FullyConnectedDQN(  # type: ignore
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=len(model.actions),
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )

    if use_gpu and torch.cuda.is_available():
        q_network = q_network.cuda()

    q_network_target = q_network.get_target_network()

    reward_network, q_network_cpe, q_network_cpe_target = None, None, None
    if model.evaluation.calc_cpe_in_training:
        # Metrics + reward
        num_output_nodes = (len(metrics_to_score) + 1) * len(model.actions)
        reward_network = FullyConnectedDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=num_output_nodes,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )
        q_network_cpe = FullyConnectedDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=num_output_nodes,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )

        if use_gpu and torch.cuda.is_available():
            reward_network.cuda()
            q_network_cpe.cuda()

        q_network_cpe_target = q_network_cpe.get_target_network()

    if (use_all_avail_gpus and not model.rainbow.categorical
            and not model.rainbow.quantile):
        q_network = q_network.get_distributed_data_parallel_model()
        reward_network = (reward_network.get_distributed_data_parallel_model()
                          if reward_network else None)
        q_network_cpe = (q_network_cpe.get_distributed_data_parallel_model()
                         if q_network_cpe else None)

    if model.rainbow.quantile:
        assert (not use_all_avail_gpus
                ), "use_all_avail_gpus not implemented for distributional RL"
        return QRDQNTrainer(
            q_network,
            q_network_target,
            model,
            use_gpu,
            metrics_to_score=metrics_to_score,
        )

    elif model.rainbow.categorical:
        assert (not use_all_avail_gpus
                ), "use_all_avail_gpus not implemented for distributional RL"
        return C51Trainer(
            q_network,
            q_network_target,
            model,
            use_gpu,
            metrics_to_score=metrics_to_score,
        )

    else:
        return DQNTrainer(
            q_network,
            q_network_target,
            reward_network,
            model,
            use_gpu,
            q_network_cpe=q_network_cpe,
            q_network_cpe_target=q_network_cpe_target,
            metrics_to_score=metrics_to_score,
        )
    def get_modular_sarsa_trainer_reward_boost(
        self,
        environment,
        reward_shape,
        dueling,
        categorical,
        quantile,
        use_gpu=False,
        use_all_avail_gpus=False,
        clip_grad_norm=None,
    ):
        assert not quantile or not categorical
        parameters = self.get_sarsa_parameters(environment, reward_shape,
                                               dueling, categorical, quantile,
                                               clip_grad_norm)

        if quantile:
            if dueling:
                q_network = DuelingQuantileDQN(
                    layers=[
                        get_num_output_features(environment.normalization)
                    ] + parameters.training.layers[1:-1] +
                    [len(environment.ACTIONS)],
                    activations=parameters.training.activations,
                    num_atoms=parameters.rainbow.num_atoms,
                )
            else:
                q_network = QuantileDQN(
                    state_dim=get_num_output_features(
                        environment.normalization),
                    action_dim=len(environment.ACTIONS),
                    num_atoms=parameters.rainbow.num_atoms,
                    sizes=parameters.training.layers[1:-1],
                    activations=parameters.training.activations[:-1],
                )
        elif categorical:
            assert not dueling
            q_network = CategoricalDQN(
                state_dim=get_num_output_features(environment.normalization),
                action_dim=len(environment.ACTIONS),
                num_atoms=parameters.rainbow.num_atoms,
                qmin=-100,
                qmax=200,
                sizes=parameters.training.layers[1:-1],
                activations=parameters.training.activations[:-1],
            )
        else:
            if dueling:
                q_network = DuelingQNetwork(
                    layers=[
                        get_num_output_features(environment.normalization)
                    ] + parameters.training.layers[1:-1] +
                    [len(environment.ACTIONS)],
                    activations=parameters.training.activations,
                )
            else:
                q_network = FullyConnectedDQN(
                    state_dim=get_num_output_features(
                        environment.normalization),
                    action_dim=len(environment.ACTIONS),
                    sizes=parameters.training.layers[1:-1],
                    activations=parameters.training.activations[:-1],
                )

        q_network_cpe, q_network_cpe_target, reward_network = None, None, None

        if parameters.evaluation and parameters.evaluation.calc_cpe_in_training:
            q_network_cpe = FullyConnectedDQN(
                state_dim=get_num_output_features(environment.normalization),
                action_dim=len(environment.ACTIONS),
                sizes=parameters.training.layers[1:-1],
                activations=parameters.training.activations[:-1],
            )
            q_network_cpe_target = q_network_cpe.get_target_network()
            reward_network = FullyConnectedDQN(
                state_dim=get_num_output_features(environment.normalization),
                action_dim=len(environment.ACTIONS),
                sizes=parameters.training.layers[1:-1],
                activations=parameters.training.activations[:-1],
            )

        if use_gpu:
            q_network = q_network.cuda()
            if parameters.evaluation.calc_cpe_in_training:
                reward_network = reward_network.cuda()
                q_network_cpe = q_network_cpe.cuda()
                q_network_cpe_target = q_network_cpe_target.cuda()
            if use_all_avail_gpus and not categorical:
                q_network = q_network.get_distributed_data_parallel_model()
                reward_network = reward_network.get_distributed_data_parallel_model(
                )
                q_network_cpe = q_network_cpe.get_distributed_data_parallel_model(
                )
                q_network_cpe_target = (
                    q_network_cpe_target.get_distributed_data_parallel_model())

        if quantile:
            trainer = QRDQNTrainer(
                q_network,
                q_network.get_target_network(),
                parameters,
                use_gpu,
                reward_network=reward_network,
                q_network_cpe=q_network_cpe,
                q_network_cpe_target=q_network_cpe_target,
            )
        elif categorical:
            trainer = C51Trainer(q_network, q_network.get_target_network(),
                                 parameters, use_gpu)
        else:
            parameters = DQNTrainerParameters.from_discrete_action_model_parameters(
                parameters)
            trainer = DQNTrainer(
                q_network,
                q_network.get_target_network(),
                reward_network,
                parameters,
                use_gpu,
                q_network_cpe=q_network_cpe,
                q_network_cpe_target=q_network_cpe_target,
            )
        return trainer
Beispiel #4
0
    def get_modular_sarsa_trainer_reward_boost(
        self,
        environment,
        reward_shape,
        dueling,
        categorical,
        use_gpu=False,
        use_all_avail_gpus=False,
        clip_grad_norm=None,
    ):
        parameters = self.get_sarsa_parameters(environment, reward_shape,
                                               dueling, categorical,
                                               clip_grad_norm)

        if not categorical:
            q_network = FullyConnectedDQN(
                state_dim=get_num_output_features(environment.normalization),
                action_dim=len(environment.ACTIONS),
                sizes=parameters.training.layers[1:-1],
                activations=parameters.training.activations[:-1],
            )
            q_network_cpe = FullyConnectedDQN(
                state_dim=get_num_output_features(environment.normalization),
                action_dim=len(environment.ACTIONS),
                sizes=parameters.training.layers[1:-1],
                activations=parameters.training.activations[:-1],
            )
            reward_network = FullyConnectedDQN(
                state_dim=get_num_output_features(environment.normalization),
                action_dim=len(environment.ACTIONS),
                sizes=parameters.training.layers[1:-1],
                activations=parameters.training.activations[:-1],
            )
        else:
            q_network = CategoricalDQN(
                state_dim=get_num_output_features(environment.normalization),
                action_dim=len(environment.ACTIONS),
                num_atoms=51,
                qmin=-100,
                qmax=200,
                sizes=parameters.training.layers[1:-1],
                activations=parameters.training.activations[:-1],
            )

        if use_gpu:
            q_network = q_network.cuda()
            reward_network = reward_network.cuda()
            q_network_cpe = q_network_cpe.cuda()
            if use_all_avail_gpus and not categorical:
                q_network = q_network.get_distributed_data_parallel_model()
                reward_network = reward_network.get_distributed_data_parallel_model(
                )
                q_network_cpe = q_network_cpe.get_distributed_data_parallel_model(
                )

        if not categorical:
            trainer = DQNTrainer(
                q_network,
                q_network.get_target_network(),
                reward_network,
                parameters,
                use_gpu,
                q_network_cpe=q_network_cpe,
                q_network_cpe_target=q_network_cpe.get_target_network(),
            )
        else:
            trainer = C51Trainer(q_network, q_network.get_target_network(),
                                 parameters, use_gpu)
        return trainer