Example #1
    def _get_model(self, training_parameters, dueling_architecture=False):
        # Dueling Q-network variant.
        if dueling_architecture:
            return DuelingArchitectureQNetwork(
                training_parameters.layers,
                training_parameters.activations,
                action_dim=self.num_action_features,
            )
        # No factorization: a single feed-forward net over the combined
        # state/action features.
        elif training_parameters.factorization_parameters is None:
            return GenericFeedForwardNetwork(
                training_parameters.layers, training_parameters.activations
            )
        # Factorized form: separate state and action networks combined by
        # ParametricInnerProduct.
        else:
            return ParametricInnerProduct(
                GenericFeedForwardNetwork(
                    training_parameters.factorization_parameters.state.layers,
                    training_parameters.factorization_parameters.state.activations,
                ),
                GenericFeedForwardNetwork(
                    training_parameters.factorization_parameters.action.layers,
                    training_parameters.factorization_parameters.action.activations,
                ),
                self.num_state_features,
                self.num_action_features,
            )
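
The dueling branch above only selects DuelingArchitectureQNetwork; its internals are not shown here. Below is a rough, self-contained PyTorch sketch of the standard dueling idea the class is named after (separate value and advantage streams combined as Q(s, a) = V(s) + A(s, a) - mean(A)). The class name, layer sizes, and everything else in the sketch are purely illustrative, not the library's actual implementation.

import torch
import torch.nn as nn


class TinyDuelingQNet(nn.Module):
    """Illustrative dueling head; not the library's DuelingArchitectureQNetwork."""

    def __init__(self, state_dim, num_actions, hidden=64):
        super().__init__()
        self.shared = nn.Sequential(nn.Linear(state_dim, hidden), nn.ReLU())
        self.value = nn.Linear(hidden, 1)                # V(s)
        self.advantage = nn.Linear(hidden, num_actions)  # A(s, a)

    def forward(self, state):
        h = self.shared(state)
        v = self.value(h)
        a = self.advantage(h)
        # Combine so the decomposition is identifiable: Q = V + A - mean(A).
        return v + a - a.mean(dim=1, keepdim=True)


q_values = TinyDuelingQNet(state_dim=4, num_actions=2)(torch.randn(3, 4))
print(q_values.shape)  # torch.Size([3, 2])
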
Example #2
    def __init__(
        self,
        parameters: DiscreteActionModelParameters,
        state_normalization_parameters: Dict[int, NormalizationParameters],
        use_gpu=False,
        additional_feature_types: AdditionalFeatureTypes = DEFAULT_ADDITIONAL_FEATURE_TYPES,
        gradient_handler=None,
    ) -> None:

        self.double_q_learning = parameters.rainbow.double_q_learning
        self.warm_start_model_path = parameters.training.warm_start_model_path
        self.minibatch_size = parameters.training.minibatch_size
        self._actions = parameters.actions if parameters.actions is not None else []

        self.reward_shape = {}  # type: Dict[int, float]
        if parameters.rl.reward_boost is not None and self._actions is not None:
            for k in parameters.rl.reward_boost.keys():
                i = self._actions.index(k)
                self.reward_shape[i] = parameters.rl.reward_boost[k]

        if parameters.training.cnn_parameters is None:
            self.state_normalization_parameters: Optional[Dict[
                int, NormalizationParameters]] = state_normalization_parameters
            self.num_features = get_num_output_features(
                state_normalization_parameters)
            parameters.training.layers[0] = self.num_features
        else:
            self.state_normalization_parameters = None
        parameters.training.layers[-1] = self.num_actions

        RLTrainer.__init__(self, parameters, use_gpu, additional_feature_types,
                           gradient_handler)

        if parameters.rainbow.dueling_architecture:
            self.q_network = DuelingArchitectureQNetwork(
                parameters.training.layers, parameters.training.activations)
        else:
            self.q_network = GenericFeedForwardNetwork(
                parameters.training.layers, parameters.training.activations)
        self.q_network_target = deepcopy(self.q_network)
        self._set_optimizer(parameters.training.optimizer)
        self.q_network_optimizer = self.optimizer_func(
            self.q_network.parameters(), lr=parameters.training.learning_rate)

        self.reward_network = GenericFeedForwardNetwork(
            parameters.training.layers, parameters.training.activations)
        self.reward_network_optimizer = self.optimizer_func(
            self.reward_network.parameters(),
            lr=parameters.training.learning_rate)

        if self.use_gpu:
            self.q_network.cuda()
            self.q_network_target.cuda()
            self.reward_network.cuda()
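
A minimal standalone illustration of the reward_boost handling in this constructor: boosts keyed by action name are re-keyed by that action's index in self._actions. The action names and boost values below are hypothetical.

# Hypothetical action names and reward boosts, for illustration only.
actions = ["no_op", "click", "buy"]
reward_boost = {"click": 0.1, "buy": 1.0}

reward_shape = {}
for k in reward_boost.keys():
    reward_shape[actions.index(k)] = reward_boost[k]

print(reward_shape)  # {1: 0.1, 2: 1.0}
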
Example #3
    def __init__(
        self,
        parameters: ContinuousActionModelParameters,
        state_normalization_parameters: Dict[int, NormalizationParameters],
        action_normalization_parameters: Dict[int, NormalizationParameters],
        use_gpu=False,
        additional_feature_types: AdditionalFeatureTypes = DEFAULT_ADDITIONAL_FEATURE_TYPES,
    ) -> None:

        self.warm_start_model_path = parameters.training.warm_start_model_path
        self.minibatch_size = parameters.training.minibatch_size
        self.state_normalization_parameters = state_normalization_parameters
        self.action_normalization_parameters = action_normalization_parameters
        self.num_features = get_num_output_features(
            state_normalization_parameters) + get_num_output_features(
                action_normalization_parameters)

        # ensure state and action IDs have no intersection
        overlapping_features = set(
            state_normalization_parameters.keys()) & set(
                action_normalization_parameters.keys())
        assert len(overlapping_features) == 0, (
            "There are some overlapping state and action features: " +
            str(overlapping_features))

        parameters.training.layers[0] = self.num_features
        parameters.training.layers[-1] = 1

        RLTrainer.__init__(self, parameters, use_gpu, additional_feature_types)

        self.q_network = GenericFeedForwardNetwork(
            parameters.training.layers, parameters.training.activations)
        self.q_network_target = deepcopy(self.q_network)
        self._set_optimizer(parameters.training.optimizer)
        self.q_network_optimizer = self.optimizer_func(
            self.q_network.parameters(), lr=parameters.training.learning_rate)

        self.reward_network = GenericFeedForwardNetwork(
            parameters.training.layers, parameters.training.activations)
        self.reward_network_optimizer = self.optimizer_func(
            self.reward_network.parameters(),
            lr=parameters.training.learning_rate)

        if self.use_gpu:
            self.q_network.cuda()
            self.q_network_target.cuda()
            self.reward_network.cuda()
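
The feature-overlap assertion in isolation, with hypothetical integer feature IDs: state and action normalization parameters are keyed by feature ID, and the two key sets must be disjoint.

# Hypothetical normalization dicts keyed by integer feature ID (values elided).
state_normalization_parameters = {101: "norm_a", 102: "norm_b"}
action_normalization_parameters = {201: "norm_c", 202: "norm_d"}

overlapping_features = set(state_normalization_parameters.keys()) & set(
    action_normalization_parameters.keys()
)
assert len(overlapping_features) == 0, (
    "There are some overlapping state and action features: "
    + str(overlapping_features)
)
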
Example #4
    def _get_model(self, training_parameters):
        # No factorization: a single feed-forward net over the combined
        # state/action features.
        if training_parameters.factorization_parameters is None:
            return GenericFeedForwardNetwork(
                training_parameters.layers, training_parameters.activations
            )
        # Factorized form: separate state and action networks combined by
        # ParametricInnerProduct.
        else:
            return ParametricInnerProduct(
                GenericFeedForwardNetwork(
                    training_parameters.factorization_parameters.state.layers,
                    training_parameters.factorization_parameters.state.activations,
                ),
                GenericFeedForwardNetwork(
                    training_parameters.factorization_parameters.action.layers,
                    training_parameters.factorization_parameters.action.activations,
                ),
                self.num_state_features,
                self.num_action_features,
            )
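
When factorization_parameters is set, the Q-function is built from two separate feed-forward networks, one over state features and one over action features, passed to ParametricInnerProduct. Below is a rough, self-contained PyTorch sketch of the factorized form that name suggests (the Q-value as an inner product of a state embedding and an action embedding). Everything in the sketch is illustrative and not the library's actual ParametricInnerProduct.

import torch
import torch.nn as nn


class TinyFactorizedQ(nn.Module):
    """Illustrative factorized Q(s, a) = <f(s), g(a)>; not ParametricInnerProduct."""

    def __init__(self, state_dim, action_dim, embed_dim=16):
        super().__init__()
        self.state_net = nn.Sequential(nn.Linear(state_dim, embed_dim), nn.ReLU())
        self.action_net = nn.Sequential(nn.Linear(action_dim, embed_dim), nn.ReLU())

    def forward(self, state, action):
        s = self.state_net(state)    # (batch, embed_dim)
        a = self.action_net(action)  # (batch, embed_dim)
        return (s * a).sum(dim=1, keepdim=True)  # (batch, 1) Q-value per pair


q = TinyFactorizedQ(state_dim=8, action_dim=3)
print(q(torch.randn(5, 8), torch.randn(5, 3)).shape)  # torch.Size([5, 1])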