Example #1
    def __init__(
        self,
        state_normalization_parameters: Dict[str, NormalizationParameters],
        parameters: DiscreteActionModelParameters,
        skip_normalization: Optional[bool] = False
    ) -> None:
        self._actions = parameters.actions

        self.num_processed_state_features = get_num_output_features(
            state_normalization_parameters
        )

        if parameters.training.layers[0] in [None, -1, 1]:
            parameters.training.layers[0] = self.num_state_features

        # There is a logical 1-dimensional output for each state/action pair,
        # but the underlying network computes num_actions-dimensional outputs
        if parameters.training.layers[-1] in [None, -1, 1]:
            parameters.training.layers[-1] = self.num_actions

        assert parameters.training.layers[-1] == self.num_actions,\
            "Set layers[-1] to the number of actions or a default placeholder value"

        RLTrainer.__init__(
            self, state_normalization_parameters, parameters, skip_normalization
        )
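
Every example in this listing sizes its network with get_num_output_features, which is not shown here. Below is a minimal stand-in sketch, assuming (purely for illustration) that each NormalizationParameters entry carries a feature_type field and that enum features one-hot encode into one column per possible value; none of these attribute names are confirmed by the listing.

    from typing import Dict

    def get_num_output_features_sketch(
        normalization_parameters: Dict[int, "NormalizationParameters"],
    ) -> int:
        # Hypothetical: each feature contributes one output column, except
        # enum features, which contribute one column per possible value.
        total = 0
        for params in normalization_parameters.values():
            if getattr(params, "feature_type", None) == "ENUM":
                total += len(params.possible_values)
            else:
                total += 1
        return total
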
Example #2

    def __init__(
        self,
        parameters: ContinuousActionModelParameters,
        state_normalization_parameters: Dict[int, NormalizationParameters],
        action_normalization_parameters: Dict[int, NormalizationParameters],
        additional_feature_types: AdditionalFeatureTypes = DEFAULT_ADDITIONAL_FEATURE_TYPES,
    ) -> None:
        self._additional_feature_types = additional_feature_types
        self.state_normalization_parameters = state_normalization_parameters
        self.action_normalization_parameters = action_normalization_parameters
        num_features = get_num_output_features(
            state_normalization_parameters) + get_num_output_features(
                action_normalization_parameters)

        # ensure state and action IDs have no intersection
        overlapping_features = set(
            state_normalization_parameters.keys()) & set(
                action_normalization_parameters.keys())
        assert len(overlapping_features) == 0, (
            "There are some overlapping state and action features: " +
            str(overlapping_features))

        parameters.training.layers[0] = num_features
        parameters.training.layers[-1] = 1

        RLTrainer.__init__(self, parameters)

        self._create_internal_policy_net()
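
Example #2's constructor asserts that state and action feature IDs are disjoint before both feature sets are concatenated into a single input vector. A standalone illustration of that guard, with made-up integer IDs:

    # Disjoint keys pass silently; overlapping keys trip the assert.
    state_keys = {1, 2, 3}
    action_keys = {4, 5}
    overlapping_features = state_keys & action_keys
    assert len(overlapping_features) == 0, (
        "There are some overlapping state and action features: "
        + str(overlapping_features)
    )
    # With action_keys = {3, 4}, this would raise:
    # AssertionError: There are some overlapping state and action features: {3}
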
Example #3
    def __init__(
        self,
        parameters: DiscreteActionModelParameters,
        normalization_parameters: Dict[int, NormalizationParameters],
        additional_feature_types: AdditionalFeatureTypes = DEFAULT_ADDITIONAL_FEATURE_TYPES,
    ) -> None:
        self._additional_feature_types = additional_feature_types
        self._actions = parameters.actions if parameters.actions is not None else []
        self.reward_shape = {}  # type: Dict[int, float]
        if parameters.rl.reward_boost is not None and self._actions is not None:
            for k in parameters.rl.reward_boost.keys():
                i = self._actions.index(k)
                self.reward_shape[i] = parameters.rl.reward_boost[k]
        if parameters.training.cnn_parameters is None:
            self.state_normalization_parameters: Optional[Dict[
                int, NormalizationParameters]] = normalization_parameters
            num_features = get_num_output_features(normalization_parameters)
            parameters.training.layers[0] = num_features
        else:
            self.state_normalization_parameters = None
        parameters.training.layers[-1] = self.num_actions

        RLTrainer.__init__(self, parameters)

        self._create_all_q_score_net()
        self._create_internal_policy_net()
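
Example #3's reward_boost handling converts action names from the config into integer indices so rewards can be shifted per action at training time. A self-contained sketch of that translation, using invented action names and boost values:

    from typing import Dict, List

    actions: List[str] = ["left", "right", "noop"]   # hypothetical action set
    reward_boost: Dict[str, float] = {"right": 0.5}  # hypothetical boost config

    # String keys in the boost config become integer indices into the
    # action list, mirroring the loop in the constructor above.
    reward_shape: Dict[int, float] = {}
    for name, boost in reward_boost.items():
        reward_shape[actions.index(name)] = boost

    print(reward_shape)  # {1: 0.5}
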
Example #4

    def __init__(
        self,
        state_normalization_parameters: Dict[str, NormalizationParameters],
        action_normalization_parameters: Dict[str, NormalizationParameters],
        parameters: ContinuousActionModelParameters,
        skip_normalization: Optional[bool] = False,
    ) -> None:
        self._action_features = list(action_normalization_parameters.keys())
        self.num_unprocessed_action_features = len(self._action_features)
        self.num_processed_action_features = get_num_output_features(
            action_normalization_parameters)

        self.num_processed_state_features = get_num_output_features(
            state_normalization_parameters)

        if parameters.training.layers[0] in [None, -1]:
            parameters.training.layers[0] = (
                self.num_state_features + self.num_action_features
            )

        assert parameters.training.layers[-1] == 1, "Set layers[-1] to 1"

        self._action_normalization_parameters = action_normalization_parameters
        RLTrainer.__init__(self, state_normalization_parameters, parameters,
                           skip_normalization)
        print(action_normalization_parameters)

        self._prepare_action_normalization()
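
A pattern shared by every constructor in this listing is patching parameters.training.layers in place before calling RLTrainer.__init__: the first entry becomes the concatenated feature count and the last entry becomes the output width (1 for a parametric-action Q-value, num_actions for the discrete models). A minimal sketch of that convention in isolation, with a hypothetical resize_layers helper:

    from typing import List, Optional

    def resize_layers(
        layers: List[Optional[int]],
        num_input_features: int,
        num_outputs: int,
    ) -> List[Optional[int]]:
        # A placeholder (None or -1) in the first or last slot is replaced
        # by the feature count or the output width before the net is built.
        if layers[0] in [None, -1]:
            layers[0] = num_input_features
        if layers[-1] in [None, -1]:
            layers[-1] = num_outputs
        return layers

    print(resize_layers([None, 128, 64, None], 42, 1))
    # [42, 128, 64, 1]
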
Example #5
    def __init__(
        self,
        parameters: DiscreteActionModelParameters,
        normalization_parameters: Dict[int, NormalizationParameters],
    ) -> None:
        self._actions = parameters.actions if parameters.actions is not None else []

        self.state_normalization_parameters = normalization_parameters
        num_features = get_num_output_features(normalization_parameters)
        parameters.training.layers[0] = num_features
        parameters.training.layers[-1] = self.num_actions

        RLTrainer.__init__(self, parameters)
Example #6
    def __init__(
        self,
        parameters: ContinuousActionModelParameters,
        state_normalization_parameters: Dict[int, NormalizationParameters],
        action_normalization_parameters: Dict[int, NormalizationParameters],
    ) -> None:
        self.state_normalization_parameters = state_normalization_parameters
        self.action_normalization_parameters = action_normalization_parameters
        num_features = get_num_output_features(
            state_normalization_parameters) + get_num_output_features(
                action_normalization_parameters)

        parameters.training.layers[0] = num_features
        parameters.training.layers[-1] = 1

        RLTrainer.__init__(self, parameters)
Example #7
    def __init__(
        self,
        parameters: DiscreteActionModelParameters,
        normalization_parameters: Dict[int, NormalizationParameters],
    ) -> None:
        self._actions = parameters.actions if parameters.actions is not None else []
        self.reward_shape = {}  # type: Dict[int, float]
        if parameters.rl.reward_boost is not None and self._actions is not None:
            for k in parameters.rl.reward_boost.keys():
                i = self._actions.index(k)
                self.reward_shape[i] = parameters.rl.reward_boost[k]
        self.state_normalization_parameters = normalization_parameters
        num_features = get_num_output_features(normalization_parameters)
        parameters.training.layers[0] = num_features
        parameters.training.layers[-1] = self.num_actions

        RLTrainer.__init__(self, parameters)

        self._create_all_q_score_net()
Example #8
    def __init__(
        self,
        parameters: ContinuousActionModelParameters,
        state_normalization_parameters: Dict[int, NormalizationParameters],
        action_normalization_parameters: Dict[int, NormalizationParameters],
    ) -> None:
        self.state_normalization_parameters = state_normalization_parameters
        self.action_normalization_parameters = action_normalization_parameters
        num_features = get_num_output_features(
            state_normalization_parameters) + get_num_output_features(
                action_normalization_parameters)

        # ensure state and action IDs have no intersection
        overlapping_features = (set(state_normalization_parameters.keys())
                                & set(action_normalization_parameters.keys()))
        assert (
            len(overlapping_features) == 0
        ), "There are some overlapping state and action features: " + str(
            overlapping_features)

        parameters.training.layers[0] = num_features
        parameters.training.layers[-1] = 1

        RLTrainer.__init__(self, parameters)