def __init__(
    self,
    state_normalization_parameters: Dict[str, NormalizationParameters],
    parameters: DiscreteActionModelParameters,
    skip_normalization: Optional[bool] = False,
) -> None:
    self._actions = parameters.actions
    self.num_processed_state_features = get_num_output_features(
        state_normalization_parameters
    )
    if parameters.training.layers[0] in [None, -1, 1]:
        parameters.training.layers[0] = self.num_state_features

    # There is a logical 1-dimensional output for each state/action pair,
    # but the underlying network computes num_actions-dimensional outputs
    if parameters.training.layers[-1] in [None, -1, 1]:
        parameters.training.layers[-1] = self.num_actions

    assert parameters.training.layers[-1] == self.num_actions, \
        "Set layers[-1] to the number of actions or a default placeholder value"

    RLTrainer.__init__(
        self, state_normalization_parameters, parameters, skip_normalization
    )

def __init__(
    self,
    parameters: ContinuousActionModelParameters,
    state_normalization_parameters: Dict[int, NormalizationParameters],
    action_normalization_parameters: Dict[int, NormalizationParameters],
    additional_feature_types: AdditionalFeatureTypes = DEFAULT_ADDITIONAL_FEATURE_TYPES,
) -> None:
    self._additional_feature_types = additional_feature_types
    self.state_normalization_parameters = state_normalization_parameters
    self.action_normalization_parameters = action_normalization_parameters
    num_features = get_num_output_features(
        state_normalization_parameters
    ) + get_num_output_features(action_normalization_parameters)

    # Ensure state and action feature IDs have no intersection
    overlapping_features = set(state_normalization_parameters.keys()) & set(
        action_normalization_parameters.keys()
    )
    assert len(overlapping_features) == 0, (
        "There are some overlapping state and action features: "
        + str(overlapping_features)
    )

    parameters.training.layers[0] = num_features
    parameters.training.layers[-1] = 1

    RLTrainer.__init__(self, parameters)

    self._create_internal_policy_net()

def __init__(
    self,
    parameters: DiscreteActionModelParameters,
    normalization_parameters: Dict[int, NormalizationParameters],
    additional_feature_types: AdditionalFeatureTypes = DEFAULT_ADDITIONAL_FEATURE_TYPES,
) -> None:
    self._additional_feature_types = additional_feature_types
    self._actions = parameters.actions if parameters.actions is not None else []

    # Convert reward boosts keyed by action name into boosts keyed by action index
    self.reward_shape = {}  # type: Dict[int, float]
    if parameters.rl.reward_boost is not None and self._actions is not None:
        for k in parameters.rl.reward_boost.keys():
            i = self._actions.index(k)
            self.reward_shape[i] = parameters.rl.reward_boost[k]

    if parameters.training.cnn_parameters is None:
        self.state_normalization_parameters: Optional[
            Dict[int, NormalizationParameters]
        ] = normalization_parameters
        num_features = get_num_output_features(normalization_parameters)
        parameters.training.layers[0] = num_features
    else:
        self.state_normalization_parameters = None
    parameters.training.layers[-1] = self.num_actions

    RLTrainer.__init__(self, parameters)

    self._create_all_q_score_net()
    self._create_internal_policy_net()

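# Hypothetical sketch, not taken from the trainers above: it only illustrates
# how a reward_boost mapping keyed by action name is turned into a
# reward_shape mapping keyed by action index, mirroring the loop in the
# discrete-action constructors. The helper name `build_reward_shape` and its
# toy inputs are assumptions made for illustration.
from typing import Dict, List


def build_reward_shape(
    actions: List[str], reward_boost: Dict[str, float]
) -> Dict[int, float]:
    # Map each boosted action name to its positional index in `actions`.
    reward_shape = {}  # type: Dict[int, float]
    for action_name, boost in reward_boost.items():
        reward_shape[actions.index(action_name)] = boost
    return reward_shape


# Example: boosting the reward observed for the "up" action by 0.5.
assert build_reward_shape(["up", "down"], {"up": 0.5}) == {0: 0.5}
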
def __init__(
    self,
    state_normalization_parameters: Dict[str, NormalizationParameters],
    action_normalization_parameters: Dict[str, NormalizationParameters],
    parameters: ContinuousActionModelParameters,
    skip_normalization: Optional[bool] = False,
) -> None:
    self._action_features = list(action_normalization_parameters.keys())
    self.num_unprocessed_action_features = len(self._action_features)
    self.num_processed_action_features = get_num_output_features(
        action_normalization_parameters
    )
    self.num_processed_state_features = get_num_output_features(
        state_normalization_parameters
    )

    if parameters.training.layers[0] is None or parameters.training.layers[0] == -1:
        parameters.training.layers[0] = (
            self.num_state_features + self.num_action_features
        )

    assert parameters.training.layers[-1] == 1, "Set layers[-1] to 1"

    self._action_normalization_parameters = action_normalization_parameters
    RLTrainer.__init__(
        self, state_normalization_parameters, parameters, skip_normalization
    )
    print(action_normalization_parameters)
    self._prepare_action_normalization()

def __init__(
    self,
    parameters: DiscreteActionModelParameters,
    normalization_parameters: Dict[int, NormalizationParameters],
) -> None:
    self._actions = parameters.actions if parameters.actions is not None else []
    self.state_normalization_parameters = normalization_parameters
    num_features = get_num_output_features(normalization_parameters)
    parameters.training.layers[0] = num_features
    parameters.training.layers[-1] = self.num_actions

    RLTrainer.__init__(self, parameters)

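# Hypothetical sketch (toy helper, not from the source): the discrete-action
# constructors size the network so that layers[0] equals the number of
# normalized state features and layers[-1] equals the number of actions,
# i.e. one Q-value output per action. `size_discrete_layers` and its example
# inputs are illustrative assumptions only.
from typing import List


def size_discrete_layers(
    layers: List[int], num_state_features: int, actions: List[str]
) -> List[int]:
    # Input layer: width of the normalized state representation.
    layers[0] = num_state_features
    # Output layer: one logical 1-D output per state/action pair, computed
    # as a num_actions-dimensional head over the state input.
    layers[-1] = len(actions)
    return layers


# Example: 4 state features and two actions -> a 4-in, 2-out network.
assert size_discrete_layers([-1, 128, -1], 4, ["left", "right"]) == [4, 128, 2]
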
def __init__(
    self,
    parameters: ContinuousActionModelParameters,
    state_normalization_parameters: Dict[int, NormalizationParameters],
    action_normalization_parameters: Dict[int, NormalizationParameters],
) -> None:
    self.state_normalization_parameters = state_normalization_parameters
    self.action_normalization_parameters = action_normalization_parameters
    num_features = get_num_output_features(
        state_normalization_parameters
    ) + get_num_output_features(action_normalization_parameters)
    parameters.training.layers[0] = num_features
    parameters.training.layers[-1] = 1

    RLTrainer.__init__(self, parameters)

def __init__(
    self,
    parameters: DiscreteActionModelParameters,
    normalization_parameters: Dict[int, NormalizationParameters],
) -> None:
    self._actions = parameters.actions if parameters.actions is not None else []

    # Convert reward boosts keyed by action name into boosts keyed by action index
    self.reward_shape = {}  # type: Dict[int, float]
    if parameters.rl.reward_boost is not None and self._actions is not None:
        for k in parameters.rl.reward_boost.keys():
            i = self._actions.index(k)
            self.reward_shape[i] = parameters.rl.reward_boost[k]

    self.state_normalization_parameters = normalization_parameters
    num_features = get_num_output_features(normalization_parameters)
    parameters.training.layers[0] = num_features
    parameters.training.layers[-1] = self.num_actions

    RLTrainer.__init__(self, parameters)

    self._create_all_q_score_net()

def __init__(
    self,
    parameters: ContinuousActionModelParameters,
    state_normalization_parameters: Dict[int, NormalizationParameters],
    action_normalization_parameters: Dict[int, NormalizationParameters],
) -> None:
    self.state_normalization_parameters = state_normalization_parameters
    self.action_normalization_parameters = action_normalization_parameters
    num_features = get_num_output_features(
        state_normalization_parameters
    ) + get_num_output_features(action_normalization_parameters)

    # Ensure state and action feature IDs have no intersection
    overlapping_features = set(state_normalization_parameters.keys()) & set(
        action_normalization_parameters.keys()
    )
    assert len(overlapping_features) == 0, (
        "There are some overlapping state and action features: "
        + str(overlapping_features)
    )

    parameters.training.layers[0] = num_features
    parameters.training.layers[-1] = 1

    RLTrainer.__init__(self, parameters)

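# Hypothetical sketch (assumed helper, not defined in the source): shows the
# sizing convention used by the parametric-action constructors above --
# layers[0] is the concatenated state+action feature width and layers[-1] is
# 1, with an assertion that state and action feature IDs are disjoint. The
# integer dict values stand in for the width contributed by each
# NormalizationParameters entry.
from typing import Dict, List


def size_parametric_layers(
    layers: List[int],
    state_feature_widths: Dict[int, int],
    action_feature_widths: Dict[int, int],
) -> List[int]:
    overlapping = set(state_feature_widths) & set(action_feature_widths)
    assert not overlapping, (
        "There are some overlapping state and action features: " + str(overlapping)
    )
    # Input layer: total processed width of state plus action features.
    layers[0] = sum(state_feature_widths.values()) + sum(action_feature_widths.values())
    # Output layer: a single Q-value per (state, action) pair.
    layers[-1] = 1
    return layers


# Example: two state features and one action feature, each one column wide.
assert size_parametric_layers([-1, 64, -1], {1: 1, 2: 1}, {3: 1}) == [3, 64, 1]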