def __init__(
    self,
    state_normalization_parameters: Dict[str, NormalizationParameters],
    parameters: Union[DiscreteActionModelParameters,
                      ContinuousActionModelParameters],
    skip_normalization: Optional[bool] = False,
) -> None:
    print(state_normalization_parameters)
    print(parameters)

    self._state_normalization_parameters = state_normalization_parameters
    MLTrainer.__init__(self, "rl_trainer", parameters.training)

    # Target network used for computing TD targets; updated at
    # parameters.rl.target_update_rate.
    self.target_network = TargetNetwork(
        self, parameters.rl.target_update_rate
    )

    self.reward_burnin = parameters.rl.reward_burnin
    self.maxq_learning = parameters.rl.maxq_learning
    self.rl_discount_rate = parameters.rl.gamma
    self.training_iteration = 0
    self._buffers = None
    self.minibatch_size = parameters.training.minibatch_size

    self.skip_normalization = skip_normalization
    self._prepare_state_normalization()
def __init__(self, name, parameters, scaled_output=True):
    """
    :param name: A unique name for this trainer, used to namespace the data
        created on the caffe2 workspace
    :param parameters: Training parameters describing the layer sizes and
        activation functions, passed through to MLTrainer
    :param scaled_output: Whether the network should produce a scaled output
    """
    self.scaled_output = scaled_output
    MLTrainer.__init__(self, name, parameters)
def __init__(
    self,
    name: str,
    fc_parameters: TrainingParameters,
    cnn_parameters: CNNModelParameters,
    img_height: int,
    img_width: int,
) -> None:
    self.init_height = img_height
    self.init_width = img_width
    self.dims = cnn_parameters.conv_dims
    self.conv_height_kernels = cnn_parameters.conv_height_kernels
    self.conv_width_kernels = cnn_parameters.conv_width_kernels
    self.pool_kernels_strides = cnn_parameters.pool_kernels_strides
    self.pool_types = cnn_parameters.pool_types

    MLTrainer.__init__(self, name, fc_parameters)
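# Illustrative only: a minimal sketch of the cnn_parameters fields this
# constructor reads. The SimpleNamespace stand-in and the concrete values are
# assumptions for illustration, not the real CNNModelParameters constructor.
from types import SimpleNamespace

example_cnn_parameters = SimpleNamespace(
    conv_dims=[3, 32, 64],        # channel counts across the conv stack
    conv_height_kernels=[5, 5],   # kernel heights, one per conv layer
    conv_width_kernels=[5, 5],    # kernel widths, one per conv layer
    pool_kernels_strides=[2, 2],  # pooling kernel / stride per conv layer
    pool_types=['max', 'max'],    # pooling type per conv layer
)
# The enclosing trainer class (not named in this excerpt) would then be
# constructed roughly as:
#   trainer = SomeConvTrainer(
#       'cnn_trainer', fc_parameters, example_cnn_parameters,
#       img_height=84, img_width=84,
#   )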
def __init__(
    self,
    parameters: Union[DiscreteActionModelParameters,
                      ContinuousActionModelParameters],
) -> None:
    logger.info(str(parameters))

    assert parameters.training.layers[0] >= 0,\
        "Set layers[0] to the number of features"
    self.num_features = parameters.training.layers[0]

    MLTrainer.__init__(self, RL_TRAINER_MODEL_ID, parameters.training)

    self.target_network = TargetNetwork(
        self, parameters.rl.target_update_rate
    )

    self.reward_burnin = parameters.rl.reward_burnin
    self.maxq_learning = parameters.rl.maxq_learning
    self.rl_discount_rate = parameters.rl.gamma
    self.rl_temperature = parameters.rl.temperature
    self.training_iteration = 0
    self.minibatch_size = parameters.training.minibatch_size
    self.parameters = parameters
    self.loss_blob: Optional[str] = None

    # Seed the caffe2 workspace with placeholder blobs so the nets built
    # below can reference them before real training data is fed in.
    workspace.FeedBlob('states', np.array([0], dtype=np.float32))
    workspace.FeedBlob('actions', np.array([0], dtype=np.float32))
    workspace.FeedBlob('rewards', np.array([0], dtype=np.float32))
    workspace.FeedBlob('next_states', np.array([0], dtype=np.float32))
    workspace.FeedBlob('not_terminals', np.array([0], dtype=np.float32))
    workspace.FeedBlob('next_actions', np.array([0], dtype=np.float32))
    workspace.FeedBlob(
        'possible_next_actions', np.array([0], dtype=np.float32)
    )
    workspace.FeedBlob(
        'possible_next_actions_lengths', np.array([0], dtype=np.float32)
    )

    # Build the reward, RL, and Q-score nets; the helpers below populate
    # these attributes.
    self.rl_train_model: Optional[ModelHelper] = None
    self.reward_train_model: Optional[ModelHelper] = None
    self.q_score_model: Optional[ModelHelper] = None
    self._create_reward_train_net()
    self._create_rl_train_net()
    self._create_q_score_net()
    assert self.rl_train_model is not None
    assert self.reward_train_model is not None
    assert self.q_score_model is not None
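# Illustrative only: a sketch of the parameter structure this constructor
# expects, using SimpleNamespace stand-ins in place of the real
# DiscreteActionModelParameters / ContinuousActionModelParameters types.
# The field values are made up; only the attribute names are taken from the
# constructor body above.
from types import SimpleNamespace

example_parameters = SimpleNamespace(
    training=SimpleNamespace(
        layers=[256, 128, 64, 1],   # layers[0] is the number of input features
        minibatch_size=1024,
        # ...plus whatever optimizer settings MLTrainer consumes (assumed)
    ),
    rl=SimpleNamespace(
        target_update_rate=0.01,    # soft-update rate for the target network
        reward_burnin=1,
        maxq_learning=True,         # take the max over next actions for targets
        gamma=0.99,                 # discount rate
        temperature=0.35,           # softmax temperature
    ),
)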
def __init__(
    self,
    parameters: Union[DiscreteActionModelParameters,
                      ContinuousActionModelParameters],
) -> None:
    logger.info(str(parameters))

    assert parameters.training.layers[0] >= 0,\
        "Set layers[0] to the number of features"
    self.num_features = parameters.training.layers[0]

    MLTrainer.__init__(self, "rl_trainer", parameters.training)

    self.target_network = TargetNetwork(
        self, parameters.rl.target_update_rate
    )

    self.reward_burnin = parameters.rl.reward_burnin
    self.maxq_learning = parameters.rl.maxq_learning
    self.rl_discount_rate = parameters.rl.gamma
    self.training_iteration = 0
    self.minibatch_size = parameters.training.minibatch_size