Code Example #1
File: rl_trainer.py  Project: caozhengquan/BlueWhale
    def __init__(self,
                 state_normalization_parameters: Dict[str,
                                                      NormalizationParameters],
                 parameters: Union[DiscreteActionModelParameters,
                                   ContinuousActionModelParameters],
                 skip_normalization: Optional[bool] = False) -> None:
        print(state_normalization_parameters)
        print(parameters)

        self._state_normalization_parameters = state_normalization_parameters
        MLTrainer.__init__(self, "rl_trainer", parameters.training)

        self.target_network = TargetNetwork(self,
                                            parameters.rl.target_update_rate)

        self.reward_burnin = parameters.rl.reward_burnin
        self.maxq_learning = parameters.rl.maxq_learning
        self.rl_discount_rate = parameters.rl.gamma

        self.training_iteration = 0
        self._buffers = None
        self.minibatch_size = parameters.training.minibatch_size

        self.skip_normalization = skip_normalization
        self._prepare_state_normalization()
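
The constructor above delegates to MLTrainer.__init__ before wiring up the RL-specific state. A minimal standalone sketch of that base/subclass initialization pattern; MLTrainerBase and its field names are stand-ins of mine, not the project's actual MLTrainer:

class MLTrainerBase:
    def __init__(self, name, training_parameters):
        # The base class owns the trainer's name and training configuration,
        # which is all the snippets above rely on it for.
        self.model_id = name
        self.training_parameters = training_parameters

class RLTrainerSketch(MLTrainerBase):
    def __init__(self, training_parameters, target_update_rate):
        # Explicit base-class call, as in MLTrainer.__init__(self, ...) above.
        MLTrainerBase.__init__(self, "rl_trainer", training_parameters)
        self.target_update_rate = target_update_rate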
Code Example #2
    def __init__(self, name, parameters, scaled_output=True):
        """

        :param name: A unique name for this trainer used to create the data on the
            caffe2 workspace
        :param layers: A list of integers describing the layer sizes
        :param activations: A list of strings describing the activation functions
        """
        self.scaled_output = scaled_output
        MLTrainer.__init__(self, name, parameters)
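
The original docstring here described layers and activations lists; in these snippets that structural information travels inside the parameters object (example #4 below reads parameters.training.layers[0] as the feature count). A rough sketch of what such a parameters object might carry; only layers and minibatch_size are attested in the surrounding examples, the activation strings and default values are assumptions of mine:

from dataclasses import dataclass, field
from typing import List

@dataclass
class TrainingParametersSketch:
    # layers[0] is the input feature count (see the assert in example #4);
    # the remaining entries are hidden/output layer sizes.
    layers: List[int] = field(default_factory=lambda: [16, 64, 32, 4])
    # One activation per pair of consecutive layers (assumed naming).
    activations: List[str] = field(
        default_factory=lambda: ["relu", "relu", "linear"])
    minibatch_size: int = 128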
Code Example #3
    def __init__(self, name: str, fc_parameters: TrainingParameters,
                 cnn_parameters: CNNModelParameters, img_height: int,
                 img_width: int) -> None:
        self.init_height = img_height
        self.init_width = img_width
        self.dims = cnn_parameters.conv_dims
        self.conv_height_kernels = cnn_parameters.conv_height_kernels
        self.conv_width_kernels = cnn_parameters.conv_width_kernels
        self.pool_kernels_strides = cnn_parameters.pool_kernels_strides
        self.pool_types = cnn_parameters.pool_types

        MLTrainer.__init__(self, name, fc_parameters)
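
This constructor only records the CNN hyperparameters; to make their interaction concrete, here is a standalone sketch that computes the spatial output size of alternating convolution and pooling layers. The stride-1, no-padding convolutions and kernel-equals-stride pooling are assumptions of mine and may not match the nets the project actually builds:

def conv_pool_output_size(height, width, conv_height_kernels,
                          conv_width_kernels, pool_kernels_strides):
    """Spatial (height, width) after alternating convolution (stride 1,
    no padding assumed) and pooling (kernel == stride assumed) layers."""
    for kh, kw, pool in zip(conv_height_kernels, conv_width_kernels,
                            pool_kernels_strides):
        height = (height - kh + 1) // pool
        width = (width - kw + 1) // pool
    return height, width

# e.g. a 28x28 image through two 5x5 convs, each followed by 2x2 pooling:
print(conv_pool_output_size(28, 28, [5, 5], [5, 5], [2, 2]))  # -> (4, 4)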
Code Example #4
File: rl_trainer.py  Project: chinpeng/BlueWhale
    def __init__(
        self,
        parameters: Union[DiscreteActionModelParameters,
                          ContinuousActionModelParameters],
    ) -> None:
        logger.info(str(parameters))

        assert parameters.training.layers[0] >= 0,\
            "Set layers[0] to the number of features"

        self.num_features = parameters.training.layers[0]

        MLTrainer.__init__(self, RL_TRAINER_MODEL_ID, parameters.training)

        self.target_network = TargetNetwork(
            self, parameters.rl.target_update_rate
        )

        self.reward_burnin = parameters.rl.reward_burnin
        self.maxq_learning = parameters.rl.maxq_learning
        self.rl_discount_rate = parameters.rl.gamma
        self.rl_temperature = parameters.rl.temperature
        self.training_iteration = 0
        self.minibatch_size = parameters.training.minibatch_size
        self.parameters = parameters
        self.loss_blob: Optional[str] = None

        workspace.FeedBlob('states', np.array([0], dtype=np.float32))
        workspace.FeedBlob('actions', np.array([0], dtype=np.float32))
        workspace.FeedBlob('rewards', np.array([0], dtype=np.float32))
        workspace.FeedBlob('next_states', np.array([0], dtype=np.float32))
        workspace.FeedBlob('not_terminals', np.array([0], dtype=np.float32))
        workspace.FeedBlob('next_actions', np.array([0], dtype=np.float32))
        workspace.FeedBlob(
            'possible_next_actions', np.array([0], dtype=np.float32)
        )
        workspace.FeedBlob(
            'possible_next_actions_lengths', np.array([0], dtype=np.float32)
        )

        self.rl_train_model: Optional[ModelHelper] = None
        self.reward_train_model: Optional[ModelHelper] = None
        self.q_score_model: Optional[ModelHelper] = None
        self._create_reward_train_net()
        self._create_rl_train_net()
        self._create_q_score_net()
        assert self.rl_train_model is not None
        assert self.reward_train_model is not None
        assert self.q_score_model is not None
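
The workspace.FeedBlob calls above register single-element placeholder blobs so that the nets built by the _create_*_net methods can reference those names before any real minibatch exists. A minimal round trip using the same Caffe2 API; the shapes in the second feed are illustrative only:

import numpy as np
from caffe2.python import workspace

# Register a placeholder under a fixed name ...
workspace.FeedBlob('states', np.array([0], dtype=np.float32))
print(workspace.FetchBlob('states'))  # -> array([0.], dtype=float32)

# ... which later feeds simply overwrite with real minibatch data.
workspace.FeedBlob('states', np.random.rand(32, 10).astype(np.float32))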
Code Example #5
    def __init__(
        self,
        parameters: Union[DiscreteActionModelParameters,
                          ContinuousActionModelParameters],
    ) -> None:
        logger.info(str(parameters))

        assert parameters.training.layers[0] >= 0,\
            "Set layers[0] to the number of features"

        self.num_features = parameters.training.layers[0]

        MLTrainer.__init__(self, "rl_trainer", parameters.training)

        self.target_network = TargetNetwork(self,
                                            parameters.rl.target_update_rate)

        self.reward_burnin = parameters.rl.reward_burnin
        self.maxq_learning = parameters.rl.maxq_learning
        self.rl_discount_rate = parameters.rl.gamma

        self.training_iteration = 0
        self.minibatch_size = parameters.training.minibatch_size