def build_trainer(self) -> C51Trainer:
    net_builder = self.net_builder.value
    q_network = net_builder.build_q_network(
        state_normalization_data=self.state_normalization_data,
        output_dim=len(self.action_names),
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `num_atoms`.
        num_atoms=self.trainer_param.num_atoms,
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `qmin`.
        qmin=self.trainer_param.qmin,
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `qmax`.
        qmax=self.trainer_param.qmax,
    )
    q_network_target = q_network.get_target_network()
    # pyre-fixme[16]: `DiscreteC51DQN` has no attribute `_q_network`.
    self._q_network = q_network
    return C51Trainer(
        q_network=q_network,
        q_network_target=q_network_target,
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `asdict`.
        **self.trainer_param.asdict(),
    )
def build_trainer(
    self,
    normalization_data_map: Dict[str, NormalizationData],
    use_gpu: bool,
    reward_options: Optional[RewardOptions] = None,
) -> C51Trainer:
    net_builder = self.net_builder.value
    q_network = net_builder.build_q_network(
        state_normalization_data=normalization_data_map[NormalizationKey.STATE],
        output_dim=len(self.action_names),
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `num_atoms`.
        num_atoms=self.trainer_param.num_atoms,
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `qmin`.
        qmin=self.trainer_param.qmin,
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `qmax`.
        qmax=self.trainer_param.qmax,
    )
    q_network_target = q_network.get_target_network()
    return C51Trainer(
        q_network=q_network,
        q_network_target=q_network_target,
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `asdict`.
        **self.trainer_param.asdict(),
    )
def build_trainer(self) -> C51Trainer:
    net_builder = self.net_builder.value
    q_network = net_builder.build_q_network(
        state_normalization_data=self.state_normalization_data,
        output_dim=len(self.action_names),
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `num_atoms`.
        num_atoms=self.trainer_param.num_atoms,
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `qmin`.
        qmin=self.trainer_param.qmin,
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `qmax`.
        qmax=self.trainer_param.qmax,
    )
    if self.use_gpu:
        q_network = q_network.cuda()
    q_network_target = q_network.get_target_network()
    # pyre-fixme[16]: `DiscreteC51DQN` has no attribute `_q_network`.
    self._q_network = q_network
    # pyre-fixme[29]: `Type[reagent.training.c51_trainer.C51Trainer]` is not a
    #  function.
    return C51Trainer(
        q_network=q_network,
        q_network_target=q_network_target,
        metrics_to_score=self.metrics_to_score,
        loss_reporter=NoOpLossReporter(),
        use_gpu=self.use_gpu,
        # pyre-fixme[16]: `C51TrainerParameters` has no attribute `asdict`.
        **self.trainer_param.asdict(),
    )
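# Illustrative sketch (not part of the ReAgent source above): the num_atoms /
# qmin / qmax arguments forwarded to build_q_network define the fixed support of
# the C51 categorical value distribution. The snippet below shows, with plain
# PyTorch and made-up shapes and config values, how such a support is typically
# built and how an expected Q-value is recovered from per-atom probabilities.
import torch
import torch.nn.functional as F

num_atoms, qmin, qmax = 51, -10.0, 10.0          # hypothetical config values
support = torch.linspace(qmin, qmax, num_atoms)  # atom locations z_1 .. z_N

batch_size, num_actions = 4, 2
logits = torch.randn(batch_size, num_actions, num_atoms)  # stand-in for network output
pmf = F.softmax(logits, dim=-1)                  # per-action distribution over atoms
q_values = (pmf * support).sum(dim=-1)           # expected Q per (state, action)
greedy_action = q_values.argmax(dim=1)           # greedy action selection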
def create_dqn_trainer_from_params(
    model: DiscreteActionModelParameters,
    normalization_parameters: Dict[int, NormalizationParameters],
    use_gpu: bool = False,
    use_all_avail_gpus: bool = False,
    metrics_to_score=None,
):
    metrics_to_score = metrics_to_score or []

    if model.rainbow.quantile:
        q_network = FullyConnectedDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=len(model.actions),
            num_atoms=model.rainbow.num_atoms,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )
    elif model.rainbow.categorical:
        distributional_network = FullyConnectedDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=len(model.actions),
            num_atoms=model.rainbow.num_atoms,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )
        q_network = CategoricalDQN(  # type: ignore
            distributional_network,
            qmin=model.rainbow.qmin,
            qmax=model.rainbow.qmax,
            num_atoms=model.rainbow.num_atoms,
        )
    elif model.rainbow.dueling_architecture:
        q_network = DuelingQNetwork.make_fully_connected(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=len(model.actions),
            layers=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
        )
    else:
        q_network = FullyConnectedDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=len(model.actions),
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )

    if use_gpu and torch.cuda.is_available():
        q_network = q_network.cuda()

    q_network_target = q_network.get_target_network()
    reward_network, q_network_cpe, q_network_cpe_target = None, None, None
    if model.evaluation.calc_cpe_in_training:
        # Metrics + reward
        num_output_nodes = (len(metrics_to_score) + 1) * len(model.actions)
        reward_network = FullyConnectedDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=num_output_nodes,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )
        q_network_cpe = FullyConnectedDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=num_output_nodes,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )
        if use_gpu and torch.cuda.is_available():
            reward_network.cuda()
            q_network_cpe.cuda()
        q_network_cpe_target = q_network_cpe.get_target_network()

    if (
        use_all_avail_gpus
        and not model.rainbow.categorical
        and not model.rainbow.quantile
    ):
        q_network = q_network.get_distributed_data_parallel_model()
        reward_network = (
            reward_network.get_distributed_data_parallel_model()
            if reward_network
            else None
        )
        q_network_cpe = (
            q_network_cpe.get_distributed_data_parallel_model()
            if q_network_cpe
            else None
        )

    if model.rainbow.quantile:
        assert (
            not use_all_avail_gpus
        ), "use_all_avail_gpus not implemented for distributional RL"
        parameters = QRDQNTrainerParameters.from_discrete_action_model_parameters(model)
        return QRDQNTrainer(
            q_network,
            q_network_target,
            parameters,
            use_gpu,
            metrics_to_score=metrics_to_score,
            reward_network=reward_network,
            q_network_cpe=q_network_cpe,
            q_network_cpe_target=q_network_cpe_target,
        )
    elif model.rainbow.categorical:
        assert (
            not use_all_avail_gpus
        ), "use_all_avail_gpus not implemented for distributional RL"
        return C51Trainer(
            q_network,
            q_network_target,
            C51TrainerParameters.from_discrete_action_model_parameters(model),
            use_gpu,
            metrics_to_score=metrics_to_score,
        )
    else:
        parameters = DQNTrainerParameters.from_discrete_action_model_parameters(model)
        return DQNTrainer(
            q_network,
            q_network_target,
            reward_network,
            parameters,
            use_gpu,
            q_network_cpe=q_network_cpe,
            q_network_cpe_target=q_network_cpe_target,
            metrics_to_score=metrics_to_score,
        )
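# Illustrative note (not part of the function above): when calc_cpe_in_training
# is enabled, the reward and CPE networks predict one value per (metric, action)
# pair plus the base reward for each action, which is where
#     num_output_nodes = (len(metrics_to_score) + 1) * len(model.actions)
# comes from. A quick worked example with hypothetical action and metric names:
actions = ["no_op", "recommend"]             # 2 discrete actions
metrics_to_score = ["clicks", "watch_time"]  # 2 extra metrics tracked for CPE
num_output_nodes = (len(metrics_to_score) + 1) * len(actions)
assert num_output_nodes == 6                 # (reward + 2 metrics) x 2 actions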