Example #1
    def build_trainer(self) -> QRDQNTrainer:
        net_builder = self.net_builder.value
        q_network = net_builder.build_q_network(
            self.state_normalization_data,
            len(self.action_names),
            # pyre-fixme[16]: `QRDQNTrainerParameters` has no attribute `num_atoms`.
            num_atoms=self.trainer_param.num_atoms,
        )

        if self.use_gpu:
            q_network = q_network.cuda()

        q_network_target = q_network.get_target_network()

        reward_network, q_network_cpe, q_network_cpe_target = None, None, None
        # pyre-fixme[16]: `QRDQNTrainerParameters` has no attribute `evaluation`.
        if self.trainer_param.evaluation.calc_cpe_in_training:
            # Metrics + reward
            num_output_nodes = (len(self.metrics_to_score) + 1) * len(
                # pyre-fixme[16]: `QRDQNTrainerParameters` has no attribute `actions`.
                self.trainer_param.actions
            )

            cpe_net_builder = self.cpe_net_builder.value
            reward_network = cpe_net_builder.build_q_network(
                self.state_feature_config,
                self.state_normalization_data,
                num_output_nodes,
            )
            q_network_cpe = cpe_net_builder.build_q_network(
                self.state_feature_config,
                self.state_normalization_data,
                num_output_nodes,
            )

            if self.use_gpu:
                reward_network.cuda()
                q_network_cpe.cuda()

            q_network_cpe_target = q_network_cpe.get_target_network()

        # pyre-fixme[16]: `DiscreteQRDQN` has no attribute `_q_network`.
        self._q_network = q_network
        trainer = QRDQNTrainer(
            q_network=q_network,
            q_network_target=q_network_target,
            reward_network=reward_network,
            q_network_cpe=q_network_cpe,
            q_network_cpe_target=q_network_cpe_target,
            metrics_to_score=self.metrics_to_score,
            loss_reporter=NoOpLossReporter(),
            use_gpu=self.use_gpu,
            # pyre-fixme[16]: `QRDQNTrainerParameters` has no attribute `asdict`.
            **self.trainer_param.asdict(),
        )
        return trainer
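All of the examples on this page pass NoOpLossReporter() as the loss reporter when no real reporting backend is wired up. The stub below is a hypothetical stand-in (not ReAgent's actual class) that illustrates the idea: every reporting call is accepted and silently discarded, so trainer code can report unconditionally.

class MinimalNoOpReporter:
    """Accepts any reporting call and throws the data away (illustrative only)."""

    def __getattr__(self, name):
        # Any method looked up on the reporter resolves to a do-nothing callable.
        def _noop(*args, **kwargs):
            return None

        return _noop


reporter = MinimalNoOpReporter()
# Calls like this simply vanish, so trainers never need a "is reporting on?" branch.
reporter.report(td_loss=0.0, reward_loss=None)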
Example #2
    def build_trainer(self) -> C51Trainer:
        net_builder = self.net_builder.value
        q_network = net_builder.build_q_network(
            state_normalization_data=self.state_normalization_data,
            output_dim=len(self.action_names),
            # pyre-fixme[16]: `C51TrainerParameters` has no attribute `num_atoms`.
            num_atoms=self.trainer_param.num_atoms,
            # pyre-fixme[16]: `C51TrainerParameters` has no attribute `qmin`.
            qmin=self.trainer_param.qmin,
            # pyre-fixme[16]: `C51TrainerParameters` has no attribute `qmax`.
            qmax=self.trainer_param.qmax,
        )

        if self.use_gpu:
            q_network = q_network.cuda()

        q_network_target = q_network.get_target_network()

        # pyre-fixme[16]: `DiscreteC51DQN` has no attribute `_q_network`.
        self._q_network = q_network

        return C51Trainer(
            q_network=q_network,
            q_network_target=q_network_target,
            metrics_to_score=self.metrics_to_score,
            loss_reporter=NoOpLossReporter(),
            use_gpu=self.use_gpu,
            # pyre-fixme[16]: `C51TrainerParameters` has no attribute `asdict`.
            **self.trainer_param.asdict(),
        )
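C51 models the return as a categorical distribution over a fixed support of num_atoms evenly spaced points between qmin and qmax, which is why the net builder above takes all three values. A quick illustration of that support, with made-up numbers rather than values from the example:

import torch

# Illustrative hyperparameters, not taken from the code above.
num_atoms, qmin, qmax = 51, -10.0, 10.0
support = torch.linspace(qmin, qmax, num_atoms)  # 51 evenly spaced atoms from qmin to qmax
assert support.shape == (num_atoms,)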
Example #3
    def __init__(self, seq2reward_network: Seq2RewardNetwork,
                 params: Seq2RewardTrainerParameters):
        self.seq2reward_network = seq2reward_network
        self.params = params
        self.optimizer = torch.optim.Adam(self.seq2reward_network.parameters(),
                                          lr=params.learning_rate)
        self.minibatch_size = self.params.batch_size
        self.loss_reporter = NoOpLossReporter()

        # PageHandler must use this to activate evaluator:
        self.calc_cpe_in_training = self.params.calc_cpe_in_training
Example #4
    def build_trainer(self) -> DQNTrainer:
        net_builder = self.net_builder.value
        q_network = net_builder.build_q_network(
            self.state_feature_config,
            self.state_normalization_parameters,
            len(self.action_names),
        )

        if self.use_gpu:
            q_network = q_network.cuda()

        q_network_target = q_network.get_target_network()

        reward_network, q_network_cpe, q_network_cpe_target = None, None, None
        if self.trainer_param.evaluation.calc_cpe_in_training:
            # Metrics + reward
            num_output_nodes = (len(self.metrics_to_score) + 1) * len(
                self.trainer_param.actions)

            cpe_net_builder = self.cpe_net_builder.value
            reward_network = cpe_net_builder.build_q_network(
                self.state_feature_config,
                self.state_normalization_parameters,
                num_output_nodes,
            )
            q_network_cpe = cpe_net_builder.build_q_network(
                self.state_feature_config,
                self.state_normalization_parameters,
                num_output_nodes,
            )

            if self.use_gpu:
                reward_network.cuda()
                q_network_cpe.cuda()

            q_network_cpe_target = q_network_cpe.get_target_network()

        self._q_network = q_network
        trainer = DQNTrainer(
            q_network,
            q_network_target,
            reward_network,
            self.trainer_param,
            self.use_gpu,
            q_network_cpe=q_network_cpe,
            q_network_cpe_target=q_network_cpe_target,
            metrics_to_score=self.metrics_to_score,
            loss_reporter=NoOpLossReporter(),
        )
        return trainer
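The CPE networks in Examples #1 and #4 are sized as one output per (metric plus the reward) per action, matching the "# Metrics + reward" comment. A quick check of that formula with invented values:

# Illustrative values only; not taken from the examples above.
metrics_to_score = ["ctr", "watch_time"]      # 2 extra metrics to score
actions = ["no_action", "up", "down"]         # 3 discrete actions

# One output head per (metric + reward) per action.
num_output_nodes = (len(metrics_to_score) + 1) * len(actions)
assert num_output_nodes == 9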
Example #5
    def __init__(self, seq2reward_network: Seq2RewardNetwork,
                 params: Seq2RewardTrainerParameters):
        self.seq2reward_network = seq2reward_network
        self.params = params
        self.optimizer = torch.optim.Adam(self.seq2reward_network.parameters(),
                                          lr=params.learning_rate)
        self.minibatch_size = self.params.batch_size
        self.loss_reporter = NoOpLossReporter()

        # PageHandler must use this to activate evaluator:
        self.calc_cpe_in_training = True
        # Turning off Q value output during training:
        self.view_q_value = params.view_q_value
        # permutations used to do planning
        device = get_device(self.seq2reward_network)
        self.all_permut = gen_permutations(
            params.multi_steps, len(self.params.action_names)).to(device)
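gen_permutations builds the table of candidate action sequences that the seq2reward planner scores. The sketch below shows one way such a table could be constructed with itertools.product and one-hot encoding; the real gen_permutations may use a different layout, so treat this as an assumption about its intent rather than its implementation.

import itertools

import torch


def all_action_sequences(num_steps: int, num_actions: int) -> torch.Tensor:
    # Enumerate every length-num_steps action sequence and one-hot encode it.
    # Result shape: (num_actions ** num_steps, num_steps, num_actions).
    sequences = list(itertools.product(range(num_actions), repeat=num_steps))
    indices = torch.tensor(sequences, dtype=torch.int64)
    return torch.nn.functional.one_hot(indices, num_actions).float()


plans = all_action_sequences(2, 3)  # e.g. 2 steps over 3 actions -> 9 candidate plans
assert plans.shape == (9, 2, 3)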
Example #6
 def __init__(
     self,
     seq2slate_net: Seq2SlateTransformerNet,
     minibatch_size: int = 1024,
     loss_reporter=None,
     use_gpu: bool = False,
     policy_optimizer: Optimizer__Union = field(  # noqa: B008
         default_factory=Optimizer__Union.default),
 ) -> None:
     self.loss_reporter = loss_reporter
     self.use_gpu = use_gpu
     self.seq2slate_net = seq2slate_net
     self.minibatch_size = minibatch_size
     self.minibatch = 0
     self.optimizer = policy_optimizer.make_optimizer_scheduler(
         self.seq2slate_net.parameters())["optimizer"]
     self.log_softmax = nn.LogSoftmax(dim=1)
     self.kl_loss = nn.KLDivLoss(reduction="batchmean")
     if self.loss_reporter is None:
         self.loss_reporter = NoOpLossReporter()
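Examples #6 and #9 pair nn.LogSoftmax with nn.KLDivLoss. A minimal, self-contained illustration of why: KLDivLoss expects log-probabilities as its input and probabilities as its target. Shapes and values below are invented for the demo.

import torch
import torch.nn as nn

log_softmax = nn.LogSoftmax(dim=1)
kl_loss = nn.KLDivLoss(reduction="batchmean")

student_scores = torch.randn(4, 10, requires_grad=True)    # raw model scores
teacher_probs = torch.softmax(torch.randn(4, 10), dim=1)   # target distribution

loss = kl_loss(log_softmax(student_scores), teacher_probs)
loss.backward()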
Example #7
    def build_trainer(self) -> C51Trainer:
        net_builder = self.net_builder.value
        q_network = net_builder.build_q_network(
            self.state_normalization_parameters, len(self.action_names)
        )

        if self.use_gpu:
            q_network = q_network.cuda()

        q_network_target = q_network.get_target_network()

        self._q_network = q_network

        return C51Trainer(
            q_network,
            q_network_target,
            self.trainer_param,
            self.use_gpu,
            metrics_to_score=self.metrics_to_score,
            loss_reporter=NoOpLossReporter(),
        )
Example #8
    def __init__(
        self,
        compress_model_network: FullyConnectedNetwork,
        seq2reward_network: Seq2RewardNetwork,
        params: Seq2RewardTrainerParameters,
    ):
        self.compress_model_network = compress_model_network
        self.seq2reward_network = seq2reward_network
        self.params = params
        self.optimizer = torch.optim.Adam(
            self.compress_model_network.parameters(),
            lr=params.compress_model_learning_rate,
        )
        self.minibatch_size = self.params.compress_model_batch_size
        self.loss_reporter = NoOpLossReporter()

        # PageHandler must use this to activate evaluator:
        self.calc_cpe_in_training = True
        # permutations used to do planning
        device = get_device(self.compress_model_network)
        self.all_permut = gen_permutations(
            params.multi_steps, len(self.params.action_names)).to(device)
Example #9
 def __init__(
     self,
     seq2slate_net: Seq2SlateTransformerNet,
     parameters: TransformerParameters,
     minibatch_size: int,
     loss_reporter=None,
     use_gpu: bool = False,
 ) -> None:
     self.parameters = parameters
     self.loss_reporter = loss_reporter
     self.use_gpu = use_gpu
     self.seq2slate_net = seq2slate_net
     self.minibatch_size = minibatch_size
     self.minibatch = 0
     self.optimizer = torch.optim.Adam(
         self.seq2slate_net.parameters(),
         lr=self.parameters.learning_rate,
         amsgrad=True,
     )
     self.log_softmax = nn.LogSoftmax(dim=1)
     self.kl_loss = nn.KLDivLoss(reduction="batchmean")
     if self.loss_reporter is None:
         self.loss_reporter = NoOpLossReporter()
Example #10
    def __init__(
        self, seq2reward_network: Seq2RewardNetwork, params: Seq2RewardTrainerParameters
    ):
        self.seq2reward_network = seq2reward_network
        self.params = params
        self.mse_optimizer = torch.optim.Adam(
            self.seq2reward_network.parameters(), lr=params.learning_rate
        )
        self.minibatch_size = self.params.batch_size
        self.loss_reporter = NoOpLossReporter()

        # PageHandler must use this to activate evaluator:
        self.calc_cpe_in_training = True
        # Turning off Q value output during training:
        self.view_q_value = params.view_q_value
        # permutations used to do planning
        self.all_permut = gen_permutations(
            params.multi_steps, len(self.params.action_names)
        )
        self.mse_loss = nn.MSELoss(reduction="mean")

        # Predict how many steps are remaining from the current step
        self.step_predict_network = FullyConnectedNetwork(
            [
                self.seq2reward_network.state_dim,
                self.params.step_predict_net_size,
                self.params.step_predict_net_size,
                self.params.multi_steps,
            ],
            ["relu", "relu", "linear"],
            use_layer_norm=False,
        )
        self.step_loss = nn.CrossEntropyLoss(reduction="mean")
        self.step_optimizer = torch.optim.Adam(
            self.step_predict_network.parameters(), lr=params.learning_rate
        )
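The step-prediction head in Example #10 is a classifier over how many steps remain, trained with cross-entropy. The sketch below mirrors that setup with a plain nn.Sequential MLP and invented sizes; it is not the FullyConnectedNetwork class used above.

import torch
import torch.nn as nn

state_dim, hidden, multi_steps = 16, 64, 5    # invented sizes, not from the example
step_predict_network = nn.Sequential(
    nn.Linear(state_dim, hidden), nn.ReLU(),
    nn.Linear(hidden, hidden), nn.ReLU(),
    nn.Linear(hidden, multi_steps),           # one logit per possible remaining-step count
)
step_loss = nn.CrossEntropyLoss(reduction="mean")

states = torch.randn(8, state_dim)                      # batch of state embeddings
steps_remaining = torch.randint(0, multi_steps, (8,))   # class index per sample
loss = step_loss(step_predict_network(states), steps_remaining)
loss.backward()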