Example #1
    def __init__(self, seq2reward_network: Seq2RewardNetwork,
                 params: Seq2RewardTrainerParameters):
        self.seq2reward_network = seq2reward_network
        self.params = params
        self.optimizer = torch.optim.Adam(self.seq2reward_network.parameters(),
                                          lr=params.learning_rate)
        self.minibatch_size = self.params.batch_size
        self.loss_reporter = NoOpLossReporter()

        # PageHandler must use this to activate evaluator:
        self.calc_cpe_in_training = True
        # Turning off Q value output during training:
        self.view_q_value = params.view_q_value
        # permutations used to do planning
        device = get_device(self.seq2reward_network)
        self.all_permut = gen_permutations(
            params.multi_steps, len(self.params.action_names)).to(device)
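The all_permut tensor enumerates every possible action sequence of length params.multi_steps so that, at planning time, all sequences can be scored against the reward model in a single batch. Below is a minimal, self-contained sketch of what such a generator might compute, assuming a one-hot layout of shape (num_steps, num_actions ** num_steps, num_actions); the real gen_permutations helper may differ in its shape and dtype conventions.

import itertools

import torch
import torch.nn.functional as F


def gen_permutations_sketch(num_steps: int, num_actions: int) -> torch.Tensor:
    # Enumerate every index sequence, e.g. 2 steps x 2 actions ->
    # (0, 0), (0, 1), (1, 0), (1, 1).
    sequences = list(itertools.product(range(num_actions), repeat=num_steps))
    indices = torch.tensor(sequences, dtype=torch.long)    # (N, num_steps)
    # One-hot encode each step of each sequence.
    one_hot = F.one_hot(indices, num_classes=num_actions)  # (N, num_steps, num_actions)
    # Reorder to (num_steps, N, num_actions) so each time step is a leading slice.
    return one_hot.transpose(0, 1).float()


# Example: 3 steps over 4 named actions -> tensor of shape (3, 64, 4).
all_permut_sketch = gen_permutations_sketch(3, 4)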
Example #2

    def __init__(
        self,
        compress_model_network: FullyConnectedNetwork,
        seq2reward_network: Seq2RewardNetwork,
        params: Seq2RewardTrainerParameters,
    ):
        self.compress_model_network = compress_model_network
        self.seq2reward_network = seq2reward_network
        self.params = params
        self.optimizer = torch.optim.Adam(
            self.compress_model_network.parameters(),
            lr=params.compress_model_learning_rate,
        )
        self.minibatch_size = self.params.compress_model_batch_size
        self.loss_reporter = NoOpLossReporter()

        # PageHandler must use this to activate evaluator:
        self.calc_cpe_in_training = True
        # permutations used to do planning
        device = get_device(self.compress_model_network)
        self.all_permut = gen_permutations(
            params.multi_steps, len(self.params.action_names)).to(device)
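Both constructors call get_device(network) so that all_permut is created on the same device as the network it will be used with. The helper below is a plausible one-liner with that behavior, shown only as an assumption about what get_device does; it is not taken from the source above.

import torch


def get_device_sketch(module: torch.nn.Module) -> torch.device:
    # Assumes every parameter of the network lives on a single device
    # and reads the device off the first one.
    return next(module.parameters()).device

Keeping the permutation tensor co-located with the model weights avoids an implicit host-to-device copy on every planning forward pass.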