コード例 #1
0
    def __init__(
        self,
        rl_parameters: RLParameters,
        use_gpu: bool,
        metrics_to_score=None,
        actions: Optional[List[str]] = None,
        evaluation_parameters: Optional[EvaluationParameters] = None,
        loss_reporter=None,
    ) -> None:
        """Set up hyperparameters, the Q-network loss, the device and loss reporting.

        Args:
            rl_parameters: Source of the hyperparameters copied onto the
                instance (``temperature``, ``maxq_learning``, ``gamma``,
                ``target_update_rate``, ``use_seq_num_diff_as_time_diff``,
                ``time_diff_unit_length``, ``tensorboard_logging_freq``,
                ``multi_steps``) and of ``q_network_loss``, which selects the
                loss function.
            use_gpu: Request CUDA. Honored only when
                ``torch.cuda.is_available()`` is true; otherwise the CPU
                device is used.
            metrics_to_score: Optional list; ``"reward"`` is always appended
                to it (a new list is built, the argument is not mutated).
            actions: Stored as ``self._actions`` and handed to
                ``LossReporter`` when no ``loss_reporter`` is supplied.
            evaluation_parameters: Optional; only its
                ``calc_cpe_in_training`` attribute is read here.
            loss_reporter: Optional ready-made reporter. When falsy, a
                ``LossReporter(actions)`` is created instead.

        Raises:
            ValueError: If ``rl_parameters.q_network_loss`` is neither
                ``"mse"`` nor ``"huber"``.
        """
        self.minibatch = 0
        # Left unset here; nothing in this constructor assigns real values.
        self.minibatch_size: Optional[int] = None
        self.minibatches_per_step: Optional[int] = None

        self.rl_parameters = rl_parameters
        self.rl_temperature = float(rl_parameters.temperature)
        self.maxq_learning = rl_parameters.maxq_learning
        self.gamma = rl_parameters.gamma
        self.tau = rl_parameters.target_update_rate
        self.use_seq_num_diff_as_time_diff = rl_parameters.use_seq_num_diff_as_time_diff
        self.time_diff_unit_length = rl_parameters.time_diff_unit_length
        self.tensorboard_logging_freq = rl_parameters.tensorboard_logging_freq
        self.multi_steps = rl_parameters.multi_steps

        # Coerce to a real bool: a bare `a and a.b` would leave None on the
        # instance whenever evaluation_parameters is omitted.
        self.calc_cpe_in_training = bool(
            evaluation_parameters is not None
            and evaluation_parameters.calc_cpe_in_training
        )

        loss_name = rl_parameters.q_network_loss
        if loss_name == "mse":
            self.q_network_loss = F.mse_loss
        elif loss_name == "huber":
            self.q_network_loss = F.smooth_l1_loss
        else:
            # ValueError is still caught by callers handling Exception.
            raise ValueError(f"Q-Network loss type {loss_name} not valid loss.")

        # Build a fresh list so the caller's argument is never mutated.
        self.metrics_to_score = (metrics_to_score or []) + ["reward"]

        cuda_available = torch.cuda.is_available()
        logger.info("CUDA availability: %s", cuda_available)
        if use_gpu and cuda_available:
            logger.info("Using GPU: GPU requested and available.")
            self.use_gpu = True
            self.device = torch.device("cuda")  # type: ignore
        else:
            logger.info("NOT Using GPU: GPU not requested or not available.")
            self.use_gpu = False
            self.device = torch.device("cpu")  # type: ignore

        self.loss_reporter = loss_reporter or LossReporter(actions)
        self._actions = actions
コード例 #2
0
    def __init__(
        self,
        parameters,
        use_gpu,
        additional_feature_types,
        metrics_to_score=None,
        gradient_handler=None,
        actions: Optional[List[str]] = None,
    ):
        """Set up hyperparameters, the Q-network loss, tensor types and loss reporting.

        Args:
            parameters: Configuration object stored as ``self.parameters``.
                Its ``rl`` attribute supplies ``reward_burnin``,
                ``temperature``, ``maxq_learning``, ``gamma``,
                ``target_update_rate``, ``use_seq_num_diff_as_time_diff``,
                ``time_diff_unit_length``, ``tensorboard_logging_freq``,
                ``multi_steps`` and ``q_network_loss``.
            use_gpu: Request CUDA. Honored only when
                ``torch.cuda.is_available()`` is true.
            additional_feature_types: Stored unchanged as
                ``self._additional_feature_types``.
            metrics_to_score: Optional list; ``"reward"`` is always appended
                to it (a new list is built, the argument is not mutated).
            gradient_handler: Stored unchanged as ``self.gradient_handler``.
            actions: Passed to ``LossReporter``.

        Raises:
            ValueError: If ``parameters.rl.q_network_loss`` is neither
                ``"mse"`` nor ``"huber"``.
        """
        rl = parameters.rl

        self.minibatch = 0
        self.parameters = parameters
        self.reward_burnin = rl.reward_burnin
        self._additional_feature_types = additional_feature_types
        self.rl_temperature = rl.temperature
        self.maxq_learning = rl.maxq_learning
        self.gamma = rl.gamma
        self.tau = rl.target_update_rate
        self.use_seq_num_diff_as_time_diff = rl.use_seq_num_diff_as_time_diff
        self.time_diff_unit_length = rl.time_diff_unit_length
        self.gradient_handler = gradient_handler
        self.tensorboard_logging_freq = rl.tensorboard_logging_freq
        self.multi_steps = rl.multi_steps

        loss_name = rl.q_network_loss
        if loss_name == "mse":
            self.q_network_loss = F.mse_loss
        elif loss_name == "huber":
            self.q_network_loss = F.smooth_l1_loss
        else:
            # ValueError is still caught by callers handling Exception.
            raise ValueError(
                "Q-Network loss type {} not valid loss.".format(loss_name)
            )

        # Build a fresh list so the caller's argument is never mutated.
        self.metrics_to_score = (metrics_to_score or []) + ["reward"]

        cuda_available = torch.cuda.is_available()
        logger.info("CUDA availability: %s", cuda_available)
        # NOTE(review): the *Tensor classes below are PyTorch's legacy typed
        # tensor constructors; kept because other code presumably reads
        # self.dtype / self.dtypelong — confirm before migrating to
        # torch.device.
        if use_gpu and cuda_available:
            logger.info("Using GPU: GPU requested and available.")
            self.use_gpu = True
            self.dtype = torch.cuda.FloatTensor
            self.dtypelong = torch.cuda.LongTensor
        else:
            logger.info("NOT Using GPU: GPU not requested or not available.")
            self.use_gpu = False
            self.dtype = torch.FloatTensor
            self.dtypelong = torch.LongTensor

        self.loss_reporter = LossReporter(actions)