Example 1
    def __init__(
        self,
        seq2slate_net: Seq2SlateTransformerNet,
        reward_net_path: str,
        minibatch_size: int,
        parameters: Seq2SlateParameters,
        baseline_net: Optional[BaselineNet] = None,
        baseline_warmup_num_batches: int = 0,
        use_gpu: bool = False,
        policy_optimizer: Optimizer__Union = field(  # noqa: B008
            default_factory=Optimizer__Union.default
        ),
        baseline_optimizer: Optimizer__Union = field(  # noqa: B008
            default_factory=Optimizer__Union.default
        ),
    ) -> None:
        self.reward_net_path = reward_net_path
        # loaded when used
        self.reward_net = None
        self.parameters = parameters
        self.minibatch_size = minibatch_size
        self.use_gpu = use_gpu
        self.device = torch.device("cuda") if use_gpu else torch.device("cpu")
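        # Enumerate every candidate slate up front: all permutations of
        # max_tgt_seq_len positions drawn from the max_src_seq_len source items.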
        self.permutation_index = torch.tensor(
            list(
                permutations(
                    # pyre-fixme[6]: Expected `Iterable[Variable[itertools._T]]` for
                    #  1st param but got `Tensor`.
                    torch.arange(seq2slate_net.max_src_seq_len),
                    seq2slate_net.max_tgt_seq_len,
                )
            ),
            device=self.device,
        ).long()

        # Precompute each candidate slate's swap distance once; the stored
        # tensor backs the simulation distance penalty.
        if self.parameters.simulation_distance_penalty is not None:
            # pyre-fixme[16]: `Optional` has no attribute `__gt__`.
            assert self.parameters.simulation_distance_penalty > 0
            self.permutation_distance = (
                torch.tensor(
                    [swap_dist(x.tolist()) for x in self.permutation_index],
                    device=self.device,
                )
                .unsqueeze(1)
                .float()
            )
            self.MAX_DISTANCE = torch.max(self.permutation_distance)

        self.trainer = Seq2SlateTrainer(
            seq2slate_net,
            minibatch_size,
            self.parameters,
            baseline_net=baseline_net,
            baseline_warmup_num_batches=baseline_warmup_num_batches,
            use_gpu=use_gpu,
            policy_optimizer=policy_optimizer,
            baseline_optimizer=baseline_optimizer,
        )
        self.seq2slate_net = self.trainer.seq2slate_net
        self.baseline_net = self.trainer.baseline_net
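
To make the permutation bookkeeping concrete, here is a minimal standalone sketch with made-up sizes (max_src_seq_len=3, max_tgt_seq_len=2). Passing range(...) instead of torch.arange(...) to permutations sidesteps the pyre complaint flagged in the snippet:

import torch
from itertools import permutations

max_src_seq_len, max_tgt_seq_len = 3, 2  # hypothetical sizes, for illustration only
permutation_index = torch.tensor(
    list(permutations(range(max_src_seq_len), max_tgt_seq_len)),
    device=torch.device("cpu"),
).long()
print(permutation_index)
# tensor([[0, 1],
#         [0, 2],
#         [1, 0],
#         [1, 2],
#         [2, 0],
#         [2, 1]])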
Example 2
    def __init__(
        self,
        seq2slate_net: Seq2SlateTransformerNet,
        parameters: Seq2SlateTransformerParameters,
        minibatch_size: int,
        reward_net_path: str,
        baseline_net: Optional[BaselineNet] = None,
        use_gpu: bool = False,
    ) -> None:
        self.reward_net_path = reward_net_path
        # loaded when used
        self.reward_net = None
        self.parameters = parameters
        self.minibatch_size = minibatch_size
        self.use_gpu = use_gpu
        self.device = torch.device("cuda") if use_gpu else torch.device("cpu")
        self.permutation_index = torch.tensor(
            list(
                permutations(
                    torch.arange(seq2slate_net.max_src_seq_len),
                    seq2slate_net.max_tgt_seq_len,
                )
            ),
            device=self.device,
        ).long()
        self.trainer = Seq2SlateTrainer(seq2slate_net, parameters,
                                        minibatch_size, baseline_net, use_gpu)
        self.seq2slate_net = self.trainer.seq2slate_net
        self.baseline_net = self.trainer.baseline_net
Example 3
def create_trainer(seq2slate_net, batch_size, learning_rate, device,
                   on_policy):
    use_gpu = device != torch.device("cpu")
    return Seq2SlateTrainer(
        seq2slate_net=seq2slate_net,
        minibatch_size=batch_size,
        parameters=Seq2SlateParameters(on_policy=on_policy),
        policy_optimizer=Optimizer__Union.default(lr=learning_rate),
        use_gpu=use_gpu,
        print_interval=100,
    )
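
A hypothetical call, for illustration only (seq2slate_net must already be a constructed Seq2SlateTransformerNet; the values are made up):

trainer = create_trainer(
    seq2slate_net,
    batch_size=32,
    learning_rate=1e-3,
    device=torch.device("cpu"),
    on_policy=True,
)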
Example 4
def create_trainer(
    seq2slate_net,
    learning_rate,
    seq2slate_params,
    policy_gradient_interval,
):
    return Seq2SlateTrainer(
        seq2slate_net=seq2slate_net,
        params=seq2slate_params,
        policy_optimizer=Optimizer__Union(SGD=classes["SGD"](lr=learning_rate)),
        policy_gradient_interval=policy_gradient_interval,
        print_interval=1,
    )
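
The helpers above construct their optimizer unions in two different ways. A side-by-side sketch, assuming the Optimizer__Union API exactly as it appears in these snippets (classes maps optimizer names to their config classes):

# Default optimizer type, configured with a learning rate:
opt_default = Optimizer__Union.default(lr=1e-3)
# Explicit SGD, pulled from the registry of optimizer config classes:
opt_sgd = Optimizer__Union(SGD=classes["SGD"](lr=1e-3))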
Example 5
def create_trainer(
    seq2slate_net,
    batch_size,
    learning_rate,
    device,
    seq2slate_params,
    policy_gradient_interval,
):
    use_gpu = device != torch.device("cpu")
    return Seq2SlateTrainer(
        seq2slate_net=seq2slate_net,
        minibatch_size=batch_size,
        parameters=seq2slate_params,
        policy_optimizer=Optimizer__Union(SGD=classes["SGD"](lr=learning_rate)),
        use_gpu=use_gpu,
        policy_gradient_interval=policy_gradient_interval,
        print_interval=1,
    )
Example 6
    def __init__(
        self,
        seq2slate_net: Seq2SlateTransformerNet,
        minibatch_size: int,
        parameters: Seq2SlateParameters,
        baseline_net: Optional[BaselineNet] = None,
        baseline_warmup_num_batches: int = 0,
        use_gpu: bool = False,
        policy_optimizer: Optimizer__Union = field(  # noqa: B008
            default_factory=Optimizer__Union.default
        ),
        baseline_optimizer: Optimizer__Union = field(  # noqa: B008
            default_factory=Optimizer__Union.default
        ),
        policy_gradient_interval: int = 1,
        print_interval: int = 100,
    ) -> None:
        self.sim_param = parameters.simulation
        assert self.sim_param is not None
        # loaded when used
        self.reward_name_and_net = {}
        self.parameters = parameters
        self.minibatch_size = minibatch_size
        self.use_gpu = use_gpu
        self.policy_gradient_interval = policy_gradient_interval
        self.print_interval = print_interval
        self.device = torch.device("cuda") if use_gpu else torch.device("cpu")
        # Maximum possible swap distance: the number of pairwise inversions
        # in a fully reversed sequence of max_src_seq_len items, n * (n - 1) / 2.
        self.MAX_DISTANCE = (
            seq2slate_net.max_src_seq_len * (seq2slate_net.max_src_seq_len - 1) / 2
        )
        self.trainer = Seq2SlateTrainer(
            seq2slate_net,
            minibatch_size,
            self.parameters,
            baseline_net=baseline_net,
            baseline_warmup_num_batches=baseline_warmup_num_batches,
            use_gpu=use_gpu,
            policy_optimizer=policy_optimizer,
            baseline_optimizer=baseline_optimizer,
            policy_gradient_interval=policy_gradient_interval,
            print_interval=print_interval,
        )
        self.seq2slate_net = self.trainer.seq2slate_net
        self.baseline_net = self.trainer.baseline_net
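
MAX_DISTANCE above is the maximum number of pairwise inversions in a sequence of n = max_src_seq_len items, n * (n - 1) / 2, attained by the fully reversed order. A quick sanity check with a hypothetical swap_dist written as a plain inversion count (the real swap_dist used by these snippets is not shown here):

def swap_dist(perm):
    # Count inverted pairs: i < j but perm[i] > perm[j].
    n = len(perm)
    return sum(perm[i] > perm[j] for i in range(n) for j in range(i + 1, n))

n = 5
assert swap_dist(list(range(n - 1, -1, -1))) == n * (n - 1) / 2  # 10 == 10.0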
Example 7
    def __init__(
        self,
        seq2slate_net: Seq2SlateTransformerNet,
        parameters: Seq2SlateTransformerParameters,
        minibatch_size: int,
        reward_net_path: str,
        baseline_net: Optional[BaselineNet] = None,
        use_gpu: bool = False,
    ) -> None:
        self.reward_net_path = reward_net_path
        # loaded when used
        self.reward_net = None
        self.parameters = parameters
        self.minibatch_size = minibatch_size
        self.use_gpu = use_gpu
        self.device = torch.device("cuda") if use_gpu else torch.device("cpu")
        self.permutation_index = torch.tensor(
            list(
                permutations(
                    torch.arange(seq2slate_net.max_src_seq_len),
                    seq2slate_net.max_tgt_seq_len,
                )
            ),
            device=self.device,
        ).long()

        if self.parameters.simulation_distance_penalty is not None:
            assert self.parameters.simulation_distance_penalty > 0
            self.permutation_distance = torch.tensor(
                [swap_dist(x.tolist()) for x in self.permutation_index],
                device=self.device,
            ).float()
            self.MAX_DISTANCE = torch.max(self.permutation_distance)

        self.trainer = Seq2SlateTrainer(seq2slate_net, parameters,
                                        minibatch_size, baseline_net, use_gpu)
        self.seq2slate_net = self.trainer.seq2slate_net
        self.baseline_net = self.trainer.baseline_net