Example #1
    def __init__(
        self,
        state_dim: int,
        candidate_dim: int,
        num_stacked_layers: int,
        num_heads: int,
        dim_model: int,
        dim_feedforward: int,
        max_src_seq_len: int,
        max_tgt_seq_len: int,
    ):
        """
        A reward network that predicts slate reward.

        It uses a transformer-based encoder to encode the items shown in the slate.
        The slate reward is predicted by attending over all encoder steps' outputs.

        For convenience, Seq2SlateRewardModel and Seq2SlateTransformerModel share
        the same parameter notation. Therefore, the reward model's encoder is
        actually applied to target sequences (i.e., slates) as referred to in
        Seq2SlateTransformerModel.

        Note that max_src_seq_len is the maximum candidate-sequence length and
        max_tgt_seq_len is the maximum slate length, following the shared
        Seq2SlateTransformerModel notation.
        """
        super().__init__()
        self.state_dim = state_dim
        self.candidate_dim = candidate_dim
        self.num_stacked_layers = num_stacked_layers
        self.num_heads = num_heads
        self.dim_model = dim_model
        self.dim_feedforward = dim_feedforward

        self.max_src_seq_len = max_src_seq_len
        self.max_tgt_seq_len = max_tgt_seq_len

        c = copy.deepcopy
        attn = MultiHeadedAttention(num_heads, dim_model)
        ff = PositionwiseFeedForward(dim_model, dim_feedforward)
        self.encoder = Encoder(EncoderLayer(dim_model, c(attn), c(ff)),
                               num_stacked_layers)
        self.decoder = Decoder(
            DecoderLayer(dim_model, c(attn), c(attn), c(ff)),
            num_stacked_layers)
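        # State and candidate features are each embedded to dim_model // 2;
        # presumably the two halves are concatenated to form the full
        # dim_model-sized input to the encoder.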
        self.candidate_embedder = Embedder(candidate_dim, dim_model // 2)
        self.state_embedder = Embedder(state_dim, dim_model // 2)
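        # The positional table covers source plus target positions, with 2x
        # headroom over the declared maximum lengths.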
        self.positional_encoding = PositionalEncoding(
            dim_model, max_len=2 * (max_src_seq_len + max_tgt_seq_len))
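        # Final projection from dim_model down to a single scalar slate reward.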
        self.proj = nn.Linear(dim_model, 1)
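        # Learnable vector that (presumably) seeds the decoder as its first
        # input step, analogous to a start-of-sequence token.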
        self.decoder_start_vec = nn.Parameter(torch.zeros(candidate_dim),
                                              requires_grad=True)

        # Initialize parameters with Glorot / fan_avg.
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
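
For context, a minimal usage sketch of the class above. The class name
Seq2SlateRewardModel is taken from the docstring; the dimension values are
illustrative, and the helper classes used in __init__ (Encoder,
MultiHeadedAttention, etc.) are assumed to be defined in the surrounding
module.

# A minimal instantiation sketch; all dimension values are illustrative.
model = Seq2SlateRewardModel(
    state_dim=8,            # size of the user/context state feature vector
    candidate_dim=16,       # size of each candidate item's feature vector
    num_stacked_layers=2,   # depth of both the encoder and the decoder
    num_heads=4,            # dim_model must be divisible by num_heads
    dim_model=64,           # must also be even: each embedder gets dim_model // 2
    dim_feedforward=128,    # hidden size of the position-wise feed-forward nets
    max_src_seq_len=20,     # max number of candidate items
    max_tgt_seq_len=10,     # max slate length
)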
Example #2
    def __init__(
        self,
        state_dim: int,
        candidate_dim: int,
        num_stacked_layers: int,
        num_heads: int,
        dim_model: int,
        dim_feedforward: int,
        max_src_seq_len: int,
        max_tgt_seq_len: int,
    ):
        """
        A reward network that predicts slate reward.

        It uses a transformer-based encoder to encode the items shown in the slate.
        The slate reward is predicted by attending over all encoder steps' outputs.
        """
        super().__init__(
            state_dim,
            candidate_dim,
            dim_model,
            num_stacked_layers,
            max_src_seq_len,
            max_tgt_seq_len,
        )
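        # The shared dimensions and the state/candidate embedders are
        # presumably set up by the base class __init__ above.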
        self.num_heads = num_heads
        self.dim_feedforward = dim_feedforward

        c = copy.deepcopy
        attn = MultiHeadedAttention(num_heads, dim_model)
        ff = PositionwiseFeedForward(dim_model, dim_feedforward)
        self.encoder = Encoder(
            EncoderLayer(dim_model, c(attn), c(ff)), num_stacked_layers
        )
        self.decoder = Decoder(
            DecoderLayer(dim_model, c(attn), c(attn), c(ff)), num_stacked_layers
        )
        self.positional_encoding = PositionalEncoding(
            dim_model, max_len=2 * (max_src_seq_len + max_tgt_seq_len)
        )
        self.proj = nn.Linear(dim_model, 1)
        self.decoder_start_vec = nn.Parameter(
            torch.zeros(candidate_dim), requires_grad=True
        )

        self._init_params()
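
Example #2 delegates weight initialization to self._init_params(), which is
not shown here. A plausible sketch, mirroring the Glorot/Xavier loop at the
end of Example #1:

    def _init_params(self):
        # Initialize weight matrices with Glorot / fan_avg, as in Example #1;
        # 1-D parameters (biases, decoder_start_vec) keep their defaults.
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)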