def __init__(
    self,
    state_dim: int,
    candidate_dim: int,
    num_stacked_layers: int,
    num_heads: int,
    dim_model: int,
    dim_feedforward: int,
    max_src_seq_len: int,
    max_tgt_seq_len: int,
) -> None:
    """
    A reward network that predicts slate reward.

    It uses a transformer-based encoder to encode the items shown in the
    slate. The slate reward is predicted by attending all encoder steps'
    outputs.

    For convenience, Seq2SlateRewardModel and Seq2SlateTransformerModel
    share the same parameter notations. Therefore, the reward model's
    encoder is actually applied on target sequences (i.e., slates)
    referred in Seq2SlateTransformerModel.

    NOTE(review): the original docstring was truncated here ("Note that
    max_src_seq_len is the ..."). Presumably max_src_seq_len is the
    maximum length of the candidate (source) sequence and max_tgt_seq_len
    the maximum slate (target) length, mirroring Seq2SlateTransformerModel
    -- TODO confirm against that model's documentation.

    :param state_dim: dimension of the state feature vector
    :param candidate_dim: dimension of each candidate's feature vector
    :param num_stacked_layers: number of stacked encoder/decoder layers
    :param num_heads: number of attention heads
    :param dim_model: transformer model (hidden) dimension
    :param dim_feedforward: hidden size of the position-wise feed-forward nets
    :param max_src_seq_len: max source sequence length (see note above)
    :param max_tgt_seq_len: max target sequence length (see note above)
    """
    super().__init__()
    self.state_dim = state_dim
    self.candidate_dim = candidate_dim
    self.num_stacked_layers = num_stacked_layers
    self.num_heads = num_heads
    self.dim_model = dim_model
    self.dim_feedforward = dim_feedforward
    self.max_src_seq_len = max_src_seq_len
    self.max_tgt_seq_len = max_tgt_seq_len
    # deepcopy so each stacked layer gets its own independent weights
    # rather than sharing one attention/feed-forward module.
    c = copy.deepcopy
    attn = MultiHeadedAttention(num_heads, dim_model)
    ff = PositionwiseFeedForward(dim_model, dim_feedforward)
    self.encoder = Encoder(EncoderLayer(dim_model, c(attn), c(ff)), num_stacked_layers)
    self.decoder = Decoder(
        DecoderLayer(dim_model, c(attn), c(attn), c(ff)), num_stacked_layers)
    # Each embedder outputs dim_model // 2 features; presumably the state
    # and candidate embeddings are concatenated to form a dim_model-wide
    # input elsewhere -- TODO confirm in the forward pass.
    self.candidate_embedder = Embedder(candidate_dim, dim_model // 2)
    self.state_embedder = Embedder(state_dim, dim_model // 2)
    # max_len covers both source and target positions with slack (x2).
    self.positional_encoding = PositionalEncoding(
        dim_model, max_len=2 * (max_src_seq_len + max_tgt_seq_len))
    # Final projection from the model dimension to a scalar reward.
    self.proj = nn.Linear(dim_model, 1)
    # Learnable start-of-decoding vector fed to the decoder's first step.
    self.decoder_start_vec = nn.Parameter(torch.zeros(candidate_dim), requires_grad=True)

    # Initialize parameters with Glorot / fan_avg.
    for p in self.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
def __init__(
    self,
    state_dim: int,
    candidate_dim: int,
    num_stacked_layers: int,
    num_heads: int,
    dim_model: int,
    dim_feedforward: int,
    max_src_seq_len: int,
    max_tgt_seq_len: int,
):
    """
    A reward network that predicts slate reward.

    The items shown in the slate are encoded with a transformer-based
    encoder; the slate reward is predicted by attending over all of the
    encoder steps' outputs.

    :param state_dim: dimension of the state feature vector
    :param candidate_dim: dimension of each candidate's feature vector
    :param num_stacked_layers: number of stacked encoder/decoder layers
    :param num_heads: number of attention heads
    :param dim_model: transformer model (hidden) dimension
    :param dim_feedforward: hidden size of the position-wise feed-forward nets
    :param max_src_seq_len: max source sequence length
    :param max_tgt_seq_len: max target sequence length
    """
    # The base class records state_dim/candidate_dim/dim_model/
    # num_stacked_layers/max_*_seq_len; only the transformer-specific
    # hyperparameters are stored here.
    super().__init__(
        state_dim,
        candidate_dim,
        dim_model,
        num_stacked_layers,
        max_src_seq_len,
        max_tgt_seq_len,
    )
    self.num_heads = num_heads
    self.dim_feedforward = dim_feedforward

    # Prototype sublayers; each stacked layer receives its own deep copy
    # so no weights are shared between layers.
    attention_proto = MultiHeadedAttention(num_heads, dim_model)
    feedforward_proto = PositionwiseFeedForward(dim_model, dim_feedforward)

    encoder_layer = EncoderLayer(
        dim_model,
        copy.deepcopy(attention_proto),
        copy.deepcopy(feedforward_proto),
    )
    self.encoder = Encoder(encoder_layer, num_stacked_layers)

    decoder_layer = DecoderLayer(
        dim_model,
        copy.deepcopy(attention_proto),
        copy.deepcopy(attention_proto),
        copy.deepcopy(feedforward_proto),
    )
    self.decoder = Decoder(decoder_layer, num_stacked_layers)

    # max_len covers both source and target positions with slack (x2).
    self.positional_encoding = PositionalEncoding(
        dim_model, max_len=2 * (max_src_seq_len + max_tgt_seq_len)
    )
    # Final projection from the model dimension to a scalar reward.
    self.proj = nn.Linear(dim_model, 1)
    # Learnable start-of-decoding vector fed to the decoder's first step.
    self.decoder_start_vec = nn.Parameter(
        torch.zeros(candidate_dim), requires_grad=True
    )
    self._init_params()