def test_transformer_decoder(num_beams, decoder_inputs_transformer):
    batch_size, emb_size, nlayers, sl, vin, ven = decoder_inputs_transformer
    ntokens, nhid, max_tokens = 10, 2, 20
    embedding = TransformerEmbeddings(ntokens=ntokens, emb_size=emb_size, dropout=0.0, pad_token=1)
    decoder_layers = TransformerDecoderLayers(nlayers=nlayers, input_size=emb_size, num_heads=2, nhid=emb_size)
    projection_layer = Projection(output_size=ntokens, input_size=emb_size, tie_encoder=None, dropout=0.0)
    decoder = TransformerDecoder(decoder_layer=decoder_layers,
                                 projection_layer=projection_layer,
                                 embedding_layer=embedding,
                                 pad_token=1,
                                 eos_token=2,
                                 max_tokens=max_tokens)
    decoder = to_gpu(decoder)
    outputs = decoder(vin, ven, num_beams=num_beams)
    if num_beams > 0:
        assert_dims(outputs, [None, num_beams * batch_size, (emb_size, ntokens)])
        # actual beam outputs can be found in beam_outputs
        assert decoder.beam_outputs is not None
        assert_dims(decoder.beam_outputs, [None, batch_size, num_beams])
        # the sl can go up to max_tokens + 1 (for the extra 0 token at the end)
        assert 0 < decoder.beam_outputs.shape[0] <= max_tokens + 1
    else:
        assert_dims(outputs, [None, batch_size, (emb_size, ntokens)])
        assert decoder.beam_outputs is None
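# A minimal, library-independent sketch of the tensor layout asserted above: beam search folds
# the beams into the batch dimension of the step outputs, while the chosen token per beam is
# kept in a separate [sl, batch, num_beams] tensor. The shapes and names below are illustrative
# assumptions, not taken from TransformerDecoder itself.
import torch

sl, batch_size, num_beams, ntokens = 7, 3, 4, 10
# step outputs with beams folded into the batch dimension: [sl, num_beams * batch, ntokens]
step_logits = torch.randn(sl, num_beams * batch_size, ntokens)
# token choices unfolded back into one column per beam: [sl, batch, num_beams]
beam_tokens = step_logits.argmax(dim=-1).view(sl, batch_size, num_beams)
assert step_logits.shape == (sl, num_beams * batch_size, ntokens)
assert beam_tokens.shape == (sl, batch_size, num_beams)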
def __init__(self, ntoken, emb_size=512, nlayers=6, pad_token=None, eos_token=None, max_tokens=200,
             share_embedding_layer=False, tie_decoder=True, **kwargs):
    super().__init__()
    ntoken = get_list(ntoken, 2)
    self.nlayers = nlayers
    dropout = get_kwarg(kwargs, name="dropout", default_value=0.1)
    num_heads = get_kwarg(kwargs, name="num_heads", default_value=8)
    nhid = get_kwarg(kwargs, name="nhid", default_value=2048)
    encoder_embedding_layer = TransformerEmbeddings(ntokens=ntoken[0], emb_size=emb_size,
                                                    dropout=dropout, pad_token=pad_token)
    encoder_layer = TransformerEncoderLayers(num_layers=nlayers, input_size=emb_size,
                                             num_heads=num_heads, nhid=nhid)
    self.encoder = Encoder(embedding_layer=encoder_embedding_layer, encoder_layer=encoder_layer)
    if share_embedding_layer:
        decoder_embedding_layer = encoder_embedding_layer
    else:
        decoder_embedding_layer = TransformerEmbeddings(ntokens=ntoken[-1], emb_size=emb_size,
                                                        dropout=dropout, pad_token=pad_token)
    decoder_layer = TransformerDecoderLayers(nlayers=nlayers, input_size=emb_size,
                                             num_heads=num_heads, nhid=nhid)
    projection_layer = Projection(output_size=ntoken[-1], input_size=emb_size, dropout=dropout,
                                  tie_encoder=decoder_embedding_layer if tie_decoder else None)
    self.decoder = TransformerDecoder(
        decoder_layer=decoder_layer,
        projection_layer=projection_layer,
        embedding_layer=decoder_embedding_layer,
        pad_token=pad_token,
        eos_token=eos_token,
        max_tokens=max_tokens,
    )
    self.nt = ntoken[-1]
    # xavier uniform initialization
    for p in self.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
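# The constructors in this file lean on two small helpers, get_list and get_kwarg, whose
# implementations are not shown here. The sketch below is only a guess at their semantics as
# inferred from how they are called above (expand a scalar hyperparameter into a list, pop an
# optional keyword argument with a default); it is not the library's actual code. Note that one
# of the older constructors further down unpacks two values from get_kwarg, which suggests a
# different version of that helper.
def _get_list_sketch(value, length=1):
    # A scalar becomes a list of `length` copies; a list/tuple is passed through.
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value] * length

def _get_kwarg_sketch(kwargs, name, default_value=None):
    # Remove an optional hyperparameter from **kwargs, falling back to a default.
    return kwargs.pop(name, default_value)

assert _get_list_sketch(512, 2) == [512, 512]
assert _get_list_sketch([256, 512], 2) == [256, 512]
assert _get_kwarg_sketch({"dropout": 0.2}, name="dropout", default_value=0.1) == 0.2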
def rnn_decoder(decoder_params):
    decoder_embedding_layer = DropoutEmbeddings(
        ntokens=decoder_params.ntokens,
        emb_size=decoder_params.emb_size,
    )
    if decoder_params.attention:
        # attention decoder must have double the input_size to accommodate the attention concat
        decoder_rnn = RNNLayers(input_size=decoder_params.emb_size * 2,
                                output_size=decoder_params.emb_size,
                                nhid=decoder_params.nhid,
                                bidir=False,
                                nlayers=decoder_params.nlayers,
                                cell_type="gru")
        projection_layer = AttentionProjection(output_size=decoder_params.ntokens,
                                               input_size=decoder_params.emb_size,
                                               att_nhid=decoder_params.att_hid,
                                               tie_encoder=None,
                                               dropout=0.0)
        decoder = AttentionDecoder(decoder_layer=decoder_rnn,
                                   embedding_layer=decoder_embedding_layer,
                                   projection_layer=projection_layer,
                                   pad_token=1,
                                   eos_token=2,
                                   max_tokens=decoder_params.max_tokens)
    else:
        decoder_rnn = RNNLayers(input_size=decoder_params.emb_size,
                                output_size=decoder_params.emb_size,
                                nhid=decoder_params.nhid,
                                bidir=False,
                                nlayers=decoder_params.nlayers,
                                cell_type="gru")
        projection_layer = Projection(output_size=decoder_params.ntokens,
                                      input_size=decoder_params.emb_size,
                                      dropout=0.0,
                                      tie_encoder=None)
        decoder = Decoder(
            decoder_layer=decoder_rnn,
            projection_layer=projection_layer,
            embedding_layer=decoder_embedding_layer,
            pad_token=0,
            eos_token=1,
            max_tokens=decoder_params.max_tokens,
        )
    decoder = to_gpu(decoder)
    decoder.reset(decoder_params.batch_size)
    return decoder, decoder_params
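# The comment above ("double the input_size to accommodate the attention concat") boils down to
# the following shape arithmetic: at each step the embedded previous token is concatenated with
# an attention-weighted summary of the encoder outputs before entering the decoder RNN. This is
# a conceptual sketch with made-up sizes, not the AttentionDecoder forward pass itself.
import torch

emb_size, batch = 8, 2
prev_token_emb = torch.randn(batch, emb_size)   # embedded previous output token
attn_context = torch.randn(batch, emb_size)     # attention-weighted encoder summary
rnn_input = torch.cat([prev_token_emb, attn_context], dim=-1)
assert rnn_input.shape == (batch, emb_size * 2)  # hence input_size=emb_size * 2 above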
def rnn_decoder(decoder_params):
    if decoder_params.attention:
        decoder = RNNAttentionDecoder(cell_type="gru",
                                      ntoken=decoder_params.ntokens,
                                      emb_sz=decoder_params.emb_size,
                                      nhid=decoder_params.nhid,
                                      nlayers=decoder_params.nlayers,
                                      pad_token=1,
                                      eos_token=2,
                                      max_tokens=decoder_params.max_tokens)
        decoder.projection_layer = AttentionProjection(n_out=decoder_params.ntokens,
                                                       n_in=decoder_params.emb_size,
                                                       att_nhid=decoder_params.att_hid,
                                                       tie_encoder=None,
                                                       dropout=0.0)
    else:
        decoder = EmbeddingRNNDecoder(cell_type="gru",
                                      ntoken=decoder_params.ntokens,
                                      emb_sz=decoder_params.emb_size,
                                      nhid=decoder_params.nhid,
                                      nlayers=decoder_params.nlayers,
                                      pad_token=1,
                                      eos_token=2,
                                      max_tokens=decoder_params.max_tokens)
        decoder.projection_layer = Projection(n_out=decoder_params.ntokens,
                                              n_in=decoder_params.emb_size,
                                              tie_encoder=None,
                                              dropout=0.0)
    decoder = to_gpu(decoder)
    decoder.reset(decoder_params.batch_size)
    return decoder, decoder_params
def __init__(self, ntoken: int, emb_sz: HParam, nhid: HParam, nlayers: HParam, pad_token: int, eos_token: int,
             max_tokens: int = 50, share_embedding_layer: bool = False, tie_decoder: bool = True,
             bidir: bool = False, session_constraint: bool = False, **kwargs):
    """
    Args:
        ntoken (int): Number of tokens for the encoder and the decoder
        emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
        nhid (Union[List[int],int]): Number of hidden dims for the encoder (first two values) and the decoder
        nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
        pad_token (int): The index of the token used for padding
        eos_token (int): The index of the token used for eos
        max_tokens (int): The maximum number of steps the decoder iterates before stopping
        share_embedding_layer (bool): if True the decoder shares its input and output embeddings
        tie_decoder (bool): if True the encoder and the decoder share their embeddings
        bidir (bool): if True use a bidirectional encoder
        session_constraint (bool): if True the session vector is concatenated to the decoder input as a constraint
        **kwargs: Extra embeddings that will be passed to the encoder and the decoder
    """
    super().__init__()
    # allow for the same or different parameters between encoder and decoder
    ntoken, emb_sz, nhid, nlayers = get_list(ntoken), get_list(emb_sz, 2), get_list(nhid, 3), get_list(nlayers, 3)
    dropoutd = get_kwarg(kwargs, name="dropout_d", default_value=0.5)  # output dropout
    dropoute = get_kwarg(kwargs, name="dropout_e", default_value=0.1)  # encoder embedding dropout
    dropoute = get_list(dropoute, 2)
    dropouti = get_kwarg(kwargs, name="dropout_i", default_value=0.65)  # input dropout
    dropouti = get_list(dropouti, 2)
    dropouth = get_kwarg(kwargs, name="dropout_h", default_value=0.3)  # RNN output layers dropout
    dropouth = get_list(dropouth, 3)
    wdrop = get_kwarg(kwargs, name="wdrop", default_value=0.5)  # RNN weights dropout
    wdrop = get_list(wdrop, 3)
    train_init = kwargs.pop("train_init", False)  # have trainable initial states for the RNNs
    dropoutinit = get_kwarg(kwargs, name="dropout_init", default_value=0.1)  # RNN initial states dropout
    dropoutinit = get_list(dropoutinit, 3)
    self.cell_type = "gru"
    self.nt = ntoken[-1]
    self.pr_force = 1.0
    encoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken[0],
                                                emb_size=emb_sz[0],
                                                dropoute=dropoute[0],
                                                dropouti=dropouti[0])
    encoder_rnn = RNNLayers(input_size=emb_sz[0],
                            output_size=kwargs.get("output_size_encoder", emb_sz[0]),
                            nhid=nhid[0],
                            bidir=bidir,
                            dropouth=dropouth[0],
                            wdrop=wdrop[0],
                            nlayers=nlayers[0],
                            cell_type=self.cell_type,
                            train_init=train_init,
                            dropoutinit=dropoutinit[0])
    self.query_encoder = Encoder(embedding_layer=encoder_embedding_layer, encoder_layer=encoder_rnn)
    self.se_enc = RNNLayers(cell_type=self.cell_type,
                            input_size=encoder_rnn.output_size,
                            output_size=nhid[1],
                            nhid=nhid[1],
                            nlayers=1,
                            dropouth=dropouth[1],
                            wdrop=wdrop[1],
                            train_init=train_init,
                            dropoutinit=dropoutinit[1])
    if share_embedding_layer:
        decoder_embedding_layer = encoder_embedding_layer
    else:
        decoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken[0],
                                                    emb_size=emb_sz[1],
                                                    dropoute=dropoute[1],
                                                    dropouti=dropouti[1])
    input_size_decoder = kwargs.get("input_size_decoder", emb_sz[1])
    input_size_decoder = input_size_decoder + self.se_enc.output_size if session_constraint else input_size_decoder
    decoder_rnn = RNNLayers(input_size=input_size_decoder,
                            output_size=kwargs.get("output_size_decoder", emb_sz[1]),
                            nhid=nhid[2],
                            bidir=False,
                            dropouth=dropouth[2],
                            wdrop=wdrop[2],
                            nlayers=nlayers[2],
                            cell_type=self.cell_type,
                            train_init=train_init,
                            dropoutinit=dropoutinit[2])
    self.session_constraint = session_constraint
    # allow for changing sizes of decoder output
    input_size = decoder_rnn.output_size
    nhid = emb_sz[1] if input_size != emb_sz[1] else None
    projection_layer = Projection(output_size=ntoken[0],
                                  input_size=input_size,
                                  nhid=nhid,
                                  dropout=dropoutd,
                                  tie_encoder=decoder_embedding_layer if tie_decoder else None)
    self.decoder = Decoder(
        decoder_layer=decoder_rnn,
        projection_layer=projection_layer,
        embedding_layer=decoder_embedding_layer,
        pad_token=pad_token,
        eos_token=eos_token,
        max_tokens=max_tokens,
    )
    self.decoder_state_linear = nn.Linear(in_features=self.se_enc.output_size,
                                          out_features=self.decoder.layers[0].output_size)
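# When session_constraint is True above, the decoder RNN's input size is widened by
# se_enc.output_size, which corresponds to concatenating the session summary onto every decoder
# input step. A conceptual sketch with made-up sizes, not the model's actual forward pass:
import torch

emb_size, session_size, batch = 8, 6, 2
token_emb = torch.randn(batch, emb_size)          # embedded decoder input token
session_state = torch.randn(batch, session_size)  # summary produced by the session encoder
decoder_input = torch.cat([token_emb, session_state], dim=-1)
assert decoder_input.shape == (batch, emb_size + session_size)  # matches input_size_decoder above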
def __init__(self, ntoken: int, emb_sz: HParam, nhid: HParam, nlayers: HParam, pad_token: int, eos_token: int,
             max_tokens: int = 50, share_embedding_layer: bool = False, tie_decoder: bool = True,
             bidir: bool = False, **kwargs):
    """
    Args:
        ntoken (int): Number of tokens for the encoder and the decoder
        emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
        nhid (Union[List[int],int]): Number of hidden dims for the encoder (first two values) and the decoder
        nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
        pad_token (int): The index of the token used for padding
        eos_token (int): The index of the token used for eos
        max_tokens (int): The maximum number of steps the decoder iterates before stopping
        share_embedding_layer (bool): if True the decoder shares its input and output embeddings
        tie_decoder (bool): if True the encoder and the decoder share their embeddings
        bidir (bool): if True use a bidirectional encoder
        **kwargs: Extra embeddings that will be passed to the encoder and the decoder
    """
    super().__init__()
    # allow for the same or different parameters between encoder and decoder
    ntoken, emb_sz, nhid, nlayers = get_list(ntoken), get_list(emb_sz, 2), get_list(nhid, 3), get_list(nlayers, 3)
    dropoutd, kwargs = get_kwarg(kwargs, name="dropoutd", default_value=0.5)
    self.cell_type = "gru"
    self.query_encoder = EmbeddingRNNEncoder(ntoken=ntoken[0], emb_sz=emb_sz[0], nhid=nhid[0], nlayers=nlayers[0],
                                             pad_token=pad_token, bidir=bidir, out_dim=nhid[0],
                                             cell_type=self.cell_type, **kwargs)
    self.session_encoder = RNNEncoder(in_dim=nhid[0], nhid=nhid[1], out_dim=nhid[2], nlayers=1, bidir=False,
                                      cell_type=self.cell_type, **kwargs)
    self.decoder = EmbeddingRNNDecoder(
        ntoken=ntoken[-1],
        emb_sz=emb_sz[-1],
        nhid=nhid[-1],
        nlayers=nlayers[-1],
        pad_token=pad_token,
        eos_token=eos_token,
        max_tokens=max_tokens,
        # Share the embedding layer between encoder and decoder
        embedding_layer=self.query_encoder.encoder_with_dropout.embed if share_embedding_layer else None,
        # potentially tie the output projection with the decoder embedding
        cell_type=self.cell_type,
        out_dim=nhid[-1],
        **kwargs)
    enc = self.decoder.encoder if tie_decoder else None
    self.decoder.projection_layer = Projection(n_out=ntoken[-1], n_in=nhid[-1], nhid=emb_sz[-1],
                                               dropout=dropoutd,
                                               tie_encoder=enc if tie_decoder else None)
    self.decoder_state_linear = nn.Linear(in_features=nhid[-1],
                                          out_features=self.decoder.rnns[0].output_size)
    self.nt = ntoken[-1]
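# Both hierarchical constructors above create a decoder_state_linear layer that maps the session
# encoder's output size to the decoder's hidden size, which suggests the session summary is
# projected to seed the decoder's initial state. The sketch below shows such a projection in
# isolation; the sizes and the tanh nonlinearity are assumptions, not taken from the source.
import torch
import torch.nn as nn

session_size, decoder_hidden, batch = 6, 8, 2
decoder_state_linear = nn.Linear(in_features=session_size, out_features=decoder_hidden)
session_summary = torch.randn(batch, session_size)              # last session-encoder state
decoder_h0 = torch.tanh(decoder_state_linear(session_summary))  # hypothetical initial decoder state
assert decoder_h0.shape == (batch, decoder_hidden)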
def __init__(self, ntoken: HParam, emb_sz: HParam, nhid: HParam, nlayers: HParam, pad_token: int, eos_token: int,
             max_tokens: int = 50, share_embedding_layer: bool = False, tie_decoder: bool = True,
             bidir: bool = False, **kwargs):
    """
    Args:
        ntoken (Union[List[int],int]): Number of tokens for the encoder and the decoder
        emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
        nhid (Union[List[int],int]): Number of hidden dims for the encoder and the decoder
        nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
        pad_token (int): The index of the token used for padding
        eos_token (int): The index of the token used for eos
        max_tokens (int): The maximum number of steps the decoder iterates before stopping
        share_embedding_layer (bool): if True the decoder shares its input and output embeddings
        tie_decoder (bool): if True the encoder and the decoder share their embeddings
        bidir (bool): if True use a bidirectional encoder
        **kwargs: Extra embeddings that will be passed to the encoder and the decoder
    """
    super().__init__()
    # allow for the same or different parameters between encoder and decoder
    ntoken, emb_sz, nhid, nlayers = get_list(ntoken, 2), get_list(emb_sz, 2), get_list(nhid, 2), get_list(nlayers, 2)
    dropoutd = get_kwarg(kwargs, name="dropout_d", default_value=0.5)  # output dropout
    dropoute = get_kwarg(kwargs, name="dropout_e", default_value=0.1)  # encoder embedding dropout
    dropoute = get_list(dropoute, 2)
    dropouti = get_kwarg(kwargs, name="dropout_i", default_value=0.65)  # input dropout
    dropouti = get_list(dropouti, 2)
    dropouth = get_kwarg(kwargs, name="dropout_h", default_value=0.3)  # RNN output layers dropout
    dropouth = get_list(dropouth, 2)
    wdrop = get_kwarg(kwargs, name="wdrop", default_value=0.5)  # RNN weights dropout
    wdrop = get_list(wdrop, 2)
    self.cell_type = get_kwarg(kwargs, name="cell_type", default_value="lstm")
    encoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken[0],
                                                emb_size=emb_sz[0],
                                                dropoute=dropoute[0],
                                                dropouti=dropouti[0])
    self.bidir = bidir
    self.nlayers = nlayers[0]
    self.nt = ntoken[-1]  # number of possible tokens
    self.pr_force = 1.0  # teacher forcing probability
    encoder_rnn = RNNLayers(
        input_size=emb_sz[0],
        output_size=kwargs.get("out_dim", emb_sz[0]),
        nhid=nhid[0],
        bidir=bidir,
        dropouth=dropouth[0],
        wdrop=wdrop[0],
        nlayers=nlayers[0],
        cell_type=self.cell_type,
    )
    self.encoder = Encoder(embedding_layer=encoder_embedding_layer, encoder_layer=encoder_rnn)
    if share_embedding_layer:
        decoder_embedding_layer = encoder_embedding_layer
    else:
        decoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken[-1],
                                                    emb_size=emb_sz[-1],
                                                    dropoute=dropoute[1],
                                                    dropouti=dropouti[1])
    decoder_rnn = RNNLayers(input_size=kwargs.get("input_size", emb_sz[-1]),
                            output_size=kwargs.get("output_size", emb_sz[-1]),
                            nhid=nhid[-1],
                            bidir=False,
                            dropouth=dropouth[1],
                            wdrop=wdrop[1],
                            nlayers=nlayers[-1],
                            cell_type=self.cell_type)
    projection_layer = Projection(output_size=ntoken[-1],
                                  input_size=emb_sz[-1],
                                  dropout=dropoutd,
                                  tie_encoder=decoder_embedding_layer if tie_decoder else None)
    self.decoder = Decoder(
        decoder_layer=decoder_rnn,
        projection_layer=projection_layer,
        embedding_layer=decoder_embedding_layer,
        pad_token=pad_token,
        eos_token=eos_token,
        max_tokens=max_tokens,
    )
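# The tie_encoder argument passed to Projection above (when tie_decoder is True) is the usual
# output-embedding weight-tying trick: the output projection reuses the decoder embedding matrix
# instead of learning a separate one. A minimal sketch of that pattern in plain PyTorch, not
# necessarily how Projection implements it internally:
import torch.nn as nn

ntokens, emb_size = 10, 8
embedding = nn.Embedding(ntokens, emb_size)
projection = nn.Linear(emb_size, ntokens, bias=False)
projection.weight = embedding.weight  # both modules now share one parameter tensor
assert projection.weight is embedding.weight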
def __init__(self, ntoken: HParam, emb_sz: HParam, nhid: HParam, nlayers: HParam, pad_token: int, eos_token: int,
             max_tokens: int = 50, share_embedding_layer: bool = False, tie_decoder: bool = True,
             bidir: bool = False, **kwargs):
    """
    Args:
        ntoken (Union[List[int],int]): Number of tokens for the encoder and the decoder
        emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
        nhid (Union[List[int],int]): Number of hidden dims for the encoder and the decoder
        nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
        pad_token (int): The index of the token used for padding
        eos_token (int): The index of the token used for eos
        max_tokens (int): The maximum number of steps the decoder iterates before stopping
        share_embedding_layer (bool): if True the decoder shares its input and output embeddings
        tie_decoder (bool): if True the encoder and the decoder share their embeddings
        bidir (bool): if True use a bidirectional encoder
        **kwargs: Extra embeddings that will be passed to the encoder and the decoder
    """
    super(Seq2Seq, self).__init__()
    # allow for the same or different parameters between encoder and decoder
    ntoken, emb_sz, nhid, nlayers = get_list(ntoken, 2), get_list(emb_sz, 2), get_list(nhid, 2), get_list(nlayers, 2)
    dropoutd = kwargs.pop("dropoutd", 0.5)
    self.encoder = EmbeddingRNNEncoder(ntoken=ntoken[0], emb_sz=emb_sz[0], nhid=nhid[0], nlayers=nlayers[0],
                                       pad_token=pad_token, bidir=bidir, **kwargs)
    self.decoder = EmbeddingRNNDecoder(
        ntoken=ntoken[-1],
        emb_sz=emb_sz[-1],
        nhid=nhid[-1],
        nlayers=nlayers[-1],
        pad_token=pad_token,
        eos_token=eos_token,
        max_tokens=max_tokens,
        # Share the embedding layer between encoder and decoder
        embedding_layer=self.encoder.encoder_with_dropout.embed if share_embedding_layer else None,
        # potentially tie the output projection with the decoder embedding
        **kwargs)
    enc = self.decoder.encoder if tie_decoder else None
    self.decoder.projection_layer = Projection(n_out=ntoken[-1], n_in=emb_sz[-1], dropout=dropoutd,
                                               tie_encoder=enc if tie_decoder else None)
    self.nt = ntoken[-1]