def __init__(self, num_layers, input_size, num_heads, nhid, dropout=0.1):
    super().__init__()
    nhid = get_list(nhid, num_layers)
    num_heads = get_list(num_heads, num_layers)
    self.layers = nn.ModuleList(
        [TransformerLayer(input_size=input_size, nhid=nhid[i], dropout=dropout, num_heads=num_heads[i])
         for i in range(num_layers)])
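# `get_list` (used above and throughout these constructors) broadcasts a scalar
# hyperparameter to a per-layer list. Its definition is not part of this section;
# the sketch below only illustrates the behaviour assumed by the call sites here
# and is hypothetical, not the project's actual helper.
from typing import List, Union


def _get_list_sketch(value: Union[int, float, List], num: int = 1) -> List:
    """Return `value` as a list of length `num`, broadcasting scalars."""
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value] * num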
def __init__(self, ntoken, emb_size=512, nlayers=6, pad_token=None, eos_token=None, max_tokens=200,
             share_embedding_layer=False, tie_decoder=True, **kwargs):
    super().__init__()
    ntoken = get_list(ntoken, 2)
    self.nlayers = nlayers
    dropout = get_kwarg(kwargs, name="dropout", default_value=0.1)
    num_heads = get_kwarg(kwargs, name="num_heads", default_value=8)
    nhid = get_kwarg(kwargs, name="nhid", default_value=2048)
    encoder_embedding_layer = TransformerEmbeddings(ntokens=ntoken[0], emb_size=emb_size, dropout=dropout,
                                                    pad_token=pad_token)
    encoder_layer = TransformerEncoderLayers(num_layers=nlayers, input_size=emb_size, num_heads=num_heads, nhid=nhid)
    self.encoder = Encoder(embedding_layer=encoder_embedding_layer, encoder_layer=encoder_layer)
    if share_embedding_layer:
        decoder_embedding_layer = encoder_embedding_layer
    else:
        decoder_embedding_layer = TransformerEmbeddings(ntokens=ntoken[-1], emb_size=emb_size, dropout=dropout,
                                                        pad_token=pad_token)
    decoder_layer = TransformerDecoderLayers(nlayers=nlayers, input_size=emb_size, num_heads=num_heads, nhid=nhid)
    projection_layer = Projection(output_size=ntoken[-1], input_size=emb_size, dropout=dropout,
                                  tie_encoder=decoder_embedding_layer if tie_decoder else None)
    self.decoder = TransformerDecoder(
        decoder_layer=decoder_layer,
        projection_layer=projection_layer,
        embedding_layer=decoder_embedding_layer,
        pad_token=pad_token,
        eos_token=eos_token,
        max_tokens=max_tokens,
    )
    self.nt = ntoken[-1]
    # xavier uniform initialization of all weight matrices (biases are left at their defaults)
    for p in self.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
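# `get_kwarg` pulls an optional hyperparameter (dropout, num_heads, nhid, ...) out of
# **kwargs with a default. It is not defined in this section; the sketch below captures
# the behaviour the call sites above appear to rely on (popping the key so it is not
# forwarded twice) and is an assumption, not the project's actual implementation.
def _get_kwarg_sketch(kwargs: dict, name: str, default_value=None):
    """Pop `name` from `kwargs`, falling back to `default_value`."""
    return kwargs.pop(name, default_value)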
def __init__(self, ntoken: int, emb_sz: HParam, nhid: HParam, nlayers: HParam, pad_token: int, eos_token: int,
             num_constraints: int, constraints_sz: int, max_tokens: int = 50, share_embedding_layer: bool = False,
             tie_decoder: bool = True, bidir: bool = False, **kwargs):
    """
    Args:
        ntoken (int): Number of tokens for the encoder and the decoder
        emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
        nhid (Union[List[int],int]): Number of hidden dims for the encoder (first two values) and the decoder
        nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
        pad_token (int): The index of the token used for padding
        eos_token (int): The index of the end-of-sequence (eos) token
        num_constraints (int): Number of distinct constraint tokens
        constraints_sz (int): Embedding size of the constraint embeddings
        max_tokens (int): The maximum number of steps the decoder iterates before stopping
        share_embedding_layer (bool): if True the encoder and the decoder share the same embedding layer
        tie_decoder (bool): if True the decoder output projection is tied to the decoder embedding weights
        bidir (bool): if True use a bidirectional encoder
        **kwargs: Extra hyperparameters (e.g. dropout settings) that will be passed to the encoder and the decoder
    """
    super().__init__(ntoken=ntoken, emb_sz=emb_sz, nhid=nhid, nlayers=nlayers, pad_token=pad_token,
                     eos_token=eos_token, max_tokens=max_tokens, share_embedding_layer=share_embedding_layer,
                     tie_decoder=tie_decoder, bidir=bidir, input_size_decoder=emb_sz + constraints_sz)
    dropoute = get_kwarg(kwargs, name="dropout_e", default_value=0.1)  # encoder embedding dropout
    dropoute = get_list(dropoute, 2)
    dropouti = get_kwarg(kwargs, name="dropout_i", default_value=0.65)  # input dropout
    dropouti = get_list(dropouti, 2)
    self.constraint_embeddings = DropoutEmbeddings(ntokens=num_constraints,
                                                   emb_size=constraints_sz,
                                                   dropoute=dropoute[-1],
                                                   dropouti=dropouti[-1])
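# Because the parent constructor receives input_size_decoder=emb_sz + constraints_sz,
# the decoder presumably sees the token embedding concatenated with the constraint
# embedding at every step. The sketch below only illustrates that concatenation;
# shapes and names are assumptions, since the decoder's forward pass is not shown here.
def _constraint_concat_sketch():
    import torch

    batch, emb_sz, constraints_sz = 8, 300, 64
    token_emb = torch.randn(batch, emb_sz)                # decoder input embedding for one step
    constraint_emb = torch.randn(batch, constraints_sz)   # output of self.constraint_embeddings
    decoder_input = torch.cat([token_emb, constraint_emb], dim=-1)
    assert decoder_input.size(-1) == emb_sz + constraints_sz  # matches input_size_decoder
    return decoder_input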
def __init__(self, ntoken: int, emb_sz: int, nhid: HParam, nlayers: int, bidir: bool = False,
             cell_type="gru", **kwargs):
    super().__init__()
    # allow for the same or different parameters between encoder and decoder
    nhid = get_list(nhid, 2)
    dropoute = get_kwarg(kwargs, name="dropout_e", default_value=0.1)  # encoder embedding dropout
    dropoute = get_list(dropoute, 2)
    dropouti = get_kwarg(kwargs, name="dropout_i", default_value=0.65)  # input dropout
    dropouti = get_list(dropouti, 2)
    dropouth = get_kwarg(kwargs, name="dropout_h", default_value=0.3)  # RNN output layers dropout
    dropouth = get_list(dropouth, 2)
    wdrop = get_kwarg(kwargs, name="wdrop", default_value=0.5)  # RNN weights dropout
    wdrop = get_list(wdrop, 2)
    train_init = get_kwarg(kwargs, name="train_init", default_value=False)
    dropoutinit = get_kwarg(kwargs, name="dropout_init", default_value=0.1)  # RNN initial states dropout
    dropoutinit = get_list(dropoutinit, 2)
    self.cell_type = cell_type
    self.nt = ntoken
    self.bidir = bidir
    encoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken,
                                                emb_size=emb_sz,
                                                dropoute=dropoute[0],
                                                dropouti=dropouti[0])
    encoder_rnn = RNNLayers(input_size=emb_sz,
                            output_size=kwargs.get("output_size_encoder", emb_sz),
                            nhid=nhid[0], bidir=bidir,
                            dropouth=dropouth[0],
                            wdrop=wdrop[0],
                            nlayers=nlayers,
                            cell_type=self.cell_type,
                            train_init=train_init,
                            dropoutinit=dropoutinit[0])
    self.query_encoder = Encoder(embedding_layer=encoder_embedding_layer, encoder_layer=encoder_rnn)
    self.se_enc = RNNLayers(cell_type=self.cell_type,
                            input_size=encoder_rnn.output_size,
                            output_size=nhid[1],
                            nhid=nhid[1],
                            nlayers=1,
                            dropouth=dropouth[1],
                            wdrop=wdrop[1],
                            train_init=train_init,
                            dropoutinit=dropoutinit[1])
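# This constructor builds a hierarchical encoder: `query_encoder` encodes each utterance
# and `se_enc` runs over the sequence of utterance encodings. The standalone sketch below
# illustrates that data flow with plain PyTorch GRUs; it does not use the project's
# Encoder/RNNLayers classes, whose forward interfaces are not shown in this section.
def _hierarchical_encoding_sketch():
    import torch
    import torch.nn as nn

    emb_sz, nhid_query, nhid_session = 300, 512, 512
    query_gru = nn.GRU(emb_sz, nhid_query, batch_first=True)
    session_gru = nn.GRU(nhid_query, nhid_session, batch_first=True)

    # a batch of 4 dialogues, each with 3 utterances of 10 already-embedded tokens
    utterances = torch.randn(4 * 3, 10, emb_sz)
    _, query_state = query_gru(utterances)                 # final hidden state: [1, 12, nhid_query]
    query_state = query_state[-1].view(4, 3, nhid_query)   # one vector per utterance
    session_out, _ = session_gru(query_state)              # encode the sequence of utterances
    return session_out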
def __init__(self, ntoken: int, emb_sz: HParam, nhid: HParam, nlayers: HParam, pad_token: int, eos_token: int,
             max_tokens: int = 50, share_embedding_layer: bool = False, tie_decoder: bool = True,
             bidir: bool = False, session_constraint: bool = False, **kwargs):
    """
    Args:
        ntoken (int): Number of tokens for the encoder and the decoder
        emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
        nhid (Union[List[int],int]): Number of hidden dims for the encoder (first two values) and the decoder
        nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
        pad_token (int): The index of the token used for padding
        eos_token (int): The index of the end-of-sequence (eos) token
        max_tokens (int): The maximum number of steps the decoder iterates before stopping
        share_embedding_layer (bool): if True the encoder and the decoder share the same embedding layer
        tie_decoder (bool): if True the decoder output projection is tied to the decoder embedding weights
        bidir (bool): if True use a bidirectional encoder
        session_constraint (bool): if True the session encoding is concatenated to the decoder input as a constraint
        **kwargs: Extra hyperparameters (e.g. dropout settings) that will be passed to the encoder and the decoder
    """
    super().__init__()
    # allow for the same or different parameters between encoder and decoder
    ntoken, emb_sz, nhid, nlayers = get_list(ntoken), get_list(emb_sz, 2), get_list(nhid, 3), get_list(nlayers, 3)
    dropoutd = get_kwarg(kwargs, name="dropout_d", default_value=0.5)  # output dropout
    dropoute = get_kwarg(kwargs, name="dropout_e", default_value=0.1)  # encoder embedding dropout
    dropoute = get_list(dropoute, 2)
    dropouti = get_kwarg(kwargs, name="dropout_i", default_value=0.65)  # input dropout
    dropouti = get_list(dropouti, 2)
    dropouth = get_kwarg(kwargs, name="dropout_h", default_value=0.3)  # RNN output layers dropout
    dropouth = get_list(dropouth, 3)
    wdrop = get_kwarg(kwargs, name="wdrop", default_value=0.5)  # RNN weights dropout
    wdrop = get_list(wdrop, 3)
    train_init = kwargs.pop("train_init", False)  # have trainable initial states for the RNNs
    dropoutinit = get_kwarg(kwargs, name="dropout_init", default_value=0.1)  # RNN initial states dropout
    dropoutinit = get_list(dropoutinit, 3)
    self.cell_type = "gru"
    self.nt = ntoken[-1]
    self.pr_force = 1.0
    encoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken[0],
                                                emb_size=emb_sz[0],
                                                dropoute=dropoute[0],
                                                dropouti=dropouti[0])
    encoder_rnn = RNNLayers(input_size=emb_sz[0],
                            output_size=kwargs.get("output_size_encoder", emb_sz[0]),
                            nhid=nhid[0], bidir=bidir,
                            dropouth=dropouth[0],
                            wdrop=wdrop[0],
                            nlayers=nlayers[0],
                            cell_type=self.cell_type,
                            train_init=train_init,
                            dropoutinit=dropoutinit[0])
    self.query_encoder = Encoder(embedding_layer=encoder_embedding_layer, encoder_layer=encoder_rnn)
    self.se_enc = RNNLayers(cell_type=self.cell_type,
                            input_size=encoder_rnn.output_size,
                            output_size=nhid[1],
                            nhid=nhid[1],
                            nlayers=1,
                            dropouth=dropouth[1],
                            wdrop=wdrop[1],
                            train_init=train_init,
                            dropoutinit=dropoutinit[1])
    if share_embedding_layer:
        decoder_embedding_layer = encoder_embedding_layer
    else:
        decoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken[0],
                                                    emb_size=emb_sz[1],
                                                    dropoute=dropoute[1],
                                                    dropouti=dropouti[1])
    input_size_decoder = kwargs.get("input_size_decoder", emb_sz[1])
    input_size_decoder = input_size_decoder + self.se_enc.output_size if session_constraint else input_size_decoder
    decoder_rnn = RNNLayers(input_size=input_size_decoder,
                            output_size=kwargs.get("output_size_decoder", emb_sz[1]),
                            nhid=nhid[2], bidir=False,
                            dropouth=dropouth[2],
                            wdrop=wdrop[2],
                            nlayers=nlayers[2],
                            cell_type=self.cell_type,
                            train_init=train_init,
                            dropoutinit=dropoutinit[2])
    self.session_constraint = session_constraint
    # allow for changing sizes of decoder output
    input_size = decoder_rnn.output_size
    nhid = emb_sz[1] if input_size != emb_sz[1] else None
    projection_layer = Projection(output_size=ntoken[0], input_size=input_size, nhid=nhid, dropout=dropoutd,
                                  tie_encoder=decoder_embedding_layer if tie_decoder else None)
    self.decoder = Decoder(
        decoder_layer=decoder_rnn,
        projection_layer=projection_layer,
        embedding_layer=decoder_embedding_layer,
        pad_token=pad_token,
        eos_token=eos_token,
        max_tokens=max_tokens,
    )
    self.decoder_state_linear = nn.Linear(in_features=self.se_enc.output_size,
                                          out_features=self.decoder.layers[0].output_size)
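# A hedged usage sketch of the constructor above. The enclosing class name is not shown
# in this section, so `SessionConstraintModel` is a hypothetical stand-in, and all
# hyperparameter values and token indices are illustrative only.
def _session_constraint_usage_sketch():
    model = SessionConstraintModel(      # hypothetical class name
        ntoken=10000,                    # vocabulary size (illustrative)
        emb_sz=300,                      # broadcast to [300, 300] by get_list
        nhid=[512, 512, 512],            # query encoder, session encoder, decoder hidden sizes
        nlayers=[2, 1, 2],
        pad_token=1,
        eos_token=2,
        max_tokens=50,
        session_constraint=True,         # concatenate the session encoding to each decoder input
        dropout_d=0.5,
    )
    return model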
def __init__(self, ntoken: int, emb_sz: HParam, nhid: HParam, nlayers: HParam, pad_token: int, eos_token: int,
             max_tokens: int = 50, share_embedding_layer: bool = False, tie_decoder: bool = True,
             bidir: bool = False, **kwargs):
    """
    Args:
        ntoken (int): Number of tokens for the encoder and the decoder
        emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
        nhid (Union[List[int],int]): Number of hidden dims for the encoder (first two values) and the decoder
        nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
        pad_token (int): The index of the token used for padding
        eos_token (int): The index of the end-of-sequence (eos) token
        max_tokens (int): The maximum number of steps the decoder iterates before stopping
        share_embedding_layer (bool): if True the encoder and the decoder share the same embedding layer
        tie_decoder (bool): if True the decoder output projection is tied to the decoder embedding weights
        bidir (bool): if True use a bidirectional encoder
        **kwargs: Extra hyperparameters (e.g. dropout settings) that will be passed to the encoder and the decoder
    """
    super().__init__()
    # allow for the same or different parameters between encoder and decoder
    ntoken, emb_sz, nhid, nlayers = get_list(ntoken), get_list(emb_sz, 2), get_list(nhid, 3), get_list(nlayers, 3)
    dropoutd = get_kwarg(kwargs, name="dropoutd", default_value=0.5)
    self.cell_type = "gru"
    self.query_encoder = EmbeddingRNNEncoder(ntoken=ntoken[0], emb_sz=emb_sz[0], nhid=nhid[0], nlayers=nlayers[0],
                                             pad_token=pad_token, bidir=bidir, out_dim=nhid[0],
                                             cell_type=self.cell_type, **kwargs)
    self.session_encoder = RNNEncoder(in_dim=nhid[0], nhid=nhid[1], out_dim=nhid[2], nlayers=1, bidir=False,
                                      cell_type=self.cell_type, **kwargs)
    self.decoder = EmbeddingRNNDecoder(ntoken=ntoken[-1],
                                       emb_sz=emb_sz[-1],
                                       nhid=nhid[-1],
                                       nlayers=nlayers[-1],
                                       pad_token=pad_token,
                                       eos_token=eos_token,
                                       max_tokens=max_tokens,
                                       # Share the embedding layer between encoder and decoder
                                       embedding_layer=self.query_encoder.encoder_with_dropout.embed
                                       if share_embedding_layer else None,
                                       cell_type=self.cell_type,
                                       out_dim=nhid[-1],
                                       **kwargs)
    # potentially tie the output projection with the decoder embedding
    enc = self.decoder.encoder if tie_decoder else None
    self.decoder.projection_layer = Projection(n_out=ntoken[-1], n_in=nhid[-1], nhid=emb_sz[-1], dropout=dropoutd,
                                               tie_encoder=enc)
    self.decoder_state_linear = nn.Linear(in_features=nhid[-1],
                                          out_features=self.decoder.rnns[0].output_size)
    self.nt = ntoken[-1]
def __init__(self, ntoken: int, emb_sz: HParam, nhid: HParam, nlayers: HParam, att_nhid: int, pad_token: int,
             eos_token: int, max_tokens: int = 50, share_embedding_layer: bool = False, tie_decoder: bool = True,
             bidir: bool = False, **kwargs):
    """
    Args:
        ntoken (int): Number of tokens for the encoder and the decoder
        emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
        nhid (Union[List[int],int]): Number of hidden dims for the encoder (first two values) and the decoder
        nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
        att_nhid (int): Number of hidden dims for the attention module
        pad_token (int): The index of the token used for padding
        eos_token (int): The index of the end-of-sequence (eos) token
        max_tokens (int): The maximum number of steps the decoder iterates before stopping
        share_embedding_layer (bool): if True the encoder and the decoder share the same embedding layer
        tie_decoder (bool): if True the decoder output projection is tied to the decoder embedding weights
        bidir (bool): if True use a bidirectional encoder
        **kwargs: Extra hyperparameters (e.g. dropout settings) that will be passed to the encoder and the decoder
    """
    super().__init__()
    # allow for the same or different parameters between encoder and decoder
    ntoken, emb_sz, nhid, nlayers = get_list(ntoken), get_list(emb_sz, 2), get_list(nhid, 3), get_list(nlayers, 3)
    dropoutd = get_kwarg(kwargs, name="dropoutd", default_value=0.5)  # output dropout
    dropoute = get_kwarg(kwargs, name="dropout_e", default_value=0.1)  # encoder embedding dropout
    dropoute = get_list(dropoute, 2)
    dropouti = get_kwarg(kwargs, name="dropout_i", default_value=0.65)  # input dropout
    dropouti = get_list(dropouti, 2)
    dropouth = get_kwarg(kwargs, name="dropout_h", default_value=0.3)  # RNN output layers dropout
    dropouth = get_list(dropouth, 3)
    wdrop = get_kwarg(kwargs, name="wdrop", default_value=0.5)  # RNN weights dropout
    wdrop = get_list(wdrop, 3)
    self.cell_type = "gru"
    self.nt = ntoken[-1]
    self.pr_force = 1.0
    self.nlayers = nlayers
    encoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken[0],
                                                emb_size=emb_sz[0],
                                                dropoute=dropoute[0],
                                                dropouti=dropouti[0])
    encoder_rnn = RNNLayers(input_size=emb_sz[0],
                            output_size=kwargs.get("output_size_encoder", emb_sz[0]),
                            nhid=nhid[0], bidir=bidir,
                            dropouth=dropouth[0],
                            wdrop=wdrop[0],
                            nlayers=nlayers[0],
                            cell_type=self.cell_type)
    self.query_encoder = Encoder(embedding_layer=encoder_embedding_layer, encoder_layer=encoder_rnn)
    self.session_encoder = RNNLayers(input_size=encoder_rnn.output_size,
                                     nhid=nhid[1],
                                     output_size=kwargs.get("output_size", emb_sz[0]),
                                     nlayers=1, bidir=False,
                                     cell_type=self.cell_type,
                                     wdrop=wdrop[1],
                                     dropouth=dropouth[1])
    if share_embedding_layer:
        decoder_embedding_layer = encoder_embedding_layer
    else:
        decoder_embedding_layer = DropoutEmbeddings(ntokens=ntoken[-1],
                                                    emb_size=emb_sz[-1],
                                                    dropoute=dropoute[1],
                                                    dropouti=dropouti[1])
    decoder_rnn = RNNLayers(input_size=kwargs.get("input_size", emb_sz[-1] * 2),
                            output_size=kwargs.get("output_size", emb_sz[-1]),
                            nhid=nhid[-1], bidir=False,
                            dropouth=dropouth[2],
                            wdrop=wdrop[2],
                            nlayers=nlayers[-1],
                            cell_type=self.cell_type)
    projection_layer = AttentionProjection(output_size=ntoken[-1],
                                           input_size=emb_sz[-1],
                                           dropout=dropoutd,
                                           att_nhid=att_nhid,
                                           att_type="SDP",
                                           tie_encoder=decoder_embedding_layer if tie_decoder else None)
    self.decoder = AttentionDecoder(
        decoder_layer=decoder_rnn,
        projection_layer=projection_layer,
        embedding_layer=decoder_embedding_layer,
        pad_token=pad_token,
        eos_token=eos_token,
        max_tokens=max_tokens,
    )
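# `AttentionProjection` above is configured with att_type="SDP", presumably scaled
# dot-product attention; its implementation is not part of this section. The sketch
# below shows that scoring rule in isolation and is an assumption about what the
# projection computes internally, not the project's actual module.
def _scaled_dot_product_attention_sketch(query, keys, values):
    """query: [batch, dim], keys/values: [batch, seq_len, dim]."""
    import math
    import torch

    # scores[b, t] = <query[b], keys[b, t]> / sqrt(dim)
    scores = torch.einsum("bd,btd->bt", query, keys) / math.sqrt(query.size(-1))
    weights = torch.softmax(scores, dim=-1)             # attention distribution over source positions
    return torch.einsum("bt,btd->bd", weights, values)  # weighted sum of the values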
def __init__(self, ntoken: HParam, emb_sz: HParam, nhid: HParam, nlayers: HParam, pad_token: int, eos_token: int,
             max_tokens: int = 50, share_embedding_layer: bool = False, tie_decoder: bool = True,
             bidir: bool = False, **kwargs):
    """
    Args:
        ntoken (Union[List[int],int]): Number of tokens for the encoder and the decoder
        emb_sz (Union[List[int],int]): Embedding size for the encoder and decoder embeddings
        nhid (Union[List[int],int]): Number of hidden dims for the encoder and the decoder
        nlayers (Union[List[int],int]): Number of layers for the encoder and the decoder
        pad_token (int): The index of the token used for padding
        eos_token (int): The index of the end-of-sequence (eos) token
        max_tokens (int): The maximum number of steps the decoder iterates before stopping
        share_embedding_layer (bool): if True the encoder and the decoder share the same embedding layer
        tie_decoder (bool): if True the decoder output projection is tied to the decoder embedding weights
        bidir (bool): if True use a bidirectional encoder
        **kwargs: Extra hyperparameters (e.g. dropout settings) that will be passed to the encoder and the decoder
    """
    super(Seq2Seq, self).__init__()
    # allow for the same or different parameters between encoder and decoder
    ntoken, emb_sz, nhid, nlayers = get_list(ntoken, 2), get_list(emb_sz, 2), get_list(nhid, 2), get_list(nlayers, 2)
    dropoutd = kwargs.pop("dropoutd", 0.5)
    self.encoder = EmbeddingRNNEncoder(ntoken=ntoken[0], emb_sz=emb_sz[0], nhid=nhid[0], nlayers=nlayers[0],
                                       pad_token=pad_token, bidir=bidir, **kwargs)
    self.decoder = EmbeddingRNNDecoder(ntoken=ntoken[-1],
                                       emb_sz=emb_sz[-1],
                                       nhid=nhid[-1],
                                       nlayers=nlayers[-1],
                                       pad_token=pad_token,
                                       eos_token=eos_token,
                                       max_tokens=max_tokens,
                                       # Share the embedding layer between encoder and decoder
                                       embedding_layer=self.encoder.encoder_with_dropout.embed
                                       if share_embedding_layer else None,
                                       **kwargs)
    # potentially tie the output projection with the decoder embedding
    enc = self.decoder.encoder if tie_decoder else None
    self.decoder.projection_layer = Projection(n_out=ntoken[-1], n_in=emb_sz[-1], dropout=dropoutd,
                                               tie_encoder=enc)
    self.nt = ntoken[-1]
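# A hedged usage sketch of the Seq2Seq constructor above; the hyperparameter values
# and token indices are illustrative only.
def _seq2seq_usage_sketch():
    model = Seq2Seq(
        ntoken=[8000, 8000],             # source and target vocabulary sizes (illustrative)
        emb_sz=300,                      # broadcast to [300, 300] by get_list
        nhid=512,                        # broadcast to [512, 512]
        nlayers=2,                       # broadcast to [2, 2]
        pad_token=1,
        eos_token=2,
        max_tokens=50,
        share_embedding_layer=True,      # encoder and decoder use the same embedding layer
        tie_decoder=True,                # output projection tied to the decoder embedding
        bidir=True,
        dropoutd=0.5,
    )
    return model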