Example #1
0
    def __init__(self, num_layers, input_size, num_heads, nhid, dropout=0.1):
        """Build a stack of ``num_layers`` ``TransformerLayer`` modules.

        Args:
            num_layers: Number of layers to create.
            input_size: Forwarded as ``input_size`` to every layer.
            num_heads: Passed through ``get_list(num_heads, num_layers)``;
                entry ``i`` is given to layer ``i``.
            nhid: Passed through ``get_list(nhid, num_layers)``; entry ``i``
                is given to layer ``i``.
            dropout: Forwarded unchanged to every layer.
        """
        super().__init__()
        hidden_sizes = get_list(nhid, num_layers)
        head_counts = get_list(num_heads, num_layers)

        # Layers are created in index order so that registration order in the
        # ModuleList matches the layer index.
        stack = []
        for idx in range(num_layers):
            stack.append(
                TransformerLayer(input_size=input_size,
                                 nhid=hidden_sizes[idx],
                                 dropout=dropout,
                                 num_heads=head_counts[idx]))
        self.layers = nn.ModuleList(stack)
Example #2
0
    def __init__(self,
                 ntoken,
                 emb_size=512,
                 nlayers=6,
                 pad_token=None,
                 eos_token=None,
                 max_tokens=200,
                 share_embedding_layer=False,
                 tie_decoder=True,
                 **kwargs):
        """Assemble a transformer encoder and decoder.

        Args:
            ntoken: Passed through ``get_list(ntoken, 2)``. The first entry
                sizes the encoder embeddings; the last entry sizes the decoder
                embeddings and the output projection and is stored as
                ``self.nt``.
            emb_size: Embedding size; also the ``input_size`` of both layer
                stacks and of the projection.
            nlayers: Number of layers requested from both layer stacks.
            pad_token: Forwarded to the embeddings and to the decoder.
            eos_token: Forwarded to the decoder.
            max_tokens: Forwarded to the decoder.
            share_embedding_layer: If True the decoder reuses the encoder's
                embedding module instead of building its own.
            tie_decoder: If True the decoder embedding module is handed to the
                projection as ``tie_encoder``.
            **kwargs: Optional ``dropout`` (default 0.1), ``num_heads``
                (default 8) and ``nhid`` (default 2048), read via
                ``get_kwarg``. ``dropout`` is passed to the embeddings and the
                projection only.
        """
        super().__init__()

        vocab_sizes = get_list(ntoken, 2)
        self.nlayers = nlayers
        drop_p = get_kwarg(kwargs, name="dropout", default_value=0.1)
        head_count = get_kwarg(kwargs, name="num_heads", default_value=8)
        hidden_size = get_kwarg(kwargs, name="nhid", default_value=2048)

        # --- encoder -------------------------------------------------------
        source_embeddings = TransformerEmbeddings(ntokens=vocab_sizes[0],
                                                  emb_size=emb_size,
                                                  dropout=drop_p,
                                                  pad_token=pad_token)
        encoder_stack = TransformerEncoderLayers(num_layers=nlayers,
                                                 input_size=emb_size,
                                                 num_heads=head_count,
                                                 nhid=hidden_size)
        self.encoder = Encoder(embedding_layer=source_embeddings,
                               encoder_layer=encoder_stack)

        # --- decoder -------------------------------------------------------
        # Default to sharing; build a dedicated embedding only when asked to.
        target_embeddings = source_embeddings
        if not share_embedding_layer:
            target_embeddings = TransformerEmbeddings(ntokens=vocab_sizes[-1],
                                                      emb_size=emb_size,
                                                      dropout=drop_p,
                                                      pad_token=pad_token)

        decoder_stack = TransformerDecoderLayers(nlayers=nlayers,
                                                 input_size=emb_size,
                                                 num_heads=head_count,
                                                 nhid=hidden_size)

        tied_embeddings = None
        if tie_decoder:
            tied_embeddings = target_embeddings
        output_projection = Projection(output_size=vocab_sizes[-1],
                                       input_size=emb_size,
                                       dropout=drop_p,
                                       tie_encoder=tied_embeddings)

        self.decoder = TransformerDecoder(decoder_layer=decoder_stack,
                                          projection_layer=output_projection,
                                          embedding_layer=target_embeddings,
                                          pad_token=pad_token,
                                          eos_token=eos_token,
                                          max_tokens=max_tokens)
        self.nt = vocab_sizes[-1]

        # Xavier-uniform initialisation of every parameter with more than one
        # dimension; lower-dimensional parameters are left as constructed.
        for param in self.parameters():
            if param.dim() <= 1:
                continue
            nn.init.xavier_uniform_(param)
Example #3
0
    def __init__(self, ntoken: int, emb_sz: HParam, nhid: HParam, nlayers: HParam, pad_token: int,
                 eos_token: int, num_constraints: int, constraints_sz: int, max_tokens: int = 50,
                 share_embedding_layer: bool = False,
                 tie_decoder: bool = True,
                 bidir: bool = False, **kwargs):
        """Initialise the parent model with a widened decoder input and add constraint embeddings.

        Args:
            ntoken (int): Number of tokens; forwarded to the parent constructor
            emb_sz (Union[List[int],int]): Embedding size(s); forwarded to the parent constructor and used to
                compute ``input_size_decoder``
            nhid (Union[List[int],int]): Hidden size(s); forwarded to the parent constructor
            nlayers (Union[List[int],int]): Number of layers; forwarded to the parent constructor
            pad_token (int): The index of the token used for padding
            eos_token (int): The index of the token used for eos
            num_constraints (int): Number of entries of the constraint embedding table
            constraints_sz (int): Embedding size of the constraints; added to the decoder embedding size to
                obtain ``input_size_decoder``
            max_tokens (int): Forwarded to the parent constructor
            share_embedding_layer (bool): Forwarded to the parent constructor
            tie_decoder (bool): Forwarded to the parent constructor
            bidir (bool): Forwarded to the parent constructor
            **kwargs: Only ``dropout_e`` (default 0.1) and ``dropout_i`` (default 0.65) are read here, via
                ``get_kwarg``; ``kwargs`` is not forwarded to the parent constructor
        """
        # emb_sz is an HParam and may be a list: ``list + int`` raises TypeError,
        # so pick a single size before adding the constraint width.
        # NOTE(review): the last entry is taken to be the decoder embedding
        # size - confirm against the parent constructor.
        if isinstance(emb_sz, (list, tuple)):
            decoder_emb_sz = emb_sz[-1]
        else:
            decoder_emb_sz = emb_sz

        super().__init__(ntoken=ntoken, emb_sz=emb_sz, nhid=nhid, nlayers=nlayers, pad_token=pad_token,
                         eos_token=eos_token, max_tokens=max_tokens, share_embedding_layer=share_embedding_layer,
                         tie_decoder=tie_decoder, bidir=bidir,
                         input_size_decoder=decoder_emb_sz + constraints_sz
                         )

        dropoute = get_kwarg(kwargs, name="dropout_e", default_value=0.1)  # embedding dropout
        dropoute = get_list(dropoute, 2)
        dropouti = get_kwarg(kwargs, name="dropout_i", default_value=0.65)  # input dropout
        dropouti = get_list(dropouti, 2)
        # The constraint embeddings use the last entry of each dropout list.
        self.constraint_embeddings = DropoutEmbeddings(ntokens=num_constraints,
                                                       emb_size=constraints_sz,
                                                       dropoute=dropoute[-1],
                                                       dropouti=dropouti[-1],
                                                       )
Example #4
0
    def __init__(self,
                 ntoken: int,
                 emb_sz: int,
                 nhid: HParam,
                 nlayers: int,
                 bidir: bool = False,
                 cell_type="gru",
                 **kwargs):
        """Build a query encoder followed by a single-layer session encoder.

        Args:
            ntoken: Number of tokens of the embedding layer; stored as ``self.nt``.
            emb_sz: Embedding size and input size of the query encoder RNN.
            nhid: Passed through ``get_list(nhid, 2)``; entry 0 is used by the
                query encoder RNN, entry 1 is both ``nhid`` and
                ``output_size`` of the session encoder.
            nlayers: Number of layers of the query encoder RNN (the session
                encoder always has one layer).
            bidir: Forwarded to the query encoder RNN; stored as ``self.bidir``.
            cell_type: Forwarded to both RNN stacks; stored as ``self.cell_type``.
            **kwargs: ``dropout_e`` (0.1), ``dropout_i`` (0.65), ``dropout_h``
                (0.3), ``wdrop`` (0.5), ``train_init`` (False) and
                ``dropout_init`` (0.1) are read via ``get_kwarg``;
                ``output_size_encoder`` (default ``emb_sz``) is read with
                ``kwargs.get``.
        """
        super().__init__()

        # Each hyper-parameter is expanded to two entries: index 0 for the
        # query encoder, index 1 for the session encoder.
        hidden_sizes = get_list(nhid, 2)
        emb_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_e", default_value=0.1), 2)
        input_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_i", default_value=0.65), 2)
        hidden_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_h", default_value=0.3), 2)
        weight_dropouts = get_list(
            get_kwarg(kwargs, name="wdrop", default_value=0.5), 2)
        train_init = get_kwarg(kwargs, name="train_init", default_value=False)
        init_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_init", default_value=0.1), 2)

        self.cell_type = cell_type
        self.nt = ntoken
        self.bidir = bidir

        query_embeddings = DropoutEmbeddings(ntokens=ntoken,
                                             emb_size=emb_sz,
                                             dropoute=emb_dropouts[0],
                                             dropouti=input_dropouts[0])
        query_rnn = RNNLayers(
            input_size=emb_sz,
            output_size=kwargs.get("output_size_encoder", emb_sz),
            nhid=hidden_sizes[0],
            bidir=bidir,
            dropouth=hidden_dropouts[0],
            wdrop=weight_dropouts[0],
            nlayers=nlayers,
            cell_type=self.cell_type,
            train_init=train_init,
            dropoutinit=init_dropouts[0],
        )
        self.query_encoder = Encoder(embedding_layer=query_embeddings,
                                     encoder_layer=query_rnn)

        # The session encoder consumes the query encoder's outputs.
        self.se_enc = RNNLayers(
            cell_type=self.cell_type,
            input_size=query_rnn.output_size,
            output_size=hidden_sizes[1],
            nhid=hidden_sizes[1],
            nlayers=1,
            dropouth=hidden_dropouts[1],
            wdrop=weight_dropouts[1],
            train_init=train_init,
            dropoutinit=init_dropouts[1],
        )
Example #5
0
    def __init__(self,
                 ntoken: int,
                 emb_sz: HParam,
                 nhid: HParam,
                 nlayers: HParam,
                 pad_token: int,
                 eos_token: int,
                 max_tokens: int = 50,
                 share_embedding_layer: bool = False,
                 tie_decoder: bool = True,
                 bidir: bool = False,
                 session_constraint: bool = False,
                 **kwargs):
        """Build the query encoder, session encoder and decoder.

        Args:
            ntoken (int): Number of tokens; passed through ``get_list(ntoken)``
            emb_sz (Union[List[int],int]): Embedding sizes, expanded to two entries (query encoder, decoder)
            nhid (Union[List[int],int]): Hidden sizes, expanded to three entries (query encoder, session
                encoder, decoder)
            nlayers (Union[List[int],int]): Number of layers, expanded to three entries; entries 0 and 2 are
                used (the session encoder always has one layer)
            pad_token (int): The index of the token used for padding
            eos_token (int): The index of the token used for eos
            max_tokens (int): Forwarded to the decoder
            share_embedding_layer (bool): if True the decoder reuses the encoder's embedding layer
            tie_decoder (bool): if True the decoder embedding layer is passed to the projection as
                ``tie_encoder``
            bidir (bool): Forwarded to the query encoder RNN
            session_constraint (bool): if True the session encoder's output size is added to the decoder
                RNN input size
            **kwargs: ``dropout_d`` (0.5), ``dropout_e`` (0.1), ``dropout_i`` (0.65), ``dropout_h`` (0.3),
                ``wdrop`` (0.5) and ``dropout_init`` (0.1) are read via ``get_kwarg``; ``train_init``
                (False) is popped; ``output_size_encoder``, ``input_size_decoder`` and
                ``output_size_decoder`` are read with ``kwargs.get``
        """
        super().__init__()

        # Expand the hyper-parameters so encoder and decoder may differ.
        vocab_sizes = get_list(ntoken)
        emb_sizes = get_list(emb_sz, 2)
        hidden_sizes = get_list(nhid, 3)
        layer_counts = get_list(nlayers, 3)

        output_dropout = get_kwarg(kwargs, name="dropout_d", default_value=0.5)
        emb_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_e", default_value=0.1), 2)
        input_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_i", default_value=0.65), 2)
        hidden_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_h", default_value=0.3), 3)
        weight_dropouts = get_list(
            get_kwarg(kwargs, name="wdrop", default_value=0.5), 3)

        # Whether the RNNs get trainable initial states.
        train_init = kwargs.pop("train_init", False)
        init_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_init", default_value=0.1), 3)

        self.cell_type = "gru"
        self.nt = vocab_sizes[-1]
        self.pr_force = 1.0

        # --- query encoder -------------------------------------------------
        query_embeddings = DropoutEmbeddings(ntokens=vocab_sizes[0],
                                             emb_size=emb_sizes[0],
                                             dropoute=emb_dropouts[0],
                                             dropouti=input_dropouts[0])
        query_rnn = RNNLayers(
            input_size=emb_sizes[0],
            output_size=kwargs.get("output_size_encoder", emb_sizes[0]),
            nhid=hidden_sizes[0],
            bidir=bidir,
            dropouth=hidden_dropouts[0],
            wdrop=weight_dropouts[0],
            nlayers=layer_counts[0],
            cell_type=self.cell_type,
            train_init=train_init,
            dropoutinit=init_dropouts[0],
        )
        self.query_encoder = Encoder(embedding_layer=query_embeddings,
                                     encoder_layer=query_rnn)

        # --- session encoder -----------------------------------------------
        self.se_enc = RNNLayers(
            cell_type=self.cell_type,
            input_size=query_rnn.output_size,
            output_size=hidden_sizes[1],
            nhid=hidden_sizes[1],
            nlayers=1,
            dropouth=hidden_dropouts[1],
            wdrop=weight_dropouts[1],
            train_init=train_init,
            dropoutinit=init_dropouts[1],
        )

        # --- decoder ---------------------------------------------------------
        # Default to sharing; build a dedicated embedding only when asked to.
        decoder_embeddings = query_embeddings
        if not share_embedding_layer:
            decoder_embeddings = DropoutEmbeddings(ntokens=vocab_sizes[0],
                                                   emb_size=emb_sizes[1],
                                                   dropoute=emb_dropouts[1],
                                                   dropouti=input_dropouts[1])

        decoder_input_size = kwargs.get("input_size_decoder", emb_sizes[1])
        if session_constraint:
            # Widen the decoder input by the session encoder's output size.
            decoder_input_size = decoder_input_size + self.se_enc.output_size
        decoder_rnn = RNNLayers(
            input_size=decoder_input_size,
            output_size=kwargs.get("output_size_decoder", emb_sizes[1]),
            nhid=hidden_sizes[2],
            bidir=False,
            dropouth=hidden_dropouts[2],
            wdrop=weight_dropouts[2],
            nlayers=layer_counts[2],
            cell_type=self.cell_type,
            train_init=train_init,
            dropoutinit=init_dropouts[2],
        )
        self.session_constraint = session_constraint

        # When the decoder output size differs from the decoder embedding
        # size, the projection is given the embedding size as ``nhid``.
        projection_input_size = decoder_rnn.output_size
        if projection_input_size != emb_sizes[1]:
            projection_nhid = emb_sizes[1]
        else:
            projection_nhid = None

        tied_embeddings = None
        if tie_decoder:
            tied_embeddings = decoder_embeddings
        output_projection = Projection(output_size=vocab_sizes[0],
                                       input_size=projection_input_size,
                                       nhid=projection_nhid,
                                       dropout=output_dropout,
                                       tie_encoder=tied_embeddings)
        self.decoder = Decoder(decoder_layer=decoder_rnn,
                               projection_layer=output_projection,
                               embedding_layer=decoder_embeddings,
                               pad_token=pad_token,
                               eos_token=eos_token,
                               max_tokens=max_tokens)

        # Linear map from the session encoder output size to the output size
        # of the decoder's first layer.
        self.decoder_state_linear = nn.Linear(
            in_features=self.se_enc.output_size,
            out_features=self.decoder.layers[0].output_size)
Example #6
0
    def __init__(self,
                 ntoken: int,
                 emb_sz: HParam,
                 nhid: HParam,
                 nlayers: HParam,
                 pad_token: int,
                 eos_token: int,
                 max_tokens: int = 50,
                 share_embedding_layer: bool = False,
                 tie_decoder: bool = True,
                 bidir: bool = False,
                 **kwargs):
        """Build the query encoder, session encoder and decoder.

        Args:
            ntoken (int): Number of tokens; passed through ``get_list(ntoken)``
            emb_sz (Union[List[int],int]): Embedding sizes, expanded to two entries (query encoder, decoder)
            nhid (Union[List[int],int]): Hidden sizes, expanded to three entries (query encoder, session
                encoder, decoder)
            nlayers (Union[List[int],int]): Number of layers, expanded to three entries; the first and last
                are used (the session encoder always has one layer)
            pad_token (int): The index of the token used for padding
            eos_token (int): The index of the token used for eos
            max_tokens (int): Forwarded to the decoder
            share_embedding_layer (bool): if True the decoder is given the query encoder's embedding module
            tie_decoder (bool): if True ``self.decoder.encoder`` is passed to the projection as
                ``tie_encoder``
            bidir (bool): Forwarded to the query encoder
            **kwargs: ``dropoutd`` (default 0.5) is extracted with ``get_kwarg``, whose result is unpacked
                as ``(value, kwargs)``; the returned kwargs are forwarded to the query encoder, the
                session encoder and the decoder
        """
        super().__init__()

        # Expand the hyper-parameters so encoder and decoder may differ.
        vocab_sizes = get_list(ntoken)
        emb_sizes = get_list(emb_sz, 2)
        hidden_sizes = get_list(nhid, 3)
        layer_counts = get_list(nlayers, 3)
        output_dropout, kwargs = get_kwarg(kwargs,
                                           name="dropoutd",
                                           default_value=0.5)
        self.cell_type = "gru"

        self.query_encoder = EmbeddingRNNEncoder(
            ntoken=vocab_sizes[0],
            emb_sz=emb_sizes[0],
            nhid=hidden_sizes[0],
            nlayers=layer_counts[0],
            pad_token=pad_token,
            bidir=bidir,
            out_dim=hidden_sizes[0],
            cell_type=self.cell_type,
            **kwargs)

        self.session_encoder = RNNEncoder(
            in_dim=hidden_sizes[0],
            nhid=hidden_sizes[1],
            out_dim=hidden_sizes[2],
            nlayers=1,
            bidir=False,
            cell_type=self.cell_type,
            **kwargs)

        # Optionally hand the query encoder's embedding module to the decoder.
        shared_embedding = None
        if share_embedding_layer:
            shared_embedding = self.query_encoder.encoder_with_dropout.embed
        self.decoder = EmbeddingRNNDecoder(
            ntoken=vocab_sizes[-1],
            emb_sz=emb_sizes[-1],
            nhid=hidden_sizes[-1],
            nlayers=layer_counts[-1],
            pad_token=pad_token,
            eos_token=eos_token,
            max_tokens=max_tokens,
            embedding_layer=shared_embedding,
            cell_type=self.cell_type,
            out_dim=hidden_sizes[-1],
            **kwargs)

        # Replace the decoder's projection, optionally tied to the decoder's
        # own ``encoder`` attribute.
        tied_module = None
        if tie_decoder:
            tied_module = self.decoder.encoder
        self.decoder.projection_layer = Projection(
            n_out=vocab_sizes[-1],
            n_in=hidden_sizes[-1],
            nhid=emb_sizes[-1],
            dropout=output_dropout,
            tie_encoder=tied_module)

        self.decoder_state_linear = nn.Linear(
            in_features=hidden_sizes[-1],
            out_features=self.decoder.rnns[0].output_size)
        self.nt = vocab_sizes[-1]
Example #7
0
    def __init__(self,
                 ntoken: int,
                 emb_sz: HParam,
                 nhid: HParam,
                 nlayers: HParam,
                 att_nhid: int,
                 pad_token: int,
                 eos_token: int,
                 max_tokens: int = 50,
                 share_embedding_layer: bool = False,
                 tie_decoder: bool = True,
                 bidir: bool = False,
                 **kwargs):
        """Build the query encoder, session encoder and attention decoder.

        Args:
            ntoken (int): Number of tokens; passed through ``get_list(ntoken)``
            emb_sz (Union[List[int],int]): Embedding sizes, expanded to two entries (query encoder, decoder)
            nhid (Union[List[int],int]): Hidden sizes, expanded to three entries (query encoder, session
                encoder, decoder)
            nlayers (Union[List[int],int]): Number of layers, expanded to three entries and stored as
                ``self.nlayers``; the first and last are used (the session encoder always has one layer)
            att_nhid (int): Forwarded to the attention projection as ``att_nhid``
            pad_token (int): The index of the token used for padding
            eos_token (int): The index of the token used for eos
            max_tokens (int): Forwarded to the decoder
            share_embedding_layer (bool): if True the decoder reuses the encoder's embedding layer
            tie_decoder (bool): if True the decoder embedding layer is passed to the projection as
                ``tie_encoder``
            bidir (bool): Forwarded to the query encoder RNN
            **kwargs: ``dropoutd`` (0.5), ``dropout_e`` (0.1), ``dropout_i`` (0.65), ``dropout_h`` (0.3)
                and ``wdrop`` (0.5) are read via ``get_kwarg``; ``output_size_encoder``, ``output_size``
                and ``input_size`` are read with ``kwargs.get``
        """
        super().__init__()

        # Expand the hyper-parameters so encoder and decoder may differ.
        vocab_sizes = get_list(ntoken)
        emb_sizes = get_list(emb_sz, 2)
        hidden_sizes = get_list(nhid, 3)
        layer_counts = get_list(nlayers, 3)

        output_dropout = get_kwarg(kwargs, name="dropoutd", default_value=0.5)
        emb_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_e", default_value=0.1), 2)
        input_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_i", default_value=0.65), 2)
        hidden_dropouts = get_list(
            get_kwarg(kwargs, name="dropout_h", default_value=0.3), 3)
        weight_dropouts = get_list(
            get_kwarg(kwargs, name="wdrop", default_value=0.5), 3)

        self.cell_type = "gru"
        self.nt = vocab_sizes[-1]
        self.pr_force = 1.0
        self.nlayers = layer_counts

        # --- query encoder -------------------------------------------------
        query_embeddings = DropoutEmbeddings(ntokens=vocab_sizes[0],
                                             emb_size=emb_sizes[0],
                                             dropoute=emb_dropouts[0],
                                             dropouti=input_dropouts[0])
        query_rnn = RNNLayers(
            input_size=emb_sizes[0],
            output_size=kwargs.get("output_size_encoder", emb_sizes[0]),
            nhid=hidden_sizes[0],
            bidir=bidir,
            dropouth=hidden_dropouts[0],
            wdrop=weight_dropouts[0],
            nlayers=layer_counts[0],
            cell_type=self.cell_type,
        )
        self.query_encoder = Encoder(embedding_layer=query_embeddings,
                                     encoder_layer=query_rnn)

        # --- session encoder -----------------------------------------------
        self.session_encoder = RNNLayers(
            input_size=query_rnn.output_size,
            nhid=hidden_sizes[1],
            output_size=kwargs.get("output_size", emb_sizes[0]),
            nlayers=1,
            bidir=False,
            cell_type=self.cell_type,
            wdrop=weight_dropouts[1],
            dropouth=hidden_dropouts[1],
        )

        # --- decoder ---------------------------------------------------------
        # Default to sharing; build a dedicated embedding only when asked to.
        decoder_embeddings = query_embeddings
        if not share_embedding_layer:
            decoder_embeddings = DropoutEmbeddings(ntokens=vocab_sizes[-1],
                                                   emb_size=emb_sizes[-1],
                                                   dropoute=emb_dropouts[1],
                                                   dropouti=input_dropouts[1])

        # By default the decoder RNN input is twice the decoder embedding size.
        decoder_rnn = RNNLayers(
            input_size=kwargs.get("input_size", emb_sizes[-1] * 2),
            output_size=kwargs.get("output_size", emb_sizes[-1]),
            nhid=hidden_sizes[-1],
            bidir=False,
            dropouth=hidden_dropouts[2],
            wdrop=weight_dropouts[2],
            nlayers=layer_counts[-1],
            cell_type=self.cell_type,
        )

        tied_embeddings = None
        if tie_decoder:
            tied_embeddings = decoder_embeddings
        attention_projection = AttentionProjection(
            output_size=vocab_sizes[-1],
            input_size=emb_sizes[-1],
            dropout=output_dropout,
            att_nhid=att_nhid,
            att_type="SDP",
            tie_encoder=tied_embeddings)

        self.decoder = AttentionDecoder(
            decoder_layer=decoder_rnn,
            projection_layer=attention_projection,
            embedding_layer=decoder_embeddings,
            pad_token=pad_token,
            eos_token=eos_token,
            max_tokens=max_tokens,
        )
Example #8
0
    def __init__(self,
                 ntoken: HParam,
                 emb_sz: HParam,
                 nhid: HParam,
                 nlayers: HParam,
                 pad_token: int,
                 eos_token: int,
                 max_tokens: int = 50,
                 share_embedding_layer: bool = False,
                 tie_decoder: bool = True,
                 bidir: bool = False,
                 **kwargs):
        """Build the encoder, the decoder and the decoder's output projection.

        Args:
            ntoken (Union[List[int],int]): Number of tokens, expanded to two entries (encoder, decoder)
            emb_sz (Union[List[int],int]): Embedding sizes, expanded to two entries (encoder, decoder)
            nhid (Union[List[int],int]): Hidden sizes, expanded to two entries (encoder, decoder)
            nlayers (Union[List[int],int]): Number of layers, expanded to two entries (encoder, decoder)
            pad_token (int): The index of the token used for padding
            eos_token (int): The index of the token used for eos
            max_tokens (int): Forwarded to the decoder
            share_embedding_layer (bool): if True the decoder is given the encoder's embedding module
            tie_decoder (bool): if True ``self.decoder.encoder`` is passed to the projection as
                ``tie_encoder``
            bidir (bool): Forwarded to the encoder
            **kwargs: ``dropoutd`` (default 0.5) is removed and used as the projection dropout; the
                remaining entries are forwarded to both the encoder and the decoder
        """
        super(Seq2Seq, self).__init__()

        # Expand the hyper-parameters so encoder and decoder may differ.
        vocab_sizes = get_list(ntoken, 2)
        emb_sizes = get_list(emb_sz, 2)
        hidden_sizes = get_list(nhid, 2)
        layer_counts = get_list(nlayers, 2)

        # Take the output dropout out of kwargs before they are forwarded.
        output_dropout = kwargs.pop("dropoutd", 0.5)

        self.encoder = EmbeddingRNNEncoder(ntoken=vocab_sizes[0],
                                           emb_sz=emb_sizes[0],
                                           nhid=hidden_sizes[0],
                                           nlayers=layer_counts[0],
                                           pad_token=pad_token,
                                           bidir=bidir,
                                           **kwargs)

        # Optionally hand the encoder's embedding module to the decoder.
        shared_embedding = None
        if share_embedding_layer:
            shared_embedding = self.encoder.encoder_with_dropout.embed
        self.decoder = EmbeddingRNNDecoder(ntoken=vocab_sizes[-1],
                                           emb_sz=emb_sizes[-1],
                                           nhid=hidden_sizes[-1],
                                           nlayers=layer_counts[-1],
                                           pad_token=pad_token,
                                           eos_token=eos_token,
                                           max_tokens=max_tokens,
                                           embedding_layer=shared_embedding,
                                           **kwargs)

        # Replace the decoder's projection, optionally tied to the decoder's
        # own ``encoder`` attribute.
        tied_module = None
        if tie_decoder:
            tied_module = self.decoder.encoder
        self.decoder.projection_layer = Projection(n_out=vocab_sizes[-1],
                                                   n_in=emb_sizes[-1],
                                                   dropout=output_dropout,
                                                   tie_encoder=tied_module)
        self.nt = vocab_sizes[-1]