Example #1
 def __init__(self,
              size,
              self_attn,
              src_attn,
              feed_forward,
              dropout_rate,
              death_rate=0.0,
              normalize_before=True,
              concat_after=False):
     """Construct an DecoderLayer object."""
     super(StochasticDecoderLayer, self).__init__()
     self.size = size
     self.self_attn = self_attn
     self.src_attn = src_attn
     self.feed_forward = feed_forward
     self.norm1 = LayerNorm(size)
     self.norm2 = LayerNorm(size)
     self.norm3 = LayerNorm(size)
     self.dropout = nn.Dropout(dropout_rate)
     self.death_rate = death_rate
     self.normalize_before = normalize_before
     self.concat_after = concat_after
     if self.concat_after:
         self.concat_linear1 = nn.Linear(size + size, size)
         self.concat_linear2 = nn.Linear(size + size, size)
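The constructor above only wires up the sub-modules; what death_rate is for shows up in the forward pass. Below is a minimal, self-contained sketch of the stochastic-depth pattern combined with a pre-norm residual (the normalize_before=True path); the class name and forward logic are illustrative assumptions, not the original repository's code.

import torch
import torch.nn as nn

class StochasticResidual(nn.Module):
    """Pre-norm residual block that skips its sub-layer with probability
    `death_rate` during training (stochastic depth)."""

    def __init__(self, size, sublayer, dropout_rate, death_rate=0.0):
        super().__init__()
        self.norm = nn.LayerNorm(size)        # applied before the sub-layer (pre-norm)
        self.sublayer = sublayer
        self.dropout = nn.Dropout(dropout_rate)
        self.death_rate = death_rate

    def forward(self, x):
        # With probability death_rate the whole sub-layer is dropped and the
        # block reduces to the identity; at test time it always runs.
        if self.training and torch.rand(1).item() < self.death_rate:
            return x
        return x + self.dropout(self.sublayer(self.norm(x)))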
Example #2
 def __init__(self, d_model, d_ff, dropout_rate):
     super(TransEncoder, self).__init__()
     self.multi_head_attn = MultiHeadAttn(d_model, num_heads=8)
     self.dropout1 = nn.Dropout(dropout_rate)
     self.norm1 = LayerNorm(d_model)
     self.feed_forward = FeedForward(d_model, d_ff)
     self.dropout2 = nn.Dropout(dropout_rate)
     self.norm2 = LayerNorm(d_model)
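For a block wired like this, the corresponding post-norm forward pass is typically sub-layer -> dropout -> residual add -> LayerNorm, applied twice. The sketch below is self-contained and swaps in torch.nn built-ins (nn.MultiheadAttention, nn.LayerNorm) for the repository's MultiHeadAttn and LayerNorm, so the names and the 8-head default are assumptions.

import torch
import torch.nn as nn

class PostNormEncoderBlock(nn.Module):
    """Post-norm Transformer encoder block: sub-layer -> dropout -> add -> LayerNorm."""

    def __init__(self, d_model, d_ff, dropout_rate, num_heads=8):
        super().__init__()
        self.attn = nn.MultiheadAttention(d_model, num_heads, batch_first=True)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.norm1 = nn.LayerNorm(d_model)
        self.ff = nn.Sequential(
            nn.Linear(d_model, d_ff), nn.ReLU(), nn.Linear(d_ff, d_model))
        self.dropout2 = nn.Dropout(dropout_rate)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x, attn_mask=None):
        # Self-attention sub-layer, then residual add and LayerNorm (post-norm).
        attn_out, _ = self.attn(x, x, x, attn_mask=attn_mask)
        x = self.norm1(x + self.dropout1(attn_out))
        # Position-wise feed-forward sub-layer, same residual + norm pattern.
        return self.norm2(x + self.dropout2(self.ff(x)))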
Example #3
    def __init__(self,
                 input_module,
                 filter_widths,
                 filter_outputs,
                 dropout=0.0,
                 layer_norm=False,
                 activation="relu"):
        super(CNNEncoder, self).__init__()

        self.input_module_ = input_module
        #self.convs1 = [nn.Conv2d(Ci, Co, (K, D)) for K in Ks]
        self.filters_ = nn.ModuleList([
            nn.Conv2d(1, filter_outputs, (fw, input_module.embedding_size))
            for fw in filter_widths
        ])

        if activation == "relu":
            self.activation_ = F.relu
            self.reverse_activation_and_pool_ = True
        else:
            raise ValueError(
                "activation {} is not implemented.".format(activation))

        self.dropout_ = dropout
        if layer_norm:
            self.layer_norms_ = nn.ModuleList(
                [LayerNorm(filter_outputs) for _ in filter_widths])
        else:
            self.layer_norms_ = []
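The constructor builds one Conv2d per filter width; a conventional Kim-style forward pass over those filters (convolve over time, activate, max-pool over time, concatenate, optional layer norm, dropout) is sketched below. It is an illustrative assumption and ignores input_module_ and the reverse_activation_and_pool_ flag.

import torch
import torch.nn as nn
import torch.nn.functional as F

def cnn_encode(embeddings, filters, layer_norms=None, dropout=0.0, training=False):
    """Kim-style convolution over time.  `embeddings` is (batch, seq_len, emb_dim)
    and `filters` is an nn.ModuleList of Conv2d layers as built above."""
    x = embeddings.unsqueeze(1)                       # (B, 1, T, D): one input channel
    features = []
    for i, conv in enumerate(filters):
        h = conv(x).squeeze(3)                        # (B, filter_outputs, T - fw + 1)
        h = F.relu(h)                                 # activation
        h = F.max_pool1d(h, h.size(2)).squeeze(2)     # max over time -> (B, filter_outputs)
        if layer_norms:
            h = layer_norms[i](h)                     # optional per-filter LayerNorm
        features.append(h)
    out = torch.cat(features, dim=1)                  # (B, filter_outputs * len(filters))
    return F.dropout(out, p=dropout, training=training)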
Example #4
 def __init__(self, layer, N):
     """
     :param layer:
     :param N:
     """
     super(Decoder, self).__init__()
     self.layers = clones(layer, N)
     self.norm = LayerNorm(layer.size)
 def __init__(self, size, dropout):
     """
     :param size:
     :param dropout:
     """
     super(SublayerConnection, self).__init__()
     self.norm = LayerNorm(size)
     self.dropout = nn.Dropout(dropout)
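Neither constructor shows how the pieces are used. In Annotated-Transformer-style code, the accompanying helper and forward methods usually look like the sketch below; the clones definition and both forward signatures are assumptions consistent with the constructors above.

import copy
import torch.nn as nn

def clones(module, N):
    # The helper assumed by these constructors: N independent deep copies.
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

class SublayerConnection(nn.Module):
    def __init__(self, size, dropout):
        super().__init__()
        self.norm = nn.LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        # Pre-norm residual: normalize, run the sub-layer, dropout, then add.
        # `sublayer` is a callable, e.g. lambda x: self_attn(x, x, x, mask).
        return x + self.dropout(sublayer(self.norm(x)))

class Decoder(nn.Module):
    def __init__(self, layer, N):
        super().__init__()
        self.layers = clones(layer, N)
        self.norm = nn.LayerNorm(layer.size)

    def forward(self, x, memory, src_mask, tgt_mask):
        # Run the target through every cloned layer, then apply the final LayerNorm.
        for layer in self.layers:
            x = layer(x, memory, src_mask, tgt_mask)
        return self.norm(x)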
Example #6
    def __init__(self, encoder: nn.Module, generator, embedding, n_layers: int):
        """

        :param encoder: encoder/transformer layer that takes advantage of self-attention
        :param generator: output projection / generator head
        :param embedding: token embedding module
        :param n_layers: int, number of encoder/transformer layers
        """
        super(Bert, self).__init__()
        self.encoder = encoder
        self.layers = clone(encoder, n_layers)
        self.embed = embedding
        self.layer_norm = LayerNorm(encoder.size)
        self.generator = generator
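A minimal sketch of how a constructor like this is usually completed: embed the tokens, run them through the cloned encoder layers, apply the final LayerNorm, then the generator head. The class below is illustrative only; the layer(x, mask) call signature and the .size attribute on the encoder layer are assumptions carried over from the constructor.

import copy
import torch
import torch.nn as nn

class TinyBert(nn.Module):
    """Illustrative completion of the constructor above: embed, run the cloned
    encoder layers, apply the final LayerNorm, then the generator head."""

    def __init__(self, encoder, generator, embedding, n_layers):
        super().__init__()
        self.layers = nn.ModuleList(
            [copy.deepcopy(encoder) for _ in range(n_layers)])
        self.embed = embedding
        self.layer_norm = nn.LayerNorm(encoder.size)   # assumes the layer exposes .size
        self.generator = generator

    def forward(self, tokens, mask=None):
        x = self.embed(tokens)
        for layer in self.layers:
            x = layer(x, mask)                         # layer(x, mask) is an assumed signature
        return self.generator(self.layer_norm(x))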
Example #7
 def __init__(self, layer, N):
     super(Decoder, self).__init__()
     self.layers = clones(layer, N)
     self.norm = LayerNorm(layer.size)
Example #8
 def __init__(self, layer, N):
     super(Encoder, self).__init__()
     self.layers = clones(layer, N)
     #TODO: Use BatchNorm instead?
     self.norm = LayerNorm(layer.size)
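Most of these examples import a hand-rolled LayerNorm rather than torch.nn.LayerNorm. A common definition (Annotated-Transformer style) is sketched below as an assumption about what these repositories use; unlike the BatchNorm suggested in the TODO, it normalizes each position over its feature dimension, so it does not depend on batch statistics or sequence length.

import torch
import torch.nn as nn

class LayerNorm(nn.Module):
    """Normalizes each position over its feature dimension, with a learned
    per-feature scale (a_2) and shift (b_2).  torch.nn.LayerNorm does
    essentially the same thing."""

    def __init__(self, features, eps=1e-6):
        super().__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2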
Example #9
    def __init__(self,
                 inp_dim,
                 out_dim,
                 bidirectional=True,
                 dropout_p=[0.2],
                 use_batchnorm=[True],
                 use_layernorm=[False],
                 use_inp_layernorm=False,
                 use_inp_batchnorm=True,
                 orth_init=[True],
                 ligru_act=nn.ReLU,
                 to_do='train',
                 use_cuda=True):
        super(liGRU, self).__init__()

        # Reading parameters
        self.input_dim = inp_dim
        self.out_dim = out_dim
        self.dropout_p = dropout_p
        self.use_batchnorm = use_batchnorm
        self.use_layernorm = use_layernorm
        self.use_inp_layernorm = use_inp_layernorm
        self.use_inp_batchnorm = use_inp_batchnorm
        self.orth_init = orth_init
        self.ligru_act = ligru_act
        self.bidirectional = bidirectional
        self.use_cuda = use_cuda
        self.to_do = to_do

        if self.to_do == 'train':
            self.test_flag = False
        else:
            self.test_flag = True

        # List initialization
        self.wh = nn.ModuleList([])
        self.uh = nn.ModuleList([])

        self.wz = nn.ModuleList([])  # Update Gate
        self.uz = nn.ModuleList([])  # Update Gate

        self.ln = nn.ModuleList([])  # Layer Norm
        self.bn_wh = nn.ModuleList([])  # Batch Norm
        self.bn_wz = nn.ModuleList([])  # Batch Norm

        self.act = nn.ModuleList([])  # Activations

        # Input layer normalization
        if self.use_inp_layernorm:
            self.ln0 = LayerNorm(self.input_dim)

        # Input batch normalization
        if self.use_inp_batchnorm:
            self.bn0 = nn.BatchNorm1d(self.input_dim, momentum=0.05)

        self.N_ligru_lay = len(self.out_dim)

        current_input = self.input_dim

        # Initialization of hidden layers

        for i in range(self.N_ligru_lay):

            # Activations
            self.act += [self.ligru_act()]

            add_bias = True

            if self.use_layernorm[i] or self.use_batchnorm[i]:
                add_bias = False

            # Feed-forward connections
            self.wh.append(
                nn.Linear(current_input, self.out_dim[i], bias=add_bias))
            self.wz.append(
                nn.Linear(current_input, self.out_dim[i], bias=add_bias))

            # Recurrent connections
            self.uh.append(
                nn.Linear(self.out_dim[i], self.out_dim[i], bias=False))
            self.uz.append(
                nn.Linear(self.out_dim[i], self.out_dim[i], bias=False))

            if self.orth_init:
                nn.init.orthogonal_(self.uh[i].weight)
                nn.init.orthogonal_(self.uz[i].weight)

            # batch norm initialization
            self.bn_wh.append(nn.BatchNorm1d(self.out_dim[i], momentum=0.05))
            self.bn_wz.append(nn.BatchNorm1d(self.out_dim[i], momentum=0.05))

            self.ln.append(LayerNorm(self.out_dim[i]))

            if self.bidirectional:
                current_input = 2 * self.out_dim[i]
            else:
                current_input = self.out_dim[i]

        self.final_dim = self.out_dim[i] + self.bidirectional * self.out_dim[i]
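The constructor only allocates the per-layer modules; how they combine in the li-GRU recurrence is easier to see in a single step. The function below is an illustrative single-layer, single-direction sketch, not the repository's batched bidirectional forward.

import torch

def ligru_step(x_t, h_prev, wh, uh, wz, uz, bn_wh, bn_wz, act):
    """One li-GRU recurrence step for a single layer and direction:
        z_t  = sigmoid(BN(W_z x_t) + U_z h_{t-1})    # update gate
        cand = act(BN(W_h x_t) + U_h h_{t-1})        # candidate state (ReLU in li-GRU)
        h_t  = z_t * h_{t-1} + (1 - z_t) * cand
    The arguments are the i-th entries of the ModuleLists built above."""
    z_t = torch.sigmoid(bn_wz(wz(x_t)) + uz(h_prev))
    cand = act(bn_wh(wh(x_t)) + uh(h_prev))
    return z_t * h_prev + (1.0 - z_t) * cand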
Example #10
 def __init__(self, size, dropout):
     super(SublayerConnection, self).__init__()
     #TODO: Maybe use BatchNorm?
     self.norm = LayerNorm(size)
     self.dropout = nn.Dropout(dropout)