def function_set(self):
        def dropout(batch_X, drop_probability):
            keep_probability = 1 - drop_probability
            assert 0 <= keep_probability <= 1
            if keep_probability == 0:
                return batch_X.zeros_like()

            # Only with probability keep_probability do we keep this sample's output for a given neuron
            mask = nd.random_uniform(
                0, 1.0, batch_X.shape, ctx=batch_X.context) < keep_probability
            # Ensure E[dropout(batch_X)] == batch_X
            scale = 1 / keep_probability

            return mask * batch_X * scale

        # Dense layers need dropout; Conv layers generally don't, since they already share weights
        h1 = dropout(
            nd.relu(
                nd.dot(self.__batch_X.reshape(
                    (-1, self.__num_inputs)), self.__W1) + self.__b1),
            self.__drop_prob1)
        h2 = dropout(nd.relu(nd.dot(h1, self.__W2) + self.__b2),
                     self.__drop_prob2)

        return nd.dot(h2, self.__W3) + self.__b3
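As a quick numeric check of the `scale = 1 / keep_probability` comment above (inverted dropout keeps E[dropout(batch_X)] == batch_X), here is a minimal standalone sketch using the same mask construction; the shapes are illustrative assumptions:

from mxnet import nd

X = nd.ones((1024, 256))
keep_probability = 0.5
# Keep each activation with probability keep_probability ...
mask = nd.random_uniform(0, 1.0, X.shape, ctx=X.context) < keep_probability
# ... and rescale the survivors so the expectation is unchanged.
out = mask * X * (1 / keep_probability)
print(out.mean())  # close to 1.0, i.e. the mean of X is preserved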
Example #2
 def forward(self, x):
     out = nd.relu(self.bn1(self.conv1(x)))
     # print("in forward", out.shape)
     out = self.bn2(self.conv2(out))
     if not self.same_shape:
         x = self.conv3(x)
     return nd.relu(out + x)
def net(X, verbose=False):
    X = X.as_in_context(W1.context)
    # First convolutional layer
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1, kernel=W1.shape[2:], num_filter=W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation, pool_type='max', kernel=(2, 2), stride=(2, 2))

    # Second convolutional layer
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2, kernel=W2.shape[2:], num_filter=W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)

    # First fully connected layer
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)

    # Second fully connected layer
    h4_linear = nd.dot(h3, W4) + b4
    if verbose:
        print('1st conv block', h1.shape)
        print('2nd conv block', h2.shape)
        print('1st dense', h3.shape)
        print('2nd dense', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
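The `net` above relies on globally defined parameters W1..W4 / b1..b4. A hedged initialisation sketch that would make it runnable on 28x28 single-channel inputs; the filter counts, kernel sizes and layer widths here are illustrative assumptions, not taken from the original source:

import mxnet as mx
from mxnet import nd

ctx = mx.cpu()
weight_scale = 0.01
# Conv 1: 20 filters of 5x5 over 1 input channel -> 24x24, pooled to 12x12
W1 = nd.random_normal(shape=(20, 1, 5, 5), scale=weight_scale, ctx=ctx)
b1 = nd.zeros(W1.shape[0], ctx=ctx)
# Conv 2: 50 filters of 3x3 over 20 channels -> 10x10, pooled to 5x5
W2 = nd.random_normal(shape=(50, 20, 3, 3), scale=weight_scale, ctx=ctx)
b2 = nd.zeros(W2.shape[0], ctx=ctx)
# Dense 1: 50 * 5 * 5 = 1250 flattened features -> 128 hidden units
W3 = nd.random_normal(shape=(1250, 128), scale=weight_scale, ctx=ctx)
b3 = nd.zeros(W3.shape[1], ctx=ctx)
# Dense 2: 128 hidden units -> 10 classes
W4 = nd.random_normal(shape=(128, 10), scale=weight_scale, ctx=ctx)
b4 = nd.zeros(W4.shape[1], ctx=ctx)
for param in [W1, b1, W2, b2, W3, b3, W4, b4]:
    param.attach_grad()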
Example #4
 def forward(self, x):
     x = nd.relu(self.bn1(self.conv1(x)))
     x = nd.relu(self.bn2(self.conv2(x)))
     x = nd.relu(self.bn3(self.conv3(x)))
     x = nd.relu(self.fc1(x))
     x = nd.relu(self.fc2(x))
     return self.out(x)
Example #5
    def function_set(self):
        # First convolutional layer
        # Convolution
        h1_conv = nd.Convolution(
            data=self.__batch_X, weight=self.__W1, bias=self.__b1, kernel=self.__W1.shape[2:], num_filter=self.__W1.shape[0])
        # Activation
        h1_activation = nd.relu(h1_conv)
        # Pooling
        h1 = nd.Pooling(data=h1_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
        # Second convolutional layer
        h2_conv = nd.Convolution(
            data=h1, weight=self.__W2, bias=self.__b2, kernel=self.__W2.shape[2:], num_filter=self.__W2.shape[0])
        h2_activation = nd.relu(h2_conv)
        h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
        h2 = nd.flatten(h2)
        # First fully connected layer
        h3_linear = nd.dot(h2, self.__W3) + self.__b3
        h3 = nd.relu(h3_linear)
        # Second fully connected layer
        h4_linear = nd.dot(h3, self.__W4) + self.__b4

        # print("1st conv block:", h1.shape)
        # print("2nd conv block:", h2.shape)
        # print("1st dense:", h3.shape)
        # print("2nd dense:", h4_linear.shape)
        # print("output:", h4_linear)

        return h4_linear
Example #7
    def forward(self, pred, label):
        label = nd.one_hot(label, self.nclass)
        alpha_p = nd.relu(self.op - pred)
        alpha_n = nd.relu(pred - self.on)

        pred = (label * (alpha_p * (pred - self.delta_p)) + (1-label) * (alpha_n * (pred - self.delta_n))) * self.scale

        return self.loss(pred, label)
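The margins and scale used above (self.op, self.on, self.delta_p, self.delta_n, self.scale) and the wrapped self.loss are not shown in this snippet. A hedged, functional restatement of the same forward pass, with illustrative hyper-parameter values and softmax cross-entropy assumed as the inner loss:

from mxnet import nd
from mxnet.gluon import loss as gloss

def margin_weighted_loss(pred, label, nclass, op=1.0, on=0.0,
                         delta_p=0.9, delta_n=0.1, scale=32.0):
    onehot = nd.one_hot(label, nclass)
    alpha_p = nd.relu(op - pred)   # per-entry weight for positive logits
    alpha_n = nd.relu(pred - on)   # per-entry weight for negative logits
    logits = (onehot * (alpha_p * (pred - delta_p))
              + (1 - onehot) * (alpha_n * (pred - delta_n))) * scale
    return gloss.SoftmaxCrossEntropyLoss(sparse_label=False)(logits, onehot)

pred = nd.random.uniform(shape=(4, 10))
label = nd.array([1, 3, 5, 7])
print(margin_weighted_loss(pred, label, nclass=10))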
    def function_set(self):
        def batch_norm(X, gamma, beta, is_training, moving_mean, moving_variance, eps=1e-5, moving_momentum=0.9):
            assert len(X.shape) in (2, 4)
            # Fully connected: batch_size x feature
            if len(X.shape) == 2:
                # Mean and variance of each input dimension over the batch samples
                mean = X.mean(axis=0)
                variance = ((X - mean) ** 2).mean(axis=0)
            # 2D convolution: batch_size x channel x height x width
            else:
                # Compute mean and variance per channel; keep the 4D shape so broadcasting works correctly
                mean = X.mean(axis=(0, 2, 3), keepdims=True)
                variance = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
                # Reshape so broadcasting works correctly
                moving_mean = moving_mean.reshape(mean.shape)
                moving_variance = moving_variance.reshape(mean.shape)
            # Normalize
            if is_training:
                X_hat = (X - mean) / nd.sqrt(variance + eps)
                # !!! Update the global (moving) mean and variance
                # Each update keeps 0.9 of the previous moving statistics and adds 0.1 of this batch's
                moving_mean[:] = moving_momentum * moving_mean + (1.0 - moving_momentum) * mean
                moving_variance[:] = moving_momentum * moving_variance + (1.0 - moving_momentum) * variance
            else:
                # !!! At test time, use the global (moving) mean and variance
                X_hat = (X - moving_mean) / nd.sqrt(moving_variance + eps)

            # Scale and shift
            return gamma.reshape(mean.shape) * X_hat + beta.reshape(mean.shape)

        # First convolutional layer
        h1_conv = nd.Convolution(
            data=self.__batch_X, weight=self.__W1, bias=self.__b1, kernel=(5, 5), num_filter=20)
        # First BN
        h1_bn = batch_norm(
            h1_conv, self.__gamma1, self.__beta1, self.__is_training, self.__moving_mean1, self.__moving_variance1)
        h1_activation = nd.relu(h1_bn)
        h1 = nd.Pooling(
            data=h1_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))

        # Second convolutional layer
        h2_conv = nd.Convolution(
            data=h1, weight=self.__W2, bias=self.__b2, kernel=(3, 3), num_filter=50)
        # Second BN
        h2_bn = batch_norm(
            h2_conv, self.__gamma2, self.__beta2, self.__is_training, self.__moving_mean2, self.__moving_variance2)
        h2_activation = nd.relu(h2_bn)
        h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
        h2 = nd.flatten(h2)

        # First fully connected layer
        h3_linear = nd.dot(h2, self.__W3) + self.__b3
        h3 = nd.relu(h3_linear)

        # Second fully connected layer
        h4_linear = nd.dot(h3, self.__W4) + self.__b4

        return h4_linear
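The per-layer BN state used above (self.__gamma*, self.__beta*, self.__moving_mean*, self.__moving_variance*) is assumed to be allocated elsewhere. A minimal allocation sketch; the channel counts 20 and 50 match the num_filter arguments above, everything else is an assumption:

import mxnet as mx
from mxnet import nd

def init_bn_state(num_channels, ctx=mx.cpu()):
    # Learnable scale/shift plus non-learnable moving statistics
    gamma = nd.random_normal(shape=(num_channels,), scale=0.01, ctx=ctx)
    beta = nd.random_normal(shape=(num_channels,), scale=0.01, ctx=ctx)
    moving_mean = nd.zeros((num_channels,), ctx=ctx)
    moving_variance = nd.zeros((num_channels,), ctx=ctx)
    gamma.attach_grad()
    beta.attach_grad()
    return gamma, beta, moving_mean, moving_variance

gamma1, beta1, moving_mean1, moving_variance1 = init_bn_state(20)
gamma2, beta2, moving_mean2, moving_variance2 = init_bn_state(50)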
Example #9
 def forward(self, x):
     out = self.conv_1(nd.relu(self.bn_1(x)))
     out = nd.relu(self.bn_2(out))
     if self.is_dropout:
         out = self.dropout(out)
     out = self.conv_2(out)
     if not self.same_shape:
         x = self.conv_3(x)
     return out + x
Example #10
def net(x, is_training=False):
    # w1, b1, w2, b2, w3, b3 = params = initParam(verbose=True)
    x = x.reshape(shape=(-1, num_input))  # (256,784)
    # print(x.shape)
    x1 = nd.relu(nd.dot(x, w1) + b1)
    if is_training: x1 = dropout(x1, 0.8)
    x2 = nd.relu(nd.dot(x1, w2) + b2)
    if is_training: x2 = dropout(x2, 0.5)
    out = nd.dot(x2, w3) + b3
    return out
def net(X):
    X = X.reshape((-1, num_inputs))
    h1 = nd.dot(X, w1) + b1
    h1 = nd.relu(h1)
    h1 = dropout(h1, dropout_prob_1)
    h2 = nd.dot(h1, w2) + b2
    h2 = nd.relu(h2)
    h2 = dropout(h2, dropout_prob_2)
    y = nd.dot(h2, w3) + b3
    return y
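Both nets above call a dropout helper that is not defined in these snippets. A compatible sketch, following the inverted-dropout scheme from the top of this page (whether the second argument is the drop or the keep probability depends on the original helper; here it is treated as the drop probability):

from mxnet import nd

def dropout(X, drop_probability):
    keep_probability = 1 - drop_probability
    assert 0 <= keep_probability <= 1
    if keep_probability == 0:
        return X.zeros_like()
    # Zero out activations with probability drop_probability and rescale
    # the survivors so the expected activation is unchanged.
    mask = nd.random_uniform(0, 1.0, X.shape, ctx=X.context) < keep_probability
    return mask * X / keep_probability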
Example #12
 def forward(self, x):
     x = self.pool1(F.relu(self.conv1(x)))
     x = self.pool2(F.relu(self.conv2(x)))
     # 0 means copy over size from corresponding dimension.
     # -1 means infer size from the rest of dimensions.
     x = x.reshape((0, -1))
     x = F.relu(self.fc1(x))
     x = F.relu(self.fc2(x))
     x = self.fc3(x)
     return x
Example #13
 def forward(self, x):
     x = F.relu(self.conv1(x))
     x = F.relu(self.conv2(x))
     x = x.reshape((-1, 192))
     x = self.dense(x)
     x = self.dense2(x)
     x = self.dense3(x)
     x = self.dense4(x)
     probs = self.action_pred(x)
     values = self.value_pred(x)
     return mx.ndarray.softmax(probs), values
def net(X):
    X = X.reshape((-1, num_inputs))
    # First fully connected layer.
    h1 = nd.relu(nd.dot(X, W1) + b1)
    # Add a dropout layer after the first fully connected layer.
    h1 = dropout(h1, drop_prob1)
    # Second fully connected layer.
    h2 = nd.relu(nd.dot(h1, W2) + b2)
    # Add a dropout layer after the second fully connected layer.
    h2 = dropout(h2, drop_prob2)
    return nd.dot(h2, W3) + b3
Example #15
 def forward(self, x, z):
     bs = x.shape[0]
     x = F.relu(self.conv1(x))
     x = self.norm1(self.pool1(x))
     x = F.relu(self.conv2(x))
     x = self.norm2(self.pool2(x))
     x = F.relu(self.dense1(x))
     x = F.concat(x, z)
     status = self.encoder.begin_state(batch_size=bs)
     x, status = self.encoder(x, status)
     return x, status
Example #16
 def postprocess(self, x):
     """
     Description : module for postprocess
     """
     output = F.relu(x)
     output = self.conv_post_1(output)
     output = F.relu(output)
     output = self.conv_post_2(output)
     output = nd.reshape(output, (output.shape[1], output.shape[2]))
     output = F.transpose(output, axes=(1, 0))
     return output
Example #17
 def forward(self, x):
     x = F.relu(self.conv1(x))
     x = self.pool2(F.relu(self.conv2(x)))
     x = self.drop2D(x)
     # 0 means copy over size from corresponding dimension.
     # -1 means infer size from the rest of dimensions.
     # Essentially flattens to 1D.
     x = x.reshape((0, -1))
     x = F.relu(self.fc1(x))
     x = self.drop1D(x)
     x = F.relu(self.fc2(x))
     x = F.softmax(x)
     return x
Example #18
 def forward(self, x):
     x = F.relu(self.conv1(x))
     x = F.relu(self.conv2(x))
     x = x.reshape((-1, 192))
     x = x.reshape((1, 1, 192))
     #        x = self.lstm(x)
     #        x = self.dense(x)
     #        x = self.dense2(x)
     #        x = self.dense3(x)
     #        x = self.dense4(x)
     probs = self.action_pred(x)
     values = self.value_pred(x)
     probs = probs.reshape((-1, self.available_actions_count))
     values = values.reshape((-1, 1))
     return mx.ndarray.softmax(probs, axis=1), values
Example #19
    def bayes_forward(self,
                      x,
                      dense,
                      loss,
                      activation_fn=None,
                      is_target=False):
        weight = self.get_sample(mu=dense.weight_mu.data(),
                                 rho=dense.weight_rho.data(),
                                 is_target=is_target)
        bias = self.get_sample(mu=dense.bias_mu.data(),
                               rho=dense.bias_rho.data(),
                               is_target=is_target)

        loss = loss + log_gaussian(x=weight,
                                   mu=dense.weight_mu.data(),
                                   sigma=softplus(dense.weight_rho.data()))
        loss = loss + log_gaussian(x=bias,
                                   mu=dense.bias_mu.data(),
                                   sigma=softplus(dense.bias_rho.data()))
        loss = loss - log_gaussian(x=weight, mu=0., sigma=self.sigma_prior)
        loss = loss - log_gaussian(x=bias, mu=0., sigma=self.sigma_prior)

        result = nd.dot(x, weight) + bias
        if activation_fn is None:
            return result
        elif activation_fn == 'relu':
            return nd.relu(result)
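The log_gaussian, softplus and get_sample helpers used here (and in the multi-sample forward in Example #21 below) are not part of this snippet. Hedged sketches of the first two, consistent with how they are called (sigma parameterised as softplus(rho), Gaussian log-density summed over all entries, scalar mu/sigma allowed for the prior terms):

import math
from mxnet import nd

def softplus(x):
    # sigma = log(1 + exp(rho)) keeps the standard deviation positive
    return nd.log(1. + nd.exp(x))

def log_gaussian(x, mu, sigma):
    # Sum of element-wise log N(x | mu, sigma^2); mu/sigma may be scalars or NDArrays
    if not isinstance(sigma, nd.NDArray):
        sigma = nd.array([sigma], ctx=x.context)
    return nd.sum(-0.5 * math.log(2 * math.pi) - nd.log(sigma)
                  - (x - mu) ** 2 / (2 * sigma ** 2))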
    def forward(self, x):
        x = self.fc2(x)
        x = F.relu(x)
        x = F.Dropout(x)
        x = self.fc3(x)

        return x
Example #21
    def forward(self, inputs, is_target=False):
        result = None
        loss = 0.
        for _ in range(self.n_samples):
            tmp = inputs

            weights = []
            biases = []
            for i in range(len(self.weight_mus)):
                weights.append(self.get_sample(
                    mu=self.weight_mus[i].data(), rho=self.weight_rhos[i].data(), is_target=is_target))
                biases.append(self.get_sample(mu=self.bias_mus[i].data(), rho=self.bias_rhos[i].data(), is_target=is_target))
                loss = loss + log_gaussian(
                    x=weights[-1], mu=self.weight_mus[i].data(), sigma=softplus(self.weight_rhos[i].data()))
                loss = loss + log_gaussian(x=biases[-1], mu=self.bias_mus[i].data(), sigma=softplus(self.bias_rhos[i].data()))
                loss = loss - log_gaussian(x=weights[-1], mu=0., sigma=self.sigma_prior)
                loss = loss - log_gaussian(x=biases[-1], mu=0., sigma=self.sigma_prior)
            for i in range(len(weights)):
                tmp = nd.dot(tmp, weights[i]) + biases[i]
                if i != len(weights) - 1:
                    tmp = nd.relu(tmp)
            if result is None:
                result = nd.zeros_like(tmp)
            result = result + tmp
        result = result / float(self.n_samples)
        loss = loss / float(self.n_samples)
        return result, loss
    def forward(self, x, y):
        x = nd.relu(self.bn_z(self.dense_z(x)))
        
        y = nd.expand_dims(y, axis=2)
        y = nd.expand_dims(y, axis=2)
        y = nd.relu(self.bn_label(self.dense_label(y)))
        
        z = nd.concat(x, y, dim=1)
        
        z = z.reshape([z.shape[0],z.shape[1],1,1])
        x = nd.relu(self.bn2(self.deconv2(z)))
        x = nd.relu(self.bn3(self.deconv3(x)))
        x = nd.relu(self.bn4(self.deconv4(x)))

#        x = nd.sigmoid(self.out(z))
        
        return x
Example #23
def net(X, verbose=False):
    X = X.as_in_context(W1.context)
    # First convolutional layer
    h1_conv = nd.Convolution(data=X,
                             weight=W1,
                             bias=b1,
                             kernel=W1.shape[2:],
                             num_filter=W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation,
                    pool_type="max",
                    kernel=(2, 2),
                    stride=(2, 2))

    # h1_conv.shape:  (256, 20, 24, 24)
    # h1.shape:  (256, 20, 12, 12)
    #print('h1_conv.shape: ',h1_conv.shape)
    #print('h1.shape: ',h1.shape)

    # Second convolutional layer
    h2_conv = nd.Convolution(data=h1,
                             weight=W2,
                             bias=b2,
                             kernel=W2.shape[2:],
                             num_filter=W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation,
                    pool_type="max",
                    kernel=(2, 2),
                    stride=(2, 2))
    h2 = nd.flatten(h2)

    # First fully connected layer
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)

    # Second fully connected layer
    h4_linear = nd.dot(h3, W4) + b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense block:', h3.shape)
        print('2nd dense block:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
def net(x, is_training=False, verbose=False):
    x = x.as_in_context(w1.context)
    h1_conv = nd.Convolution(data=x,
                             weight=w1,
                             bias=b1,
                             kernel=w1.shape[2:],
                             num_filter=c1)
    h1_bn = utils.batch_norm(h1_conv, gamma1, beta1, is_training, moving_mean1,
                             moving_variance1)
    h1_activation = nd.relu(h1_bn)
    h1 = nd.Pooling(data=h1_activation,
                    pool_type='max',
                    kernel=(2, 2),
                    stride=(2, 2))

    h2_conv = nd.Convolution(data=h1,
                             weight=w2,
                             bias=b2,
                             kernel=w2.shape[2:],
                             num_filter=c2)
    h2_bn = utils.batch_norm(h2_conv, gamma2, beta2, is_training, moving_mean2,
                             moving_variance2)
    h2_activation = nd.relu(h2_bn)
    h2 = nd.Pooling(data=h2_activation,
                    pool_type='max',
                    kernel=(2, 2),
                    stride=(2, 2))
    h2 = nd.flatten(h2)

    h3_linear = nd.dot(h2, w3) + b3
    h3 = nd.relu(h3_linear)

    h4_linear = nd.dot(h3, w4) + b4

    if verbose:
        print('h1 conv block: ', h1.shape)
        print('h2 conv block: ', h2.shape)
        print('h3 dense block: ', h3.shape)
        print('h4 dense block: ', h4_linear.shape)
        print('output: ', h4_linear)

    return h4_linear.as_in_context(ctx)
Example #25
    def demo(self, x_low, x_high):
        import mxnet.ndarray as F
        x_low = F.contrib.BilinearResize2D(x_low, height=x_high.shape[2], width=x_high.shape[3])
        x_low = self.conv_low(x_low)
        x_high = self.conv_hign(x_high)

        x = x_low + x_high
        x = F.relu(x)

        x_low_cls = self.conv_low_cls(x_low)
        return x, x_low_cls
Example #26
    def hybrid_forward(self, F, x_low, x_high):
        x_low = F.contrib.BilinearResize2D(x_low,
                                           height=self._up_kwargs['height'],
                                           width=self._up_kwargs['width'])
        x_low = self.conv_low(x_low)
        x_high = self.conv_hign(x_high)

        x = x_low + x_high
        x = F.relu(x)

        x_low_cls = self.conv_low_cls(x_low)
        return x, x_low_cls
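Both fusion variants above first resize the low-resolution branch to the spatial size of the high-resolution one so the two can be summed. A standalone check of that resize step; all shapes are illustrative:

from mxnet import nd

x_low = nd.random.uniform(shape=(1, 8, 16, 16))    # NCHW, low resolution
x_high = nd.random.uniform(shape=(1, 8, 64, 64))   # NCHW, high resolution
resized = nd.contrib.BilinearResize2D(x_low, height=x_high.shape[2], width=x_high.shape[3])
print(resized.shape)  # (1, 8, 64, 64): now resized + x_high is well defined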
 def forward(self, x):
     """
     We split the input into two feature sequences, feed each into its own net, and then stack the outputs for the loss.
     """
     #input: (batch, seq_len, features) for 'nwc'
     #input: (batch, features, seq_len) for 'ncw'
     #pdb.set_trace()
     convi = self._convNet(x)  #O: (n, num_filter, w)
     if self._downsample is not None:  # differs from the original
         convi = self._downsample(convi)
     out = convi + x  # (n, c, w)
     return F.relu(out)
Example #28
 def forward(self, x, hidden):
     # convert NTC to TNC
     x = F.transpose(x, (1, 0, 2))
     output, hiddens = self.rnn(x, hidden)
     #print(output.shape)
     hidden = hiddens[-1]
     #print(hidden.shape)
     hidden = F.transpose(hidden, (1, 0, 2))
     output = self.fc(hidden)
     output = self.bn(output)
     output = F.relu(output)
     return output
Example #29
 def forward(self, x):  # NCHW
     h, w = x.shape[2], x.shape[3]
     res = []
     for i in range(h):
         res.append(
             nd.stack(*self.hcell.unroll(w, x[:, :, i, :], layout='NCT')[0],
                      axis=2))  # NCW
     for i in range(w):
         res.append(
             nd.stack(*self.vcell.unroll(h, x[:, :, :, i], layout='NCT')[0],
                      axis=2))  # NCH
     res = nd.relu(nd.stack(*res[:h], axis=2) + nd.stack(*res[h:], axis=3))
     return nd.concat(x, res, dim=1)
Example #30
def net_lenet(X, verbose=False):
    # First convolutional layer
    h1_conv = nd.Convolution(data=X,
                             weight=lenet_W1,
                             bias=lenet_b1,
                             kernel=lenet_W1.shape[2:],
                             num_filter=lenet_W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation,
                    pool_type="max",
                    kernel=(2, 2),
                    stride=(2, 2))
    # Second convolutional layer
    h2_conv = nd.Convolution(data=h1,
                             weight=lenet_W2,
                             bias=lenet_b2,
                             kernel=lenet_W2.shape[2:],
                             num_filter=lenet_W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation,
                    pool_type="max",
                    kernel=(2, 2),
                    stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, lenet_W3) + lenet_b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, lenet_W4) + lenet_b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense:', h3.shape)
        print('2nd dense:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
Example #31
    def forward(self, input, hidden, encoder_outputs):
        # input shape: (1,)
        embedded = self.embedding(input)
        if self.dropout_p > 0:
            embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(F.concat(embedded, hidden[0].flatten(), dim=1)))
        attn_applied = F.batch_dot(attn_weights.expand_dims(0),
                                 encoder_outputs.expand_dims(0))

        output = F.concat(embedded.flatten(), attn_applied.flatten(), dim=1)
        output = self.attn_combine(output).expand_dims(0)

        for i in range(self.n_layers):
            output = F.relu(output)
            output, hidden = self.gru(output, hidden)

        output = self.out(output)

        return output, hidden, attn_weights