def decode(self, trg_input, src_map, oov_list, enc_context, enc_hidden, trg_mask, ctx_mask):
        '''
        :param
                trg_input:         (batch_size, trg_len)
                src_map  :         (batch_size, src_len), same as src except that OOV words are replaced with temporary OOV indices, so the copy mechanism can map the probabilities of pointed words back to vocabulary words. Word indices may exceed vocab_size (e.g. 50000, 50001, 50002, ...), depending on how many OOV words appear in the source text
                enc_context:       (batch_size, src_len, hidden_size * num_direction), the outputs (hidden vectors) of the encoder
        :returns
            decoder_log_probs   : (batch_size, trg_seq_len, vocab_size + max_oov_number)
            decoder_outputs     : (batch_size, trg_seq_len, hidden_size)
            attn_weights        : (batch_size, trg_seq_len, src_seq_len)
            copy_weights        : (batch_size, trg_seq_len, src_seq_len)
        '''
        batch_size      = trg_input.size(0)
        src_len         = enc_context.size(1)
        trg_len         = trg_input.size(1)
        context_dim     = enc_context.size(2)
        trg_hidden_dim  = self.trg_hidden_dim

        # prepare the init hidden vector, (batch_size, dec_hidden_dim) -> 2 * (1, batch_size, dec_hidden_dim)
        init_hidden = self.init_decoder_state(enc_hidden[0], enc_hidden[1])

        # enc_context has to be reshaped before dot attention (batch_size, src_len, context_dim) -> (batch_size, src_len, trg_hidden_dim)
        enc_context = nn.Tanh()(self.encoder2decoder_hidden(enc_context.contiguous().view(-1, context_dim))).view(batch_size, src_len, trg_hidden_dim)

        # maximum length to unroll
        max_length  = trg_input.size(1) - 1

        # Teacher Forcing
        self.current_batch += 1
        if self.do_teacher_forcing():
            logging.info("Training batches with Teacher Forcing")
            '''
            Normal RNN procedure
            '''
            # truncate the last word, as there's no further word after it for decoder to predict
            trg_input = trg_input[:, :-1]

            # initialize target embedding and reshape the targets to be time step first
            trg_emb = self.embedding(trg_input) # (batch_size, trg_len, embed_dim)
            trg_emb  = trg_emb.permute(1, 0, 2) # (trg_len, batch_size, embed_dim)

            # the decoder LSTM's input and output are time-step first (trg_len, batch_size, trg_hidden_dim)
            decoder_outputs, hidden = self.decoder(
                trg_emb, init_hidden
            )
            # Get the h_tilde (batch_size, trg_len, trg_hidden_dim) and attention weights (batch_size, trg_len, src_len)
            h_tildes, attn_weights, attn_logits = self.attention_layer(decoder_outputs.permute(1, 0, 2), enc_context)

            # compute the decoder logits and read out as probs: p_x = Softmax(W_s * h_tilde)
            # h_tildes=(batch_size, trg_len, trg_hidden_size) -> decoder2vocab(h_tildes.view)=(batch_size * trg_len, vocab_size) -> decoder_logits=(batch_size, trg_len, vocab_size)
            decoder_logits = self.decoder2vocab(h_tildes.view(-1, trg_hidden_dim)).view(batch_size, max_length, -1)

            '''
            Copy Mechanism
            '''
            # copy_weights and copy_logits are (batch_size, trg_len, src_len)
            if self.copy_attention_layer:
                _, copy_weights, copy_logits    = self.copy_attention_layer(decoder_outputs.permute(1, 0, 2), enc_context)
            else:
                copy_weights = attn_weights
                copy_logits = attn_logits

            # merge the generative and copying probs into (batch_size, trg_len, vocab_size + max_oov_number)
            decoder_log_probs   = self.merge_copy_probs(decoder_logits, copy_logits, src_map, oov_list)
            decoder_outputs     = decoder_outputs.permute(1, 0, 2) # (batch_size, trg_len, trg_hidden_dim)

        else:
            logging.info("Training batches with All Sampling")
            '''
            Normal RNN procedure
            '''
            # take the first word (should be BOS <s>) of each target sequence (batch_size, 1)
            trg_input = trg_input[:, 0].unsqueeze(1)
            hidden = init_hidden
            decoder_log_probs = []
            decoder_outputs = []
            attn_weights = []
            copy_weights = []

            for di in range(max_length):
                # embed the current input token and make it time-step first
                trg_emb = self.embedding(trg_input)  # (batch_size, 1, embed_dim)
                trg_emb = trg_emb.permute(1, 0, 2)  # (1, batch_size, embed_dim)

                # advance the decoder by one step, carrying the hidden state across steps
                decoder_output, hidden = self.decoder(
                    trg_emb, hidden
                )

                # Get the h_tilde (hidden after attention) and attention weights. h_tilde (batch_size,1,trg_hidden), attn_weight & attn_logit(batch_size,1,src_len)
                h_tilde, attn_weight, attn_logit = self.attention_layer(decoder_output.permute(1, 0, 2), enc_context)

                # compute the output decode_logit and read-out as probs: p_x = Softmax(W_s * h_tilde)
                # h_tilde=(batch_size, 1, trg_hidden_size) -> decoder2vocab(h_tilde.view)=(batch_size * 1, vocab_size) -> decoder_logit=(batch_size, 1, vocab_size)
                decoder_logit = self.decoder2vocab(h_tilde.view(-1, trg_hidden_dim)).view(batch_size, 1, -1)

                '''
                Copy Mechanism
                '''
                # copy_weight and copy_logit are (batch_size, 1, src_len)
                if self.copy_attention_layer:
                    _, copy_weight, copy_logit = self.copy_attention_layer(decoder_output.permute(1, 0, 2), enc_context)
                else:
                    copy_weight = attn_weight
                    copy_logit = attn_logit

                # merge the generative and copying probs (batch_size, 1, vocab_size + max_oov_number)
                decoder_log_prob   = self.merge_copy_probs(decoder_logit, copy_logit, src_map, oov_list)

                '''
                Find the next word
                '''
                # (deprecated, should not merge) before locating the topk, we would move the probs of oovs to <unk>
                # oov2unk_prob = self.merge_oov2unk(decoder_log_prob, max_oov_number)
                top_v, top_idx = decoder_log_prob.data.topk(1, dim=-1)
                # map any OOV word back to <unk>, since the embedding layer only covers the vocabulary
                top_idx[top_idx >= self.vocab_size] = self.unk_word
                top_idx = Variable(top_idx.squeeze(2))
                # top_idx is (batch_size, 1) and becomes the next decoder input
                trg_input = top_idx.cuda() if torch.cuda.is_available() else top_idx

                # permute to trg_len first, otherwise the cat operation would mess up things
                decoder_log_probs.append(decoder_log_prob.permute(1, 0, 2))
                decoder_outputs.append(decoder_output)
                attn_weights.append(attn_weight.permute(1, 0, 2))
                copy_weights.append(copy_weight.permute(1, 0, 2))

            # convert output into the right shape and make batch first
            decoder_log_probs   = torch.cat(decoder_log_probs, 0).permute(1, 0, 2)  # (batch_size, trg_seq_len, vocab_size + max_oov_number)
            decoder_outputs     = torch.cat(decoder_outputs, 0).permute(1, 0, 2)  # (batch_size, trg_seq_len, hidden_size)
            attn_weights        = torch.cat(attn_weights, 0).permute(1, 0, 2)  # (batch_size, trg_seq_len, src_seq_len)
            copy_weights        = torch.cat(copy_weights, 0).permute(1, 0, 2)  # (batch_size, trg_seq_len, src_seq_len)

        # Return the final outputs (log probabilities), hidden states, and attention weights (for visualization)
        return decoder_log_probs, decoder_outputs, attn_weights, copy_weights
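The docstring above leans on self.merge_copy_probs to combine generation and copy scores over an extended vocabulary. A minimal sketch of one common way to do this (a hypothetical merge_copy_probs_sketch, not the author's implementation; it takes max_oov_number directly instead of oov_list):

import torch
import torch.nn.functional as F

def merge_copy_probs_sketch(decoder_logits, copy_logits, src_map, max_oov_number):
    # decoder_logits: (batch, trg_len, vocab_size); copy_logits: (batch, trg_len, src_len)
    batch_size, trg_len, vocab_size = decoder_logits.size()
    src_len = copy_logits.size(2)

    # normalize generation and copy scores in one softmax so they compete for probability mass
    flat = torch.cat([decoder_logits, copy_logits], dim=2).view(batch_size * trg_len, -1)
    probs = F.softmax(flat, dim=1)
    gen_probs, copy_probs = probs[:, :vocab_size], probs[:, vocab_size:]

    # route each source position's copy mass to its (possibly extended) vocab id from src_map
    extended = probs.new_zeros(batch_size * trg_len, vocab_size + max_oov_number)
    extended[:, :vocab_size] = gen_probs
    index = src_map.unsqueeze(1).expand(batch_size, trg_len, src_len)
    index = index.contiguous().view(batch_size * trg_len, src_len)
    extended.scatter_add_(1, index, copy_probs)  # repeated source words accumulate mass

    return torch.log(extended + 1e-12).view(batch_size, trg_len, -1)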
Example #2

    def __init__(self, input_nc, output_nc, ngf=32, n_downsampling=4, norm_layer=nn.BatchNorm2d):
        super(Encoder, self).__init__()
        self.output_nc = output_nc

        model = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0),
            norm_layer(ngf),
            nn.ReLU(True),
        ]
        ### downsample
        for i in range(n_downsampling):
            mult = 2 ** i
            model += [
                nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1),
                norm_layer(ngf * mult * 2),
                nn.ReLU(True),
            ]

        ### upsample
        for i in range(n_downsampling):
            mult = 2 ** (n_downsampling - i)
            model += [
                nn.ConvTranspose2d(
                    ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1
                ),
                norm_layer(int(ngf * mult / 2)),
                nn.ReLU(True),
            ]

        model += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), nn.Tanh()]
        self.model = nn.Sequential(*model)
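A quick shape check of the encoder above (a sketch; it assumes the method sits in its Encoder class and calls the model attribute directly, since no forward is shown). Four stride-2 downsamples are mirrored by four stride-2 upsamples, so the spatial size is preserved:

import torch

enc = Encoder(input_nc=3, output_nc=3)
x = torch.randn(1, 3, 256, 256)
y = enc.model(x)
print(y.shape)  # torch.Size([1, 3, 256, 256]); values in (-1, 1) from the final Tanh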
Example #3
    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, gpu_ids=[], padding_type='reflect'):
        assert(n_blocks >= 0)
        super(ResnetGeneratorMMReverse, self).__init__()
        self.input_nc = input_nc
        self.output_nc = output_nc
        self.ngf = ngf
        self.gpu_ids = gpu_ids
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d

        model = [nn.ReflectionPad2d(3),

                 nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0,
                           bias=use_bias),
                 norm_layer(ngf),
                 nn.ReLU(True)]


        n_downsampling = 2
        for i in range(n_downsampling):
            mult = 2**i
            model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
                                stride=2, padding=1, bias=use_bias),
                      norm_layer(ngf * mult * 2),
                      nn.ReLU(True)]

        pre_f_blocks = 4
        pre_l_blocks = 7
        mult = 2**n_downsampling

        for i in range(pre_f_blocks):
            model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]
        model_pre = []
        for i in range(pre_f_blocks, pre_l_blocks):
            model_pre += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]

        model_post1 = []
        model_post2 = []
        for i in range(pre_l_blocks, n_blocks):
            model_post1 += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]
            model_post2 += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]


        for i in range(n_downsampling):
            mult = 2**(n_downsampling - i)
            model_post1 += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
                                         kernel_size=3, stride=2,
                                         padding=1, output_padding=1,
                                         bias=use_bias),
                      norm_layer(int(ngf * mult / 2)),
                      nn.ReLU(True)]

            model_post2 += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
                                         kernel_size=3, stride=2,
                                         padding=1, output_padding=1,
                                         bias=use_bias),
                      norm_layer(int(ngf * mult / 2)),
                      nn.ReLU(True)]


        model_post1 += [nn.ReflectionPad2d(3)]
        model_post1 += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
        model_post1 += [nn.Tanh()]


        model_post2 += [nn.ReflectionPad2d(3)]
        model_post2 += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
        model_post2 += [nn.Tanh()]

        self.model_post1 = nn.Sequential(*model_post1)
        self.model_post2 = nn.Sequential(*model_post2)
        self.model_pre = nn.Sequential(*model_pre)
        self.model = nn.Sequential(*model)
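The constructor above builds a shared trunk (model, then model_pre) that feeds two parallel decoder heads. The snippet does not include the author's forward; a plausible sketch of how the pieces compose:

def forward_sketch(net, x):
    shared = net.model(x)           # conv stem, downsampling, first ResNet blocks
    shared = net.model_pre(shared)  # middle ResNet blocks
    out1 = net.model_post1(shared)  # head 1: final ResNet blocks + upsampling + Tanh
    out2 = net.model_post2(shared)  # head 2: same layout, separate weights
    return out1, out2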
Example #4
    def __init__(self,
                 vocabs,
                 opt,
                 predictor_tgt=None,
                 predictor_src=None,
                 PreModelClass='TransformerPredictor'):

        super().__init__(vocabs=vocabs, opt=opt)

        if not predictor_tgt:
            if opt.load_pred_target:
                predictor_tgt = eval(PreModelClass).from_file(
                    opt.load_pred_target, opt)
            else:
                predictor_tgt = eval(PreModelClass)(vocabs,
                                                    opt,
                                                    predict_inverse=False)

        if not predictor_src:
            if opt.load_pred_source:
                predictor_src = eval(PreModelClass).from_file(
                    opt.load_pred_source, opt)
            else:
                predictor_src = eval(PreModelClass)(vocabs,
                                                    opt,
                                                    predict_inverse=True)

        if opt.token_level:
            if predictor_src:
                predictor_src.vocabs = vocabs
            if predictor_tgt:
                predictor_tgt.vocabs = vocabs

        self.predictor_tgt = predictor_tgt
        self.predictor_src = predictor_src

        self.mlp = None
        self.sentence_pred = None
        self.sentence_sigma = None
        self.lstm_input_size = 2 * opt.hidden_pred + opt.out_embeddings_size

        if opt.mlp_est:
            self.mlp = nn.Sequential(
                nn.Linear(self.lstm_input_size, opt.hidden_est), nn.Tanh())
            self.lstm_input_size = opt.hidden_est

        self.lstm = nn.LSTM(
            input_size=self.lstm_input_size,
            hidden_size=self.opt.hidden_est,
            num_layers=self.opt.rnn_layers_est,
            batch_first=True,
            dropout=self.opt.dropout_est,
            bidirectional=True,
        )

        sentence_input_size = 2 * opt.rnn_layers_est * opt.hidden_est
        self.sentence_pred = nn.Sequential(
            nn.Linear(sentence_input_size, sentence_input_size // 2),
            nn.Sigmoid(),
            nn.Linear(sentence_input_size // 2, sentence_input_size // 4),
            nn.Sigmoid(),
            nn.Linear(sentence_input_size // 4, 1),
        )
        if self.opt.sentence_ll:
            # Predict truncated Gaussian distribution
            self.sentence_sigma = nn.Sequential(
                nn.Linear(sentence_input_size, sentence_input_size // 2),
                nn.Sigmoid(),
                nn.Linear(sentence_input_size // 2, sentence_input_size // 4),
                nn.Sigmoid(),
                nn.Linear(sentence_input_size // 4, 1),
                nn.Sigmoid(),
            )

        self.mse_loss = nn.MSELoss(reduction='sum')

        if opt.start_stop:
            self.start_PreQEFV = nn.Parameter(
                torch.zeros(1, 1, opt.out_embeddings_size))
            self.end_PreQEFV = nn.Parameter(
                torch.zeros(1, 1, opt.out_embeddings_size))

        self.opt = opt
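Note that sentence_input_size = 2 * opt.rnn_layers_est * opt.hidden_est matches the flattened final hidden state of the bidirectional LSTM (2 directions times rnn_layers_est layers, each of size hidden_est). A sketch of how sentence_pred could consume it (a hypothetical helper; the model's forward is not shown):

def sentence_score_sketch(model, features):
    # features: (batch, seq_len, model.lstm_input_size)
    _, (h_n, _) = model.lstm(features)  # h_n: (2 * rnn_layers_est, batch, hidden_est)
    summary = h_n.transpose(0, 1).reshape(features.size(0), -1)
    return model.sentence_pred(summary).squeeze(-1)  # one score per sentence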
Example #5
    def __init__(self,
                 cell='gru',
                 use_baseline=True,
                 n_actions=10,
                 n_units=64,
                 fusion_dim=128,
                 n_input=76,
                 n_hidden=128,
                 demo_dim=17,
                 n_output=1,
                 dropout=0.0,
                 lamda=0.5,
                 device='cpu'):
        super(Agent, self).__init__()

        self.cell = cell
        self.use_baseline = use_baseline
        self.n_actions = n_actions
        self.n_units = n_units
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.dropout = dropout
        self.lamda = lamda
        self.fusion_dim = fusion_dim
        self.demo_dim = demo_dim
        self.device = device

        self.agent1_action = []
        self.agent1_prob = []
        self.agent1_entropy = []
        self.agent1_baseline = []
        self.agent2_action = []
        self.agent2_prob = []
        self.agent2_entropy = []
        self.agent2_baseline = []

        self.agent1_fc1 = nn.Linear(self.n_hidden + self.demo_dim,
                                    self.n_units)
        self.agent2_fc1 = nn.Linear(self.n_input + self.demo_dim, self.n_units)
        self.agent1_fc2 = nn.Linear(self.n_units, self.n_actions)
        self.agent2_fc2 = nn.Linear(self.n_units, self.n_actions)
        if use_baseline:
            self.agent1_value = nn.Linear(self.n_units, 1)
            self.agent2_value = nn.Linear(self.n_units, 1)

        if self.cell == 'lstm':
            self.rnn = nn.LSTMCell(self.n_input, self.n_hidden)
        else:
            self.rnn = nn.GRUCell(self.n_input, self.n_hidden)

        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.0)
            elif 'weight' in name:
                nn.init.orthogonal_(param)

        if dropout > 0.0:
            self.nn_dropout = nn.Dropout(p=dropout)
        self.init_h = nn.Linear(self.demo_dim, self.n_hidden)
        self.init_c = nn.Linear(self.demo_dim, self.n_hidden)
        self.fusion = nn.Linear(self.n_hidden + self.demo_dim, self.fusion_dim)
        self.output = nn.Linear(self.fusion_dim, self.n_output)

        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=-1)
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
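A sketch of how one recurrent step of this agent could run (hypothetical helper; the author's forward and the REINFORCE bookkeeping lists, agent*_action/prob/entropy/baseline, are not shown):

import torch

def agent_step_sketch(agent, x_t, h, c, demo):
    # x_t: (batch, n_input), h/c: (batch, n_hidden), demo: (batch, demo_dim)
    if agent.cell == 'lstm':
        h, c = agent.rnn(x_t, (h, c))
    else:
        h = agent.rnn(x_t, h)
    fused = agent.relu(agent.fusion(torch.cat([h, demo], dim=1)))
    return agent.sigmoid(agent.output(fused)), h, c  # n_output probabilities per sample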
Example #6
    out = (out > 0.5) * 1
    return out.data.numpy()
# plot the decision boundary
plot_decision_boundary(lambda x: plot_network(x), x.numpy(), y.numpy())
plt.title("two layer network")
plt.show()


"""Below we introduce Sequential and Module"""
# Sequential lets us build a model as a chain of modules, while Module is a more flexible way to define a model
# build the model with Sequential
seq_net = nn.Sequential(
    nn.Linear(2, 4),  # the input layer has 2 neurons, the hidden layer has 4 neurons (xw + b)
    nn.Tanh(),
    nn.Linear(4, 1)  # the hidden layer has 4 neurons, the output layer has 1 neuron
)
# the layers of a Sequential model can be accessed by index
print(seq_net[0])
# print the first layer's weights
print(seq_net[0].weight)
# parameters() returns the model's parameters
param = seq_net.parameters()
# define the optimization algorithm
optimizer = optim.SGD(param, 1.)
# train for 10000 iterations
for epoch in range(10000):
    out = seq_net(Variable(x))
    loss = loss_func(out, Variable(y))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
Example #7

    def __init__(self, cfg):
        super(VqaModelDncQC, self).__init__()
        self.cfg = cfg
        self.img_encoder = ImgEncoder(
            cfg["hyperparameters"]["commun_embed_size"])
        self.qst_encoder = QstEncoderDnc(cfg)
        if cfg["dnc_c"]["nonlinearity"] == "tanh":
            self.nonlinearity = nn.Tanh()
        elif cfg["dnc_c"]["nonlinearity"] == "relu":
            self.nonlinearity = nn.ReLU()
        elif cfg["dnc_c"]["nonlinearity"] == "sigmoid":
            self.nonlinearity = nn.Sigmoid()
        else:
            raise ValueError(
                "<{}> is not a valid non-linearity function.".format(
                    cfg["dnc_c"]["nonlinearity"]))
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(cfg["hyperparameters"]["dropout"])
        if cfg["dnc_c"]["type"] == "MLP":
            self.dnc = DNC_MLP(
                input_size=cfg["hyperparameters"]["commun_embed_size"],
                output_size=cfg["dnc_c"]["output_size"],
                hidden_size=cfg["dnc_c"]["hidden_dim"],
                num_hidden_layers=cfg["dnc_c"]["num_layers_hidden"],
                dropout=cfg["dnc_c"]["dropout"],
                nr_cells=cfg["dnc_c"]["n"],
                cell_size=cfg["dnc_c"]["w"],
                read_heads=cfg["dnc_c"]["r"],
                nonlinearity=self.nonlinearity,
                gpu_id=cfg["hyperparameters"]["gpu_id"],
                debug=cfg["dnc_c"]["debug"],
                clip=20,
            )
        elif cfg["dnc_c"]["type"] == "LSTM":
            self.dnc = DNC(
                input_size=cfg["hyperparameters"]["commun_embed_size"],
                output_size=cfg["dnc_c"]["output_size"],
                hidden_size=cfg["dnc_c"]["hidden_dim"],
                rnn_type=cfg["dnc_c"]["rnn_type"],
                num_layers=cfg["dnc_c"]["num_layers"],
                num_hidden_layers=cfg["dnc_c"]["num_layers_hidden"],
                bias=True,
                batch_first=True,
                dropout=cfg["dnc_c"]["dropout"],
                bidirectional=cfg["dnc_c"]["bidirectional"],
                nr_cells=cfg["dnc_c"]["n"],
                cell_size=cfg["dnc_c"]["w"],
                read_heads=cfg["dnc_c"]["r"],
                gpu_id=cfg["hyperparameters"]["gpu_id"],
                independent_linears=True,
                share_memory=True,
                debug=cfg["dnc_c"]["debug"],
                clip=20)
        else:
            raise ValueError("dnc controller type <{}> is not defined".format(
                cfg["dnc"]["dnc_c_type"]))

        if cfg["dnc_c"]["concat_out_rv"]:
            in_fc_1 = cfg["dnc_c"][
                "output_size"] + cfg["dnc_c"]["w"] * cfg["dnc_c"]["r"]
        else:
            in_fc_1 = cfg["dnc_c"]["output_size"]
        self.fc_1 = nn.Linear(in_fc_1,
                              cfg["hyperparameters"]["ans_vocab_size"])
        self.fc_2 = nn.Linear(cfg["hyperparameters"]["ans_vocab_size"],
                              cfg["hyperparameters"]["ans_vocab_size"])
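After the DNC runs, the classifier head consumes its output, optionally concatenated with the memory read vectors (w * r values). A sketch of that final step (the names dnc_out and read_vectors are placeholders, since what self.dnc returns depends on the DNC implementation, and the ordering of tanh/dropout here is an assumption):

import torch

def answer_head_sketch(model, cfg, dnc_out, read_vectors):
    if cfg["dnc_c"]["concat_out_rv"]:
        # output_size + w * r features per sample
        feats = torch.cat([dnc_out, read_vectors.flatten(1)], dim=1)
    else:
        feats = dnc_out
    return model.fc_2(model.dropout(model.tanh(model.fc_1(feats))))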
Example #8
 def __init__(self, input_size, hidden_size, output_size):
     super(TwoLayerLRSeq, self).__init__()
     self.model = nn.Sequential(nn.Linear(input_size, hidden_size),
                                nn.Tanh(),
                                nn.Linear(hidden_size, output_size))
Example #9

 def __init__(self, input_size, use_stn=False, use_attention=False):
     super(PPG2ECG, self).__init__()
     self.use_stn = use_stn
     self.use_attention = use_attention
     # build main transformer
     self.main = nn.Sequential(
         # encoder
         nn.Conv1d(1, 32, kernel_size=31, stride=2, padding=15),
         nn.PReLU(32),
         nn.Conv1d(32, 64, 31, 1, 15),
         nn.PReLU(64),
         nn.Conv1d(64, 128, 31, 2, 15),
         nn.PReLU(128),
         nn.Conv1d(128, 256, 31, 1, 15),
         nn.PReLU(256),
         nn.Conv1d(256, 512, 31, 2, 15),
         nn.PReLU(512),
         # decoder
         nn.ConvTranspose1d(
             512, 256, kernel_size=31, stride=2,
             padding=15, output_padding=1),
         nn.PReLU(256),
         nn.ConvTranspose1d(256, 128, 31, 1, 15),
         nn.PReLU(128),
         nn.ConvTranspose1d(128, 64, 31, 2, 15, 1),
         nn.PReLU(64),
         nn.ConvTranspose1d(64, 32, 31, 1, 15),
         nn.PReLU(32),
         nn.ConvTranspose1d(32, 1, 31, 2, 15, 1),
         nn.Tanh(),
     )
     # build stn (optional)
     if use_stn:
         # pylint: disable=not-callable
         self.restriction = torch.tensor(
             [1, 0, 0, 0], dtype=torch.float, requires_grad=False)
         self.register_buffer('restriction_const', self.restriction)
         self.stn_conv = nn.Sequential(
             nn.Conv1d(
                 in_channels=1, out_channels=8, kernel_size=7, stride=1),
             nn.MaxPool1d(kernel_size=2, stride=2),
             nn.Conv1d(
                 in_channels=8, out_channels=10, kernel_size=5, stride=1),
             nn.MaxPool1d(kernel_size=2, stride=2),
         )
         n_stn_conv = self.get_stn_conv_out(input_size)
         self.stn_fc = nn.Sequential(
             Flatten(),
             nn.Linear(n_stn_conv, 32),
             nn.ReLU(True),
             nn.Linear(32, 4)
         )
         self.stn_fc[3].weight.data.zero_()
         self.stn_fc[3].bias.data = torch.FloatTensor([1, 0, 1, 0])
     # build attention network (optional)
     if use_attention:
         self.attn = nn.Sequential(
             nn.Linear(input_size, input_size),
             nn.ReLU(),
             nn.Linear(input_size, input_size)
         )
         self.attn_len = input_size
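The constructor calls self.get_stn_conv_out(input_size) (not shown) to size the STN's first linear layer. A common way to implement such a helper is a dry run through the conv stack with a dummy input (a sketch, not the author's code):

import torch

def get_stn_conv_out_sketch(stn_conv, input_size):
    # run a zero signal through the convs and count the flattened features
    with torch.no_grad():
        dummy = torch.zeros(1, 1, input_size)  # (batch, channels, length)
        return stn_conv(dummy).numel()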
Example #10

    def __init__(self,
                 input_nc,
                 output_nc,
                 ngf=64,
                 norm_layer=nn.BatchNorm3d,
                 use_dropout=False,
                 n_blocks=6,
                 padding_type='replicate'):
        """Construct a Resnet-based generator

        Parameters:
            input_nc (int)      -- the number of channels in input images
            output_nc (int)     -- the number of channels in output images
            ngf (int)           -- the number of filters in the last conv layer
            norm_layer          -- normalization layer
            use_dropout (bool)  -- if use dropout layers
            n_blocks (int)      -- the number of ResNet blocks
            padding_type (str)  -- the name of padding layer in conv layers: reflect | replicate | zero
        """
        assert (n_blocks >= 0)
        super(ResnetGenerator, self).__init__()
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm3d
        else:
            use_bias = norm_layer == nn.InstanceNorm3d

        model = [
            nn.ReplicationPad3d(3),
            nn.Conv3d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
            norm_layer(ngf),
            nn.ReLU(True)
        ]

        n_downsampling = 2
        for i in range(n_downsampling):  # add downsampling layers
            mult = 3**i
            model += [
                nn.Conv3d(ngf * mult,
                          ngf * mult * 3,
                          kernel_size=3,
                          stride=2,
                          padding=1,
                          bias=use_bias),
                norm_layer(ngf * mult * 3),
                nn.ReLU(True)
            ]

        mult = 3**n_downsampling
        for i in range(n_blocks):  # add ResNet blocks

            model += [
                ResnetBlock3D(ngf * mult,
                              padding_type=padding_type,
                              norm_layer=norm_layer,
                              use_dropout=use_dropout,
                              use_bias=use_bias)
            ]

        for i in range(n_downsampling):  # add upsampling layers
            mult = 3**(n_downsampling - i)
            model += [
                nn.ConvTranspose3d(ngf * mult,
                                   int(ngf * mult / 3),  # shrink channels by 3x, mirroring the 3x growth above
                                   kernel_size=3,
                                   stride=2,
                                   padding=1,
                                   output_padding=1,
                                   bias=use_bias),
                norm_layer(int(ngf * mult / 3)),
                nn.ReLU(True)
            ]
        model += [nn.ReplicationPad3d(3)]
        model += [nn.Conv3d(ngf, output_nc, kernel_size=7, padding=0)]
        model += [nn.Tanh()]

        self.model = nn.Sequential(*model)
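With the base-3 channel schedule (ngf, then 3 ngf, then 9 ngf at the bottleneck, and back down), a quick shape check looks like this (a sketch; it assumes ResnetBlock3D is defined and shape-preserving, and uses a small ngf to keep the 3-D volumes cheap):

import torch

net = ResnetGenerator(input_nc=1, output_nc=1, ngf=16)
vol = torch.randn(1, 1, 32, 64, 64)
print(net.model(vol).shape)  # torch.Size([1, 1, 32, 64, 64])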
Example #11
 def __init__(self, input_size, hidden_size, output_size):
     super(TwoLayerLR, self).__init__()
     self.fc1 = nn.Linear(input_size, hidden_size)
     self.tanh = nn.Tanh()
     self.fc2 = nn.Linear(hidden_size, output_size)
Example #12

    def __init__(self,
                 outer_nc,
                 inner_nc,
                 input_nc=None,
                 submodule=None,
                 outermost=False,
                 innermost=False,
                 norm_layer=nn.BatchNorm2d,
                 use_dropout=False):
        """Construct a Unet submodule with skip connections.

        Parameters:
            outer_nc (int) -- the number of filters in the outer conv layer
            inner_nc (int) -- the number of filters in the inner conv layer
            input_nc (int) -- the number of channels in input images/features
            submodule (UnetSkipConnectionBlock) -- previously defined submodules
            outermost (bool)    -- if this module is the outermost module
            innermost (bool)    -- if this module is the innermost module
            norm_layer          -- normalization layer
            use_dropout (bool)  -- if use dropout layers.
        """
        super(UnetSkipConnectionBlock, self).__init__()
        self.outermost = outermost
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d
        if input_nc is None:
            input_nc = outer_nc
        downconv = nn.Conv2d(input_nc,
                             inner_nc,
                             kernel_size=4,
                             stride=2,
                             padding=1,
                             bias=use_bias)
        downrelu = nn.LeakyReLU(0.2, True)
        downnorm = norm_layer(inner_nc)
        uprelu = nn.ReLU(True)
        upnorm = norm_layer(outer_nc)

        if outermost:
            upconv = nn.ConvTranspose2d(inner_nc * 2,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1)
            down = [downconv]
            up = [uprelu, upconv, nn.Tanh()]
            model = down + [submodule] + up
        elif innermost:
            upconv = nn.ConvTranspose2d(inner_nc,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        bias=use_bias)
            down = [downrelu, downconv]
            up = [uprelu, upconv, upnorm]
            model = down + up
        else:
            upconv = nn.ConvTranspose2d(inner_nc * 2,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        bias=use_bias)
            down = [downrelu, downconv, downnorm]
            up = [uprelu, upconv, upnorm]

            if use_dropout:
                model = down + [submodule] + up + [nn.Dropout(0.5)]
            else:
                model = down + [submodule] + up

        self.model = nn.Sequential(*model)
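The block is meant to be nested recursively: build the innermost block first, then wrap it outward. A minimal three-level U-Net built from the class above (a sketch; the snippet omits the forward, which conventionally returns torch.cat([x, self.model(x)], 1) for non-outermost blocks):

block = UnetSkipConnectionBlock(outer_nc=256, inner_nc=512, innermost=True)
block = UnetSkipConnectionBlock(outer_nc=128, inner_nc=256, submodule=block)
unet = UnetSkipConnectionBlock(outer_nc=64, inner_nc=128, input_nc=3,
                               submodule=block, outermost=True)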
Example #13
    def forward(self, input):
        tanh = nn.Tanh()
        height_batch = self.preprocess(input)
        height_batch = height_batch.view(-1, 4 * self.ngf, 4, 4)
        _4x4 = height_batch
        _8x8 = self._4_to_8(_4x4)
        _16x16 = self._8_to_16(_8x8)
        upsample = nn.Upsample(size=(32, 32), mode='bilinear')
        height_batch = (tanh(self._16_to_32(_16x16)) + \
                        upsample(tanh(self._16_to_16(_16x16))) + \
                        upsample(tanh(self._8_to_8(_8x8))) + \
                        upsample(tanh(self._4_to_4(_4x4)))) / 4.0
        height_batch = height_batch.permute(0, 3, 2, 1)
        if np.any(np.isnan(height_batch.data.numpy())):
            print('NANNANNAN')
            exit()
        if self.save_heightfield:
            height_batch_np = height_batch.data.numpy()
            height_flatten = np.zeros([32 * 8, 32 * 8, 1])
            for i in range(8):
                for j in range(8):
                    img = height_batch_np[8 * i + j, :, :, :]
                    height_flatten[32 * i:32 * (i + 1),
                                   32 * j:32 * (j + 1), :] = img
            image.imwrite(
                height_flatten.squeeze(),
                'results/heightfield_gan/heightfield_%06d.png' % self.iteration)

        output = Variable(torch.zeros([input.shape[0], 1, 32, 32]))
        for i in range(input.shape[0]):
            height = torch.stack([\
                Variable(torch.from_numpy(np.zeros(heightfield_res, dtype=np.float32))),
                height_batch[i, :, :, 0],
                Variable(torch.from_numpy(np.zeros(heightfield_res, dtype=np.float32)))],
                dim=-1)
            height = height.view([-1, 3])
            shape_plane.vertices = plane_vertices + height
            if self.save_heightfield:
                v = shape_plane.vertices.data.numpy()
                ind = shape_plane.indices.data.numpy() + 1
                with open('results/heightfield_gan/model_%06d_%03d.obj' \
                        % (self.iteration, i), 'w') as f:
                    for vid in range(v.shape[0]):
                        f.write('v %f %f %f\n' %
                                (v[vid, 0], v[vid, 1], v[vid, 2]))
                    for iid in range(ind.shape[0]):
                        f.write('f %d %d %d\n' %
                                (ind[iid, 0], ind[iid, 1], ind[iid, 2]))

            shape_plane.normals = compute_vertex_normal(
                shape_plane.vertices, shape_plane.indices)
            cam = camera.Camera(\
                    position     = Variable(torch.from_numpy(np.array([self.xz[i][0], 3, self.xz[i][1]], dtype=np.float32))),
                    look_at      = Variable(torch.from_numpy(np.array([0, 0,  0], dtype=np.float32))),
                    up           = Variable(torch.from_numpy(np.array([0, 1,  0], dtype=np.float32))),
                    cam_to_world = None,
                    fov          = Variable(torch.from_numpy(np.array([45.0], dtype=np.float32))),
                    clip_near    = Variable(torch.from_numpy(np.array([0.01], dtype=np.float32))),
                    clip_far     = Variable(torch.from_numpy(np.array([10000.0], dtype=np.float32))),
                    resolution   = self.resolution)
            args = render_pytorch.RenderFunction.serialize_scene(\
                cam,materials,shapes,lights,self.resolution,4,1)
            render = render_pytorch.RenderFunction.apply
            img = render(random.randint(0, 1048576), *args)
            img = img.permute([2, 1, 0])
            output[i, :, :, :] = img[0, :, :]
        return output
Example #14
    def __init__(self, input_size):
        super().__init__()

        self.dense = nn.Linear(input_size, input_size)
        self.activation = nn.Tanh()
Example #15
    def __init__(
        self,
        in_channels: int,
        latent_dim: int,
        hidden_dims: List = None,
        hidden_dims2: List = None,
        lr: float = 0.005,
        weight_decay: Optional[float] = 0,
        scheduler_gamma: Optional[float] = 0.95,
    ) -> None:
        super(TwoStageVAE, self).__init__(lr=lr,
                                          weight_decay=weight_decay,
                                          scheduler_gamma=scheduler_gamma)

        self.latent_dim = latent_dim

        modules = []
        if hidden_dims is None:
            hidden_dims = [32, 64, 128, 256, 512]

        if hidden_dims2 is None:
            hidden_dims2 = [1024, 1024]

        # Build Encoder
        for h_dim in hidden_dims:
            modules.append(
                nn.Sequential(
                    nn.Conv2d(
                        in_channels,
                        out_channels=h_dim,
                        kernel_size=3,
                        stride=2,
                        padding=1,
                    ),
                    nn.BatchNorm2d(h_dim),
                    nn.LeakyReLU(),
                ))
            in_channels = h_dim

        self.encoder = nn.Sequential(*modules)
        self.fc_mu = nn.Linear(hidden_dims[-1] * 4, latent_dim)
        self.fc_var = nn.Linear(hidden_dims[-1] * 4, latent_dim)

        # Build Decoder
        modules = []
        self.decoder_input = nn.Linear(latent_dim, hidden_dims[-1] * 4)
        hidden_dims.reverse()

        for i in range(len(hidden_dims) - 1):
            modules.append(
                nn.Sequential(
                    nn.ConvTranspose2d(
                        hidden_dims[i],
                        hidden_dims[i + 1],
                        kernel_size=3,
                        stride=2,
                        padding=1,
                        output_padding=1,
                    ),
                    nn.BatchNorm2d(hidden_dims[i + 1]),
                    nn.LeakyReLU(),
                ))
        self.decoder = nn.Sequential(*modules)

        self.final_layer = nn.Sequential(
            nn.ConvTranspose2d(
                hidden_dims[-1],
                hidden_dims[-1],
                kernel_size=3,
                stride=2,
                padding=1,
                output_padding=1,
            ),
            nn.BatchNorm2d(hidden_dims[-1]),
            nn.LeakyReLU(),
            nn.Conv2d(hidden_dims[-1],
                      out_channels=3,
                      kernel_size=3,
                      padding=1),
            nn.Tanh(),
        )

        # ---------------------- Second VAE ---------------------------#
        encoder2 = []
        in_channels = self.latent_dim
        for h_dim in hidden_dims2:
            encoder2.append(
                nn.Sequential(nn.Linear(in_channels, h_dim),
                              nn.BatchNorm1d(h_dim), nn.LeakyReLU()))
            in_channels = h_dim
        self.encoder2 = nn.Sequential(*encoder2)
        self.fc_mu2 = nn.Linear(hidden_dims2[-1], self.latent_dim)
        self.fc_var2 = nn.Linear(hidden_dims2[-1], self.latent_dim)

        decoder2 = []
        hidden_dims2.reverse()

        in_channels = self.latent_dim
        for h_dim in hidden_dims2:
            decoder2.append(
                nn.Sequential(nn.Linear(in_channels, h_dim),
                              nn.BatchNorm1d(h_dim), nn.LeakyReLU()))
            in_channels = h_dim
        self.decoder2 = nn.Sequential(*decoder2)
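The snippet defines layers only; the usual VAE plumbing around them would look like this (a sketch with hypothetical helper names; hidden_dims[-1] * 4 in fc_mu implies the encoder bottoms out at a 2x2 spatial map, i.e. 64x64 inputs through five stride-2 convs):

import torch

def encode_sketch(model, x):
    h = model.encoder(x).flatten(1)  # (batch, hidden_dims[-1] * 4)
    return model.fc_mu(h), model.fc_var(h)

def reparameterize_sketch(mu, log_var):
    # sample z = mu + sigma * eps with eps ~ N(0, I)
    std = torch.exp(0.5 * log_var)
    return mu + std * torch.randn_like(std)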
Example #16
    def __init__(self, norm_layer=nn.BatchNorm2d, classes=529):
        super(SIGGRAPHGenerator, self).__init__()

        # Conv1
        model1 = [
            nn.Conv2d(4, 64, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model1 += [
            nn.ReLU(True),
        ]
        model1 += [
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model1 += [
            nn.ReLU(True),
        ]
        model1 += [
            norm_layer(64),
        ]
        # add a subsampling operation

        # Conv2
        model2 = [
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model2 += [
            nn.ReLU(True),
        ]
        model2 += [
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model2 += [
            nn.ReLU(True),
        ]
        model2 += [
            norm_layer(128),
        ]
        # add a subsampling layer operation

        # Conv3
        model3 = [
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model3 += [
            nn.ReLU(True),
        ]
        model3 += [
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model3 += [
            nn.ReLU(True),
        ]
        model3 += [
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model3 += [
            nn.ReLU(True),
        ]
        model3 += [
            norm_layer(256),
        ]
        # add a subsampling layer operation

        # Conv4
        model4 = [
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model4 += [
            nn.ReLU(True),
        ]
        model4 += [
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model4 += [
            nn.ReLU(True),
        ]
        model4 += [
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model4 += [
            nn.ReLU(True),
        ]
        model4 += [
            norm_layer(512),
        ]

        # Conv5
        model5 = [
            nn.Conv2d(512,
                      512,
                      kernel_size=3,
                      dilation=2,
                      stride=1,
                      padding=2,
                      bias=True),
        ]
        model5 += [
            nn.ReLU(True),
        ]
        model5 += [
            nn.Conv2d(512,
                      512,
                      kernel_size=3,
                      dilation=2,
                      stride=1,
                      padding=2,
                      bias=True),
        ]
        model5 += [
            nn.ReLU(True),
        ]
        model5 += [
            nn.Conv2d(512,
                      512,
                      kernel_size=3,
                      dilation=2,
                      stride=1,
                      padding=2,
                      bias=True),
        ]
        model5 += [
            nn.ReLU(True),
        ]
        model5 += [
            norm_layer(512),
        ]

        # Conv6
        model6 = [
            nn.Conv2d(512,
                      512,
                      kernel_size=3,
                      dilation=2,
                      stride=1,
                      padding=2,
                      bias=True),
        ]
        model6 += [
            nn.ReLU(True),
        ]
        model6 += [
            nn.Conv2d(512,
                      512,
                      kernel_size=3,
                      dilation=2,
                      stride=1,
                      padding=2,
                      bias=True),
        ]
        model6 += [
            nn.ReLU(True),
        ]
        model6 += [
            nn.Conv2d(512,
                      512,
                      kernel_size=3,
                      dilation=2,
                      stride=1,
                      padding=2,
                      bias=True),
        ]
        model6 += [
            nn.ReLU(True),
        ]
        model6 += [
            norm_layer(512),
        ]

        # Conv7
        model7 = [
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model7 += [
            nn.ReLU(True),
        ]
        model7 += [
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model7 += [
            nn.ReLU(True),
        ]
        model7 += [
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model7 += [
            nn.ReLU(True),
        ]
        model7 += [
            norm_layer(512),
        ]

        # Conv8
        model8up = [
            nn.ConvTranspose2d(512,
                               256,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               bias=True)
        ]
        model3short8 = [
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        ]

        model8 = [
            nn.ReLU(True),
        ]
        model8 += [
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model8 += [
            nn.ReLU(True),
        ]
        model8 += [
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model8 += [
            nn.ReLU(True),
        ]
        model8 += [
            norm_layer(256),
        ]

        # Conv9
        model9up = [
            nn.ConvTranspose2d(256,
                               128,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               bias=True),
        ]
        model2short9 = [
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        # add the two feature maps above

        model9 = [
            nn.ReLU(True),
        ]
        model9 += [
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        model9 += [
            nn.ReLU(True),
        ]
        model9 += [
            norm_layer(128),
        ]

        # Conv10
        model10up = [
            nn.ConvTranspose2d(128,
                               128,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               bias=True),
        ]
        model1short10 = [
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=True),
        ]
        # add the two feature maps above

        model10 = [
            nn.ReLU(True),
        ]
        model10 += [
            nn.Conv2d(128,
                      128,
                      kernel_size=3,
                      dilation=1,
                      stride=1,
                      padding=1,
                      bias=True),
        ]
        model10 += [
            nn.LeakyReLU(negative_slope=.2),
        ]

        # classification output
        model_class = [
            nn.Conv2d(256,
                      classes,
                      kernel_size=1,
                      padding=0,
                      dilation=1,
                      stride=1,
                      bias=True),
        ]

        # regression output
        model_out = [
            nn.Conv2d(128,
                      2,
                      kernel_size=1,
                      padding=0,
                      dilation=1,
                      stride=1,
                      bias=True),
        ]
        model_out += [nn.Tanh()]

        self.model1 = nn.Sequential(*model1)
        self.model2 = nn.Sequential(*model2)
        self.model3 = nn.Sequential(*model3)
        self.model4 = nn.Sequential(*model4)
        self.model5 = nn.Sequential(*model5)
        self.model6 = nn.Sequential(*model6)
        self.model7 = nn.Sequential(*model7)
        self.model8up = nn.Sequential(*model8up)
        self.model8 = nn.Sequential(*model8)
        self.model9up = nn.Sequential(*model9up)
        self.model9 = nn.Sequential(*model9)
        self.model10up = nn.Sequential(*model10up)
        self.model10 = nn.Sequential(*model10)
        self.model3short8 = nn.Sequential(*model3short8)
        self.model2short9 = nn.Sequential(*model2short9)
        self.model1short10 = nn.Sequential(*model1short10)

        self.model_class = nn.Sequential(*model_class)
        self.model_out = nn.Sequential(*model_out)

        self.upsample4 = nn.Sequential(*[
            nn.Upsample(scale_factor=4, mode='bilinear'),
        ])
        self.softmax = nn.Sequential(*[
            nn.Softmax(dim=1),
        ])
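The model*short* branches realize the "add the two feature maps above" comments: each upsampled decoder map is summed with a projected encoder map. A sketch of the forward wiring (hypothetical; it ignores the subsampling steps the comments mark as "add a subsampling operation"):

def forward_sketch(net, x):
    conv1 = net.model1(x)
    conv2 = net.model2(conv1)
    conv3 = net.model3(conv2)
    conv4 = net.model4(conv3)
    conv5 = net.model5(conv4)
    conv6 = net.model6(conv5)
    conv7 = net.model7(conv6)
    conv8 = net.model8(net.model8up(conv7) + net.model3short8(conv3))
    conv9 = net.model9(net.model9up(conv8) + net.model2short9(conv2))
    conv10 = net.model10(net.model10up(conv9) + net.model1short10(conv1))
    class_out = net.softmax(net.model_class(conv8))  # per-pixel color-class distribution
    reg_out = net.model_out(conv10)                  # ab-channel regression in (-1, 1)
    return class_out, reg_out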
Example #17

 def __init__(self, config):
     super().__init__()
     self.dense = nn.Linear(config.hidden_size, config.hidden_size)
     self.activation = nn.Tanh()
Example #18
import numpy as np
import csv
import math
import os.path
import timeit
from collections import deque
import pickle
from multiprocessing import Pool
import torch.nn as nn

N, D_in, H, D_out = 4361, 22, 100, 10

#x = Variable(torch.zeros(N, D_in), requires_grad=False)
#y = Variable(torch.zeros(N, 3), requires_grad=False)
relu = nn.ReLU()
sig = nn.Sigmoid()
tanh = nn.Tanh()
loss_fn = nn.MSELoss()
learning_rate = 0.0001


class CarData:
    def __init__(self, dataList):
        self.outputdata = []
        self.sensordata = []
        self.outputdata.append(dataList[0])
        self.outputdata.append(dataList[1])
        self.outputdata.append(dataList[2])
        for i in range(3, len(dataList)):
            self.sensordata.append(dataList[i])

    def get_output_data(self):
        return self.outputdata
Example #19
 def __init__(self, num_input, num_hidden, num_output):
     super(Module_model, self).__init__()
     self.layer1 = nn.Linear(num_input, num_hidden)
     self.layer2 = nn.Tanh()
     self.layer3 = nn.Linear(num_hidden, num_output)
Example #20
    def __init__(self, out_dim):
        super(SAP, self).__init__()

        # Setup
        self.act_fn = nn.Tanh()
        self.sap_layer = SelfAttentionPooling(out_dim)
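SelfAttentionPooling (defined elsewhere) typically collapses a (batch, seq_len, dim) sequence into a single (batch, dim) vector via learned attention weights. A common formulation, as a sketch:

import torch
from torch import nn

class SelfAttentionPoolingSketch(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.W = nn.Linear(input_dim, 1)  # one attention score per frame

    def forward(self, batch_rep):
        # batch_rep: (batch, seq_len, input_dim)
        att_w = torch.softmax(self.W(batch_rep).squeeze(-1), dim=-1)  # (batch, seq_len)
        return torch.sum(batch_rep * att_w.unsqueeze(-1), dim=1)      # (batch, input_dim)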
Example #21

    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 num_classes=1000,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4.,
                 qkv_bias=True,
                 qk_scale=None,
                 representation_size=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 hybrid_backbone=None,
                 norm_layer=None):
        """
        Args:
            img_size (int, tuple): input image size
            patch_size (int, tuple): patch size
            in_chans (int): number of input channels
            num_classes (int): number of classes for classification head
            embed_dim (int): embedding dimension
            depth (int): depth of transformer
            num_heads (int): number of attention heads
            mlp_ratio (int): ratio of mlp hidden dim to embedding dim
            qkv_bias (bool): enable bias for qkv if True
            qk_scale (float): override default qk scale of head_dim ** -0.5 if set
            representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set
            drop_rate (float): dropout rate
            attn_drop_rate (float): attention dropout rate
            drop_path_rate (float): stochastic depth rate
            hybrid_backbone (nn.Module): CNN backbone to use in-place of PatchEmbed module
            norm_layer: (nn.Module): normalization layer
        """
        super().__init__()
        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)

        if hybrid_backbone is not None:
            self.patch_embed = HybridEmbed(hybrid_backbone,
                                           img_size=img_size,
                                           in_chans=in_chans,
                                           embed_dim=embed_dim)
        else:
            self.patch_embed = PatchEmbed(img_size=img_size,
                                          patch_size=patch_size,
                                          in_chans=in_chans,
                                          embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(
            torch.zeros(1, num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth decay rule
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
        self.blocks = nn.ModuleList([
            Block(dim=embed_dim,
                  num_heads=num_heads,
                  mlp_ratio=mlp_ratio,
                  qkv_bias=qkv_bias,
                  qk_scale=qk_scale,
                  drop=drop_rate,
                  attn_drop=attn_drop_rate,
                  drop_path=dpr[i],
                  norm_layer=norm_layer) for i in range(depth)
        ])
        self.norm = norm_layer(embed_dim)

        # Representation layer
        if representation_size:
            self.num_features = representation_size
            self.pre_logits = nn.Sequential(
                OrderedDict([('fc', nn.Linear(embed_dim, representation_size)),
                             ('act', nn.Tanh())]))
        else:
            self.pre_logits = nn.Identity()

        # Classifier head
        self.head = nn.Linear(
            self.num_features,
            num_classes) if num_classes > 0 else nn.Identity()

        trunc_normal_(self.pos_embed, std=.02)
        trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)
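A typical forward pass through the components initialized above (a sketch; the snippet shows only the constructor):

import torch

def forward_sketch(vit, x):
    x = vit.patch_embed(x)                         # (B, num_patches, embed_dim)
    cls = vit.cls_token.expand(x.size(0), -1, -1)  # prepend the class token
    x = torch.cat((cls, x), dim=1) + vit.pos_embed
    x = vit.pos_drop(x)
    for blk in vit.blocks:
        x = blk(x)
    x = vit.norm(x)
    return vit.head(vit.pre_logits(x[:, 0]))       # classify on the class token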
Example #22
    def __init__(self, conv_dim=64):
        super(Generator, self).__init__()
        input_dim = 256
        curr_dim = input_dim

        # Makeup representation fully connected layer
        layers_makeup = []
        layers_makeup.append(nn.Linear(32, 256))
        layers_makeup.append(nn.ReLU(inplace=True))
        layers_makeup.append(nn.Linear(256, 256))
        layers_makeup.append(nn.ReLU(inplace=True))
        layers_makeup.append(nn.Linear(256, 512))

        self.makeup = nn.Sequential(*layers_makeup)

        # Pose and Face blend
        layers_blend = []
        for i in range(4):
            layers_blend.append(
                ResidualBlock(dim_in=input_dim * 2, dim_out=input_dim * 2))
        layers_blend.append(
            nn.Conv2d(input_dim * 2,
                      input_dim,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=False))
        layers_blend.append(nn.InstanceNorm2d(input_dim, affine=True))
        layers_blend.append(nn.ReLU(inplace=True))

        self.blend = nn.Sequential(*layers_blend)

        # Main Generator
        self.res_1 = ResidualBlock(dim_in=input_dim, dim_out=input_dim)
        self.res_2 = ResidualBlock(dim_in=input_dim, dim_out=input_dim)
        self.res_3 = ResidualBlock(dim_in=input_dim, dim_out=input_dim)
        self.res_4 = ResidualBlock(dim_in=input_dim, dim_out=input_dim)

        # Up-Sampling
        layers = []
        for i in range(2):
            layers.append(
                nn.ConvTranspose2d(curr_dim,
                                   curr_dim // 2,
                                   kernel_size=4,
                                   stride=2,
                                   padding=1,
                                   bias=False))
            layers.append(nn.InstanceNorm2d(curr_dim // 2, affine=True))
            layers.append(nn.ReLU(inplace=True))
            curr_dim = curr_dim // 2

        layers.append(
            nn.Conv2d(curr_dim,
                      3,
                      kernel_size=7,
                      stride=1,
                      padding=3,
                      bias=False))
        layers.append(nn.InstanceNorm2d(3, affine=True))
        layers.append(nn.Tanh())

        self.main = nn.Sequential(*layers)
Example #23
    def __init__(self,
                 outer_nc,
                 inner_nc,
                 input_nc=None,
                 submodule=None,
                 outermost=False,
                 innermost=False,
                 norm_layer=nn.BatchNorm2d,
                 use_dropout=False):
        super(UnetSkipConnectionBlock, self).__init__()
        self.outermost = outermost
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d
        if input_nc is None:
            input_nc = outer_nc
        downconv = nn.Conv2d(input_nc,
                             inner_nc,
                             kernel_size=4,
                             stride=2,
                             padding=1,
                             bias=use_bias)
        downrelu = nn.LeakyReLU(0.2, True)
        downnorm = norm_layer(inner_nc)
        uprelu = nn.ReLU(True)
        upnorm = norm_layer(outer_nc)

        if outermost:
            upconv = nn.ConvTranspose2d(inner_nc * 2,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1)
            down = [downconv]
            up = [uprelu, upconv, nn.Tanh()]
            model = down + [submodule] + up
        elif innermost:
            upconv = nn.ConvTranspose2d(inner_nc,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        bias=use_bias)
            down = [downrelu, downconv]
            up = [uprelu, upconv, upnorm]
            model = down + up
        else:
            upconv = nn.ConvTranspose2d(inner_nc * 2,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        bias=use_bias)
            down = [downrelu, downconv, downnorm]
            up = [uprelu, upconv, upnorm]

            if use_dropout:
                model = down + [submodule] + up + [nn.Dropout(0.5)]
            else:
                model = down + [submodule] + up

        self.model = nn.Sequential(*model)
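The recursion is easiest to see by nesting blocks from the innermost out, as pix2pix-style U-Nets do. A sketch with illustrative channel sizes (the forward, not shown above, is assumed to return torch.cat([x, self.model(x)], 1) for non-outermost blocks):

    import torch

    # illustrative channel sizes, not taken from any particular configuration
    block = UnetSkipConnectionBlock(256, 512, innermost=True)           # bottleneck
    block = UnetSkipConnectionBlock(128, 256, submodule=block)          # middle level
    unet = UnetSkipConnectionBlock(3, 128, input_nc=3, submodule=block, outermost=True)

    x = torch.randn(1, 3, 64, 64)
    print(unet(x).shape)  # torch.Size([1, 3, 64, 64]) under the assumed skip-concat forward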
Example #24
    def __init__(self, receptive_field=1024, mgc_size=60, upsample_size=200, filter_size=256,
                 num_blocks=4):
        super(VocoderNetwork, self).__init__()

        self.RECEPTIVE_FIELD = receptive_field
        self.NUM_NETWORKS = 1
        self.MGC_SIZE = mgc_size
        self.UPSAMPLE_SIZE = upsample_size
        self.NUM_BLOCKS = num_blocks

        self.convolutions = torch.nn.ModuleList(
            [WaveNet(self.RECEPTIVE_FIELD, mgc_size, filter_size) for ii in range(num_blocks)])

        self.conditioning = nn.Sequential(nn.Linear(mgc_size, mgc_size * upsample_size), nn.Tanh())

        self.pre_output = torch.nn.ModuleList([nn.Linear(filter_size, 256) for ii in range(num_blocks)])
        self.mean_layer = torch.nn.ModuleList([nn.Linear(256, 1) for ii in range(num_blocks)])
        self.stdev_layer = torch.nn.ModuleList([nn.Linear(256, 1) for ii in range(num_blocks)])
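The conditioning branch is a per-frame upsampler: one Linear layer expands every mgc_size-dim frame into upsample_size conditioning vectors of the same width, moving frame-rate features up to sample rate. A small shape sketch under the constructor's default sizes:

    import torch
    import torch.nn as nn

    mgc_size, upsample_size = 60, 200  # defaults from the constructor above
    conditioning = nn.Sequential(nn.Linear(mgc_size, mgc_size * upsample_size), nn.Tanh())

    frames = torch.randn(8, mgc_size)                                  # 8 MGC frames
    upsampled = conditioning(frames).view(8 * upsample_size, mgc_size)
    print(upsampled.shape)                                             # torch.Size([1600, 60])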
Example #25
    def __init__(self,
                 input_dim,
                 output_dim,
                 kernel_size,
                 stride,
                 padding=0,
                 conv_padding=0,
                 norm='none',
                 activation='relu',
                 pad_type='zero',
                 transpose=False):
        super(Conv2dBlock, self).__init__()
        self.use_bias = True
        # initialize padding
        if pad_type == 'reflect':
            self.pad = nn.ReflectionPad2d(padding)
        elif pad_type == 'replicate':
            self.pad = nn.ReplicationPad2d(padding)
        elif pad_type == 'zero':
            self.pad = nn.ZeroPad2d(padding)
        elif pad_type == 'none':
            self.pad = None
        else:
            assert 0, "Unsupported padding type: {}".format(pad_type)

        # initialize normalization
        norm_dim = output_dim
        if norm == 'bn':
            self.norm = nn.BatchNorm2d(norm_dim)
        elif norm == 'in':
            self.norm = nn.InstanceNorm2d(norm_dim)
        elif norm == 'none':
            self.norm = None
        else:
            assert 0, "Unsupported normalization: {}".format(norm)

        # initialize activation
        if activation == 'relu':
            self.activation = nn.ReLU(inplace=True)
        elif activation == 'lrelu':
            self.activation = nn.LeakyReLU(0.2, inplace=True)
        elif activation == 'prelu':
            self.activation = nn.PReLU()
        elif activation == 'selu':
            self.activation = nn.SELU(inplace=True)
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        elif activation == 'none':
            self.activation = None
        else:
            assert 0, "Unsupported activation: {}".format(activation)

        # initialize convolution
        if transpose:
            self.conv = nn.ConvTranspose2d(input_dim,
                                           output_dim,
                                           kernel_size,
                                           stride,
                                           padding=conv_padding,
                                           output_padding=conv_padding,
                                           bias=self.use_bias)
        else:
            self.conv = nn.Conv2d(input_dim,
                                  output_dim,
                                  kernel_size,
                                  stride,
                                  padding=conv_padding,
                                  bias=self.use_bias)
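Only the constructor is shown; the forward pass presumably applies pad, conv, norm and activation in that order. A hedged sketch of such a forward (an assumption, since the snippet omits it):

    def forward(self, x):
        if self.pad is not None:
            x = self.pad(x)            # explicit padding layer, if configured
        x = self.conv(x)               # plain or transposed convolution
        if self.norm is not None:
            x = self.norm(x)           # batch or instance norm
        if self.activation is not None:
            x = self.activation(x)     # relu / lrelu / prelu / selu / tanh
        return x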
Example #26
    def __init__(self,
                 device,
                 w2v_weights,
                 tag_to_itx,
                 hidden_dim,
                 drop_rate,
                 bidirectional=False,
                 freeze=True,
                 embedding_norm=6,
                 c2v_weights=None,
                 pad_word_length=16,
                 embedder="none",
                 more_features=False):

        super(LstmCrf, self).__init__()

        self.device = device
        self.hidden_dim = hidden_dim
        self.tagset_size = len(tag_to_itx)
        self.embedding_dim = w2v_weights.shape[1]
        self.w2v_weights = w2v_weights
        self.c2v_weights = c2v_weights
        self.pad_word_length = pad_word_length
        self.bidirectional = bidirectional
        self.embedder = embedder
        self.more_features = more_features

        self.drop_rate = drop_rate
        self.drop = nn.Dropout(self.drop_rate)

        # Use the Elmo embedder instead of the classical ones.
        if self.embedder != "none":
            if self.embedder == "elmo-combined":
                self.embedding = ElmoCombiner()
            elif self.embedder == "elmo":
                self.embedding = ElmoCombiner(freeze=True)
            else:
                self.embedding = None
            self.embedding_dim = 768 if self.embedder == "bert" else 1024
        else:
            self.embedding = nn.Embedding.from_pretrained(
                torch.FloatTensor(w2v_weights), freeze=freeze)
            self.embedding.max_norm = embedding_norm

        # We add the dimensionality of the other features (POS and spaCy).
        if self.more_features:
            self.embedding_dim += 58 + 18

        # recurrent and mapping to tagset
        self.recurrent = nn.LSTM(input_size=self.embedding_dim,
                                 hidden_size=self.hidden_dim //
                                 (1 if not self.bidirectional else 2),
                                 bidirectional=self.bidirectional,
                                 batch_first=True)
        self.bnorm = nn.BatchNorm2d(1)
        self.fc = nn.Linear(self.hidden_dim, self.tagset_size +
                            2)  # + 2 because of start and end token
        self.bnorm2 = nn.BatchNorm2d(1)

        # crf for scoring at a global level
        self.crf = CRF(self.device, self.tagset_size)

        # setup convolution on characters if c2v_weights are passed
        if self.c2v_weights is not None:
            self.char_embedding_dim = c2v_weights.shape[1]
            self.char_embedding = nn.Embedding.from_pretrained(
                torch.FloatTensor(c2v_weights), freeze=True)
            self.char_embedding.max_norm = embedding_norm
            self.feats = 20  # for the output channels of the conv layers

            self.recurrent = nn.LSTM(self.embedding_dim + 50,
                                     self.hidden_dim //
                                     (1 if not self.bidirectional else 2),
                                     batch_first=True,
                                     bidirectional=self.bidirectional)

            # conv layers for single character, pairs of characters, 3x characters
            self.ngram1 = nn.Sequential(
                nn.Conv2d(1,
                          self.feats * 1,
                          kernel_size=(1, self.char_embedding_dim),
                          stride=(1, self.char_embedding_dim),
                          padding=0),
                nn.Dropout2d(p=self.drop_rate),
                nn.MaxPool2d(kernel_size=(self.pad_word_length, 1)),
                nn.Tanh(),
            )

            self.ngram2 = nn.Sequential(
                nn.Conv2d(1,
                          self.feats * 2,
                          kernel_size=(2, self.char_embedding_dim),
                          stride=(1, self.char_embedding_dim),
                          padding=0),
                nn.Dropout2d(p=self.drop_rate),
                nn.MaxPool2d(kernel_size=(self.pad_word_length - 1, 1)),
                nn.Tanh(),
            )

            self.ngram3 = nn.Sequential(
                nn.Conv2d(1,
                          self.feats * 3,
                          kernel_size=(3, self.char_embedding_dim),
                          stride=(1, self.char_embedding_dim),
                          padding=0),
                nn.Dropout2d(p=self.drop_rate),
                nn.MaxPool2d(kernel_size=(self.pad_word_length - 2, 1)),
                nn.Tanh(),
            )

            # seq layers to elaborate on the output of conv layers
            self.fc1 = nn.Sequential(nn.Linear(self.feats, 10), )
            self.fc2 = nn.Sequential(nn.Linear(self.feats * 2, 20), )
            self.fc3 = nn.Sequential(nn.Linear(self.feats * 3, 20), )
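The pooling kernels shrink by one per branch because a height-n convolution over pad_word_length characters leaves pad_word_length - n + 1 positions, which the max-pool collapses into a single feature per channel. A quick shape check under assumed sizes (16-character words, 30-dim character embeddings, feats = 20):

    import torch
    import torch.nn as nn

    pad_word_length, char_dim, feats = 16, 30, 20  # assumed for illustration
    ngram2 = nn.Sequential(
        nn.Conv2d(1, feats * 2, kernel_size=(2, char_dim), stride=(1, char_dim), padding=0),
        nn.Dropout2d(p=0.5),
        nn.MaxPool2d(kernel_size=(pad_word_length - 1, 1)),
        nn.Tanh(),
    )
    chars = torch.randn(4, 1, pad_word_length, char_dim)  # one character "image" per word
    print(ngram2(chars).shape)  # torch.Size([4, 40, 1, 1]): one 40-dim bigram feature per word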
Example #27
    def __init__(
        self,
        input_nc,
        output_nc,
        ngf=64,
        k_size=3,
        n_downsampling=8,
        norm_layer=nn.BatchNorm2d,
        padding_type="reflect",
        opt=None,
    ):
        super(GlobalGenerator_DCDCv2, self).__init__()
        activation = nn.ReLU(True)

        model = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc, min(ngf, opt.mc), kernel_size=7, padding=0),
            norm_layer(min(ngf, opt.mc)),  # normalize the actual (possibly capped) conv output channels
            activation,
        ]
        ### downsample
        for i in range(opt.start_r):
            mult = 2 ** i
            model += [
                nn.Conv2d(
                    min(ngf * mult, opt.mc),
                    min(ngf * mult * 2, opt.mc),
                    kernel_size=k_size,
                    stride=2,
                    padding=1,
                ),
                norm_layer(min(ngf * mult * 2, opt.mc)),
                activation,
            ]
        for i in range(opt.start_r, n_downsampling - 1):
            mult = 2 ** i
            model += [
                nn.Conv2d(
                    min(ngf * mult, opt.mc),
                    min(ngf * mult * 2, opt.mc),
                    kernel_size=k_size,
                    stride=2,
                    padding=1,
                ),
                norm_layer(min(ngf * mult * 2, opt.mc)),
                activation,
            ]
            model += [
                ResnetBlock(
                    min(ngf * mult * 2, opt.mc),
                    padding_type=padding_type,
                    activation=activation,
                    norm_layer=norm_layer,
                    opt=opt,
                )
            ]
            model += [
                ResnetBlock(
                    min(ngf * mult * 2, opt.mc),
                    padding_type=padding_type,
                    activation=activation,
                    norm_layer=norm_layer,
                    opt=opt,
                )
            ]
        mult = 2 ** (n_downsampling - 1)

        if opt.spatio_size == 32:
            model += [
                nn.Conv2d(
                    min(ngf * mult, opt.mc),
                    min(ngf * mult * 2, opt.mc),
                    kernel_size=k_size,
                    stride=2,
                    padding=1,
                ),
                norm_layer(min(ngf * mult * 2, opt.mc)),
                activation,
            ]
        if opt.spatio_size == 64:
            model += [
                ResnetBlock(
                    min(ngf * mult * 2, opt.mc),
                    padding_type=padding_type,
                    activation=activation,
                    norm_layer=norm_layer,
                    opt=opt,
                )
            ]
        model += [
            ResnetBlock(
                min(ngf * mult * 2, opt.mc),
                padding_type=padding_type,
                activation=activation,
                norm_layer=norm_layer,
                opt=opt,
            )
        ]
        # model += [nn.Conv2d(min(ngf * mult * 2, opt.mc), min(ngf, opt.mc), 1, 1)]
        if opt.feat_dim > 0:
            model += [nn.Conv2d(min(ngf * mult * 2, opt.mc), opt.feat_dim, 1, 1)]
        self.encoder = nn.Sequential(*model)

        # decode
        model = []
        if opt.feat_dim > 0:
            model += [nn.Conv2d(opt.feat_dim, min(ngf * mult * 2, opt.mc), 1, 1)]
        # model += [nn.Conv2d(min(ngf, opt.mc), min(ngf * mult * 2, opt.mc), 1, 1)]
        o_pad = 0 if k_size == 4 else 1
        mult = 2 ** n_downsampling
        model += [
            ResnetBlock(
                min(ngf * mult, opt.mc),
                padding_type=padding_type,
                activation=activation,
                norm_layer=norm_layer,
                opt=opt,
            )
        ]

        if opt.spatio_size == 32:
            model += [
                nn.ConvTranspose2d(
                    min(ngf * mult, opt.mc),
                    min(int(ngf * mult / 2), opt.mc),
                    kernel_size=k_size,
                    stride=2,
                    padding=1,
                    output_padding=o_pad,
                ),
                norm_layer(min(int(ngf * mult / 2), opt.mc)),
                activation,
            ]
        if opt.spatio_size == 64:
            model += [
                ResnetBlock(
                    min(ngf * mult, opt.mc),
                    padding_type=padding_type,
                    activation=activation,
                    norm_layer=norm_layer,
                    opt=opt,
                )
            ]

        for i in range(1, n_downsampling - opt.start_r):
            mult = 2 ** (n_downsampling - i)
            model += [
                ResnetBlock(
                    min(ngf * mult, opt.mc),
                    padding_type=padding_type,
                    activation=activation,
                    norm_layer=norm_layer,
                    opt=opt,
                )
            ]
            model += [
                ResnetBlock(
                    min(ngf * mult, opt.mc),
                    padding_type=padding_type,
                    activation=activation,
                    norm_layer=norm_layer,
                    opt=opt,
                )
            ]
            model += [
                nn.ConvTranspose2d(
                    min(ngf * mult, opt.mc),
                    min(int(ngf * mult / 2), opt.mc),
                    kernel_size=k_size,
                    stride=2,
                    padding=1,
                    output_padding=o_pad,
                ),
                norm_layer(min(int(ngf * mult / 2), opt.mc)),
                activation,
            ]
        for i in range(n_downsampling - opt.start_r, n_downsampling):
            mult = 2 ** (n_downsampling - i)
            model += [
                nn.ConvTranspose2d(
                    min(ngf * mult, opt.mc),
                    min(int(ngf * mult / 2), opt.mc),
                    kernel_size=k_size,
                    stride=2,
                    padding=1,
                    output_padding=o_pad,
                ),
                norm_layer(min(int(ngf * mult / 2), opt.mc)),
                activation,
            ]
        if opt.use_segmentation_model:
            model += [nn.ReflectionPad2d(3), nn.Conv2d(min(ngf, opt.mc), output_nc, kernel_size=7, padding=0)]
        else:
            model += [
                nn.ReflectionPad2d(3),
                nn.Conv2d(min(ngf, opt.mc), output_nc, kernel_size=7, padding=0),
                nn.Tanh(),
            ]
        self.decoder = nn.Sequential(*model)
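The recurring min(ngf * mult, opt.mc) caps channel growth over this unusually deep down-sampling path; e.g. with ngf = 64 and opt.mc = 512 the channel counts grow 64, 128, 256, 512 and then stay clamped. A quick illustration (values assumed):

    ngf, mc, n_downsampling = 64, 512, 8  # assumed configuration
    channels = [min(ngf * 2 ** i, mc) for i in range(n_downsampling + 1)]
    print(channels)  # [64, 128, 256, 512, 512, 512, 512, 512, 512]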
Example #28
    def __init__(self, channels=3):
        super(Generator, self).__init__()

        self.down1 = UNetDown(channels, 64, normalize=False)
        self.down2 = UNetDown(64, 128)
        self.down3 = UNetDown(128, 256)
        self.down4 = UNetDown(256, 512, dropout=0.5)
        self.down5 = UNetDown(512, 512, dropout=0.5)
        self.down6 = UNetDown(512, 512, dropout=0.5)
        self.down7 = UNetDown(512, 512, dropout=0.5, normalize=False)

        self.up1 = UNetUp(512, 512, dropout=0.5)
        self.up2 = UNetUp(1024, 512, dropout=0.5)
        self.up3 = UNetUp(1024, 512, dropout=0.5)
        self.up4 = UNetUp(1024, 256)
        self.up5 = UNetUp(512, 128)
        self.up6 = UNetUp(256, 64)

        self.final = nn.Sequential(nn.ConvTranspose2d(128, channels, 4, stride=2, padding=1), nn.Tanh())
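Only the constructor is shown; a forward matching these channel counts would thread each UNetUp output together with the mirrored down feature. A hedged sketch, assuming UNetUp(x, skip) upsamples x and concatenates the skip (as in common pix2pix-style implementations):

    def forward(self, x):
        d1 = self.down1(x)
        d2 = self.down2(d1)
        d3 = self.down3(d2)
        d4 = self.down4(d3)
        d5 = self.down5(d4)
        d6 = self.down6(d5)
        d7 = self.down7(d6)
        u1 = self.up1(d7, d6)   # 512 up + 512 skip -> 1024, matching up2's input
        u2 = self.up2(u1, d5)
        u3 = self.up3(u2, d4)
        u4 = self.up4(u3, d3)   # 256 up + 256 skip -> 512
        u5 = self.up5(u4, d2)   # 128 up + 128 skip -> 256
        u6 = self.up6(u5, d1)   # 64 up + 64 skip -> 128
        return self.final(u6)   # ConvTranspose2d(128, channels) + Tanh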
Example #29
    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, gpu_ids=[], padding_type='reflect'):
        assert(n_blocks >= 0)
        super(ResnetGeneratorMM, self).__init__()
        self.input_nc = input_nc
        self.output_nc = output_nc
        self.ngf = ngf
        self.gpu_ids = gpu_ids
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d
        model1 = [nn.ReflectionPad2d(3),
                  nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0,
                            bias=use_bias),
                  norm_layer(ngf),
                  nn.ReLU(True)]

        model2 = [nn.ReflectionPad2d(3),
                  nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0,
                            bias=use_bias),
                  norm_layer(ngf),
                  nn.ReLU(True)]


        n_downsampling = 2
        for i in range(n_downsampling):
            mult = 2**i
            model1 += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
                                 stride=2, padding=1, bias=use_bias),
                       norm_layer(ngf * mult * 2),
                       nn.ReLU(True)]

            model2 += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
                                 stride=2, padding=1, bias=use_bias),
                       norm_layer(ngf * mult * 2),
                       nn.ReLU(True)]

        # the first pre_f_blocks residual blocks stay stream-specific; blocks
        # pre_f_blocks..pre_l_blocks run before fusion, the rest after it
        pre_f_blocks = 4
        pre_l_blocks = 7
        mult = 2**n_downsampling

        for i in range(pre_f_blocks):
            model1 += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]
            model2 += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]

        model_pre = []
        model_post = []
        for i in range(pre_f_blocks, pre_l_blocks):
            model_pre += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]

        for i in range(pre_l_blocks, n_blocks):
            model_post += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]

        # fuse the two streams: concatenated features (2 * ngf * mult channels)
        # are projected back down to ngf * mult channels
        model_fusion = [nn.Conv2d(ngf * mult * 2, ngf * mult, kernel_size=3, padding=1, bias=use_bias),
                        norm_layer(ngf * mult),
                        nn.ReLU(True)]

        model = []  # note: unused below; the upsampling layers are appended to model_post
        for i in range(n_downsampling):
            mult = 2**(n_downsampling - i)
            model_post += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
                                         kernel_size=3, stride=2,
                                         padding=1, output_padding=1,
                                         bias=use_bias),
                      norm_layer(int(ngf * mult / 2)),
                      nn.ReLU(True)]
        model_post += [nn.ReflectionPad2d(3)]
        model_post += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
        model_post += [nn.Tanh()]
        self.model1 = nn.Sequential(*model1)
        self.model2 = nn.Sequential(*model2)
        self.model_pre = nn.Sequential(*model_pre)
        self.model_post = nn.Sequential(*model_post)
        self.model_fusion = nn.Sequential(*model_fusion)
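The constructor wires two stream-specific encoders (model1, model2), a fusion conv that halves the concatenated channels, and shared pre/post blocks. The forward is not shown; one plausible wiring, stated as an assumption:

    def forward(self, input1, input2):
        feat1 = self.model1(input1)   # stream-specific encoder + first resblocks
        feat2 = self.model2(input2)
        fused = self.model_fusion(torch.cat([feat1, feat2], dim=1))  # 2*C -> C channels
        fused = self.model_pre(fused)                                # shared mid-level resblocks
        return self.model_post(fused)                                # late blocks + upsampling + Tanh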
Example #30
    def generate(self, trg_input, dec_hidden, enc_context, src_map=None, oov_list=None, max_len=1, return_attention=False):
        '''
        Given the initial input, decoder state and the source contexts, decode greedily for up to max_len steps, returning the most likely word at each step
        :param trg_input: word indices of the target input (usually zeros indicating BOS <s>)
        :param dec_hidden: hidden state for the decoder RNN to start with
        :param enc_context: context encoding vectors of the source text
        :param src_map: required if it's a copy model
        :param oov_list: required if it's a copy model
        :param max_len: maximum number of time steps to unroll
        :param return_attention: whether to return the attention weights as well
        :return: log probabilities, the final decoder hidden state and (if requested) the attention weights
        '''
        batch_size      = trg_input.size(0)
        src_len         = enc_context.size(1)
        trg_len         = trg_input.size(1)
        context_dim     = enc_context.size(2)
        trg_hidden_dim  = self.trg_hidden_dim

        attn_weights = []
        copy_weights = []
        log_probs = []

        # enc_context has to be reshaped before dot attention (batch_size, src_len, context_dim) -> (batch_size, src_len, trg_hidden_dim)
        enc_context = nn.Tanh()(self.encoder2decoder_hidden(enc_context.contiguous().view(-1, context_dim))).view(batch_size, src_len, trg_hidden_dim)

        for i in range(max_len):
            # print('TRG_INPUT: %s' % str(trg_input.size()))
            # print(trg_input.data.numpy())
            trg_emb = self.embedding(trg_input)  # (batch_size, trg_len = 1, emb_dim)
            trg_emb = trg_emb.permute(1, 0, 2)  # (trg_len, batch_size, embed_dim)

            # (seq_len, batch_size, hidden_size * num_directions)
            decoder_output, dec_hidden = self.decoder(
                trg_emb, dec_hidden
            )

            # Get the h_tilde (hidden after attention) and attention weights
            h_tilde, attn_weight, attn_logit = self.attention_layer(decoder_output.permute(1, 0, 2), enc_context)

            # compute the output decode_logit and read-out as probs: p_x = Softmax(W_s * h_tilde)
            # (batch_size, trg_len, trg_hidden_size) -> (batch_size, 1, vocab_size)
            decoder_logit = self.decoder2vocab(h_tilde.view(-1, trg_hidden_dim))

            if not hasattr(self, 'copy_model'):
                decoder_log_prob  = torch.nn.functional.log_softmax(decoder_logit, dim=-1).view(batch_size, 1, self.vocab_size)
            else:
                decoder_logit = decoder_logit.view(batch_size, 1, self.vocab_size)
                # copy_weights and copy_logits is (batch_size, trg_len, src_len)
                if self.copy_attention_layer:
                    _, copy_weight, copy_logit = self.copy_attention_layer(decoder_output.permute(1, 0, 2), enc_context)
                else:
                    copy_weight = attn_weight
                    copy_logit  = attn_logit
                copy_weights.append(copy_weight.permute(1, 0, 2)) # (1, batch_size, src_len)
                # merge the generative and copying probs (batch_size, 1, vocab_size + max_unk_word)
                decoder_log_prob   = self.merge_copy_probs(decoder_logit, copy_logit, src_map, oov_list)

            # Prepare for the next iteration: feed the most likely word back in as the next input
            top_1_v, top_1_idx  = decoder_log_prob.data.topk(1, dim=-1) # (batch_size, 1, 1)
            trg_input           = Variable(top_1_idx.squeeze(2))
            # trg_input           = Variable(top_1_idx).cuda() if torch.cuda.is_available() else Variable(top_1_idx) # (batch_size, 1)

            # append to return lists
            log_probs.append(decoder_log_prob.permute(1, 0, 2)) # (1, batch_size, vocab_size)
            attn_weights.append(attn_weight.permute(1, 0, 2)) # (1, batch_size, src_len)

        # permute to trg_len first, otherwise the cat operation would mess up things
        log_probs       = torch.cat(log_probs, 0).permute(1, 0, 2) # (batch_size, max_len, vocab_size + max_oov_number)
        attn_weights    = torch.cat(attn_weights, 0).permute(1, 0, 2) # (batch_size, max_len, src_seq_len)

        # Only return the hidden vectors of the last time step.
        #   tuple of (num_layers * num_directions, batch_size, trg_hidden_dim)=(1, batch_size, trg_hidden_dim)

        # Return final outputs, hidden states, and attention weights (for visualization)
        if return_attention:
            if not hasattr(self, 'copy_model'):
                return log_probs, dec_hidden, attn_weights
            else:
                copy_weights = torch.cat(copy_weights, 0).permute(1, 0, 2) # (batch_size, max_len, src_seq_len)
                return log_probs, dec_hidden, (attn_weights, copy_weights)
        else:
            return log_probs, dec_hidden
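A minimal greedy-decoding call under assumed shapes, just to show how generate is driven; model, enc_context, enc_hidden, src_map and oov_list are placeholders for an instantiated model and its encoder outputs, and the BOS index is taken to be 0 as the docstring suggests:

    import torch

    bos = torch.zeros(batch_size, 1).long()          # (batch_size, 1) of <s> indices (assumed index 0)
    init_hidden = model.init_decoder_state(enc_hidden[0], enc_hidden[1])
    log_probs, dec_hidden, attn = model.generate(
        bos, init_hidden, enc_context,
        src_map=src_map, oov_list=oov_list,
        max_len=6, return_attention=True)
    preds = log_probs.argmax(dim=-1)                 # (batch_size, 6) greedy word indices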