Example no. 1
    def forward(self, sentence):
        # print(sentence)                                     # [torch.LongTensor of size 42x64]
        x = self.word_embeddings(sentence)
        x = self.dropout_embed(x)
        # print(embeds.size())                                # torch.Size([42, 64, 100])
        # x = embeds.view(len(sentence), self.batch_size, -1)
        # print(x.size())                                     # torch.Size([42, 64, 100])
        lstm_out, self.hidden = self.lstm(x, self.hidden)   # lstm_out 10*5*50 hidden 1*5*50 *2
        # print(lstm_out)
        # lstm_out = [F.max_pool1d(i, len(lstm_out)).unsqueeze(2) for i in lstm_out]
        lstm_out = torch.transpose(lstm_out, 0, 1)
        lstm_out = torch.transpose(lstm_out, 1, 2)

        lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2))
        # print(lstm_out.size())
        lstm_out = lstm_out.squeeze(2)
        # y = self.hidden2label(lstm_out)

        #lstm_out = torch.cat(lstm_out, 1)
        # lstm_out = self.dropout(lstm_out)
        # lstm_out = lstm_out.view(len(sentence), -1)
        y = self.hidden2label1(F.tanh(lstm_out))
        y = self.hidden2label2(F.tanh(y))
        # log_probs = F.log_softmax(y)
        log_probs = y
        return log_probs
Example no. 2
def PeepholeLSTMCell(input: torch.Tensor,
                     hidden: Tuple[torch.Tensor, torch.Tensor],
                     w_ih: torch.Tensor,
                     w_hh: torch.Tensor,
                     w_ip: torch.Tensor,
                     w_fp: torch.Tensor,
                     w_op: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    An LSTM cell with peephole connections without biases.

    Mostly ripped from the pytorch autograd lstm implementation.
    """
    hx, cx = hidden
    gates = F.linear(input, w_ih) + F.linear(hx, w_hh)

    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
    peep_i = w_ip.unsqueeze(0).expand_as(cx) * cx
    ingate = ingate + peep_i
    peep_f = w_fp.unsqueeze(0).expand_as(cx) * cx
    forgetgate = forgetgate + peep_f

    ingate = F.sigmoid(ingate)
    forgetgate = F.sigmoid(forgetgate)
    cellgate = F.tanh(cellgate)
    cy = (forgetgate * cx) + (ingate * cellgate)
    peep_o = w_op.unsqueeze(0).expand_as(cy) * cy
    outgate = outgate + peep_o
    outgate = F.sigmoid(outgate)  # squash the output gate after adding the peephole term
    hy = outgate * F.tanh(cy)

    return hy, cy
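A minimal driving sketch for the cell above, with illustrative sizes and randomly initialised weights (all tensor names and sizes below are assumptions, not taken from the original repository):

import torch
import torch.nn.functional as F

batch, input_size, hidden_size = 8, 32, 64
x = torch.randn(batch, input_size)
hx = torch.zeros(batch, hidden_size)                 # previous hidden state
cx = torch.zeros(batch, hidden_size)                 # previous cell state
w_ih = torch.randn(4 * hidden_size, input_size)      # input->gates weights (i, f, g, o stacked)
w_hh = torch.randn(4 * hidden_size, hidden_size)     # hidden->gates weights
w_ip = torch.randn(hidden_size)                      # peephole weight vectors, one per gated term
w_fp = torch.randn(hidden_size)
w_op = torch.randn(hidden_size)

hy, cy = PeepholeLSTMCell(x, (hx, cx), w_ih, w_hh, w_ip, w_fp, w_op)
print(hy.shape, cy.shape)                            # torch.Size([8, 64]) torch.Size([8, 64])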
    def forward(self, xt, fc_feats, att_feats, p_att_feats, state):
        # The p_att_feats here is already projected
        att_size = att_feats.numel() // att_feats.size(0) // self.att_feat_size
        att = p_att_feats.view(-1, att_size, self.att_hid_size)
        
        att_h = self.h2att(state[0][-1])                        # batch * att_hid_size
        att_h = att_h.unsqueeze(1).expand_as(att)            # batch * att_size * att_hid_size
        dot = att + att_h                                   # batch * att_size * att_hid_size
        dot = F.tanh(dot)                                # batch * att_size * att_hid_size
        dot = dot.view(-1, self.att_hid_size)               # (batch * att_size) * att_hid_size
        dot = self.alpha_net(dot)                           # (batch * att_size) * 1
        dot = dot.view(-1, att_size)                        # batch * att_size
        
        weight = F.softmax(dot)                             # batch * att_size
        att_feats_ = att_feats.view(-1, att_size, self.att_feat_size) # batch * att_size * att_feat_size
        att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1) # batch * att_feat_size

        all_input_sums = self.i2h(xt) + self.h2h(state[0][-1])
        sigmoid_chunk = all_input_sums.narrow(1, 0, 3 * self.rnn_size)
        sigmoid_chunk = F.sigmoid(sigmoid_chunk)
        in_gate = sigmoid_chunk.narrow(1, 0, self.rnn_size)
        forget_gate = sigmoid_chunk.narrow(1, self.rnn_size, self.rnn_size)
        out_gate = sigmoid_chunk.narrow(1, self.rnn_size * 2, self.rnn_size)

        in_transform = all_input_sums.narrow(1, 3 * self.rnn_size, 2 * self.rnn_size) + \
            self.a2c(att_res)
        in_transform = torch.max(\
            in_transform.narrow(1, 0, self.rnn_size),
            in_transform.narrow(1, self.rnn_size, self.rnn_size))
        next_c = forget_gate * state[1][-1] + in_gate * in_transform
        next_h = out_gate * F.tanh(next_c)

        output = self.dropout(next_h)
        state = (next_h.unsqueeze(0), next_c.unsqueeze(0))
        return output, state
    def forward(self, x):
        # print("fffff",x)
        embed = self.embed(x)

        # CNN
        cnn_x = embed
        cnn_x = torch.transpose(cnn_x, 0, 1)
        cnn_x = cnn_x.unsqueeze(1)
        cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        cnn_x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in cnn_x]  # [(N,Co), ...]*len(Ks)
        cnn_x = torch.cat(cnn_x, 1)
        cnn_x = self.dropout(cnn_x)

        # LSTM
        lstm_x = embed.view(len(x), embed.size(1), -1)
        lstm_out, self.hidden = self.lstm(lstm_x, self.hidden)
        lstm_out = torch.transpose(lstm_out, 0, 1)
        lstm_out = torch.transpose(lstm_out, 1, 2)
        # lstm_out = F.tanh(lstm_out)
        lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)

        # CNN and LSTM cat
        cnn_x = torch.transpose(cnn_x, 0, 1)
        lstm_out = torch.transpose(lstm_out, 0, 1)
        cnn_lstm_out = torch.cat((cnn_x, lstm_out), 0)
        cnn_lstm_out = torch.transpose(cnn_lstm_out, 0, 1)

        # linear
        cnn_lstm_out = self.hidden2label1(F.tanh(cnn_lstm_out))
        cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))

        # output
        logit = cnn_lstm_out
        return logit
    def norm_flow(self, params, z, v, logposterior):

        h = F.tanh(params[0][0](z))
        mew_ = params[0][1](h)
        sig_ = F.sigmoid(params[0][2](h)+5.) #[PB,Z]


        z_reshaped = z.view(self.P, self.B, self.z_size)

        gradients = torch.autograd.grad(outputs=logposterior(z_reshaped), inputs=z_reshaped,
                          grad_outputs=self.grad_outputs,
                          create_graph=True, retain_graph=True, only_inputs=True)[0]
        gradients = gradients.detach()

        gradients = gradients.view(-1,self.z_size)


        v = v*sig_ + mew_*gradients

        logdet = torch.sum(torch.log(sig_), 1)


        h = F.tanh(params[1][0](v))
        mew_ = params[1][1](h)
        sig_ = F.sigmoid(params[1][2](h)+5.) #[PB,Z]

        z = z*sig_ + mew_*v

        logdet2 = torch.sum(torch.log(sig_), 1)

        #[PB]
        logdet = logdet + logdet2
        
        #[PB,Z], [PB]
        return z, v, logdet
    def forward(self, h_out, fake_region, conv_feat, conv_feat_embed):

        # View into three dimensions
        att_size = conv_feat.numel() // conv_feat.size(0) // self.rnn_size
        conv_feat = conv_feat.view(-1, att_size, self.rnn_size)
        conv_feat_embed = conv_feat_embed.view(-1, att_size, self.att_hid_size)

        # view neighbor from batch_size * neighbor_num x rnn_size to batch_size x rnn_size * neighbor_num
        fake_region = self.fr_linear(fake_region)
        fake_region_embed = self.fr_embed(fake_region)

        h_out_linear = self.ho_linear(h_out)
        h_out_embed = self.ho_embed(h_out_linear)

        txt_replicate = h_out_embed.unsqueeze(1).expand(h_out_embed.size(0), att_size + 1, h_out_embed.size(1))

        img_all = torch.cat([fake_region.view(-1,1,self.input_encoding_size), conv_feat], 1)
        img_all_embed = torch.cat([fake_region_embed.view(-1,1,self.input_encoding_size), conv_feat_embed], 1)

        hA = F.tanh(img_all_embed + txt_replicate)
        hA = F.dropout(hA,self.drop_prob_lm, self.training)
        
        hAflat = self.alpha_net(hA.view(-1, self.att_hid_size))
        PI = F.softmax(hAflat.view(-1, att_size + 1))

        visAtt = torch.bmm(PI.unsqueeze(1), img_all)
        visAttdim = visAtt.squeeze(1)

        atten_out = visAttdim + h_out_linear

        h = F.tanh(self.att2h(atten_out))
        h = F.dropout(h, self.drop_prob_lm, self.training)
        return h
Example no. 7
    def forward(self, inputs):
        x, u = inputs
        x = self.bn0(x)
        x = F.tanh(self.linear1(x))
        x = F.tanh(self.linear2(x))

        V = self.V(x)
        mu = F.tanh(self.mu(x))

        Q = None
        if u is not None:
            num_outputs = mu.size(1)
            L = self.L(x).view(-1, num_outputs, num_outputs)
            L = L * \
                self.tril_mask.expand_as(
                    L) + torch.exp(L) * self.diag_mask.expand_as(L)
            P = torch.bmm(L, L.transpose(2, 1))

            u_mu = (u - mu).unsqueeze(2)
            A = -0.5 * \
                torch.bmm(torch.bmm(u_mu.transpose(2, 1), P), u_mu)[:, :, 0]

            Q = A + V

        return mu, Q, V
    def norm_flow(self, params, z, v):

        # print (z.size())
        h = F.tanh(params[0][0](z))
        mew_ = params[0][1](h)
        sig_ = F.sigmoid(params[0][2](h)+5.) #[PB,Z]

        # print (v.size())
        # print (mew_.size())
        # print (self.B)
        # print (self.P)

        v = v*sig_ + mew_

        logdet = torch.sum(torch.log(sig_), 1)


        h = F.tanh(params[1][0](v))
        mew_ = params[1][1](h)
        sig_ = F.sigmoid(params[1][2](h)+5.) #[PB,Z]

        z = z*sig_ + mew_

        logdet2 = torch.sum(torch.log(sig_), 1)

        #[PB]
        logdet = logdet + logdet2
        
        #[PB,Z], [PB]
        return z, v, logdet
Example no. 9
    def forward(self, inputs):
        x = inputs
        x = self.bn0(x)
        x = F.tanh(self.linear1(x))
        x = F.tanh(self.linear2(x))

        mu = F.tanh(self.mu(x))
        return mu
    def forward(self, xt, img_fc, state):

        hs = []
        cs = []
        for L in range(self.num_layers):
            # c,h from previous timesteps
            prev_h = state[0][L]
            prev_c = state[1][L]
            # the input to this layer
            if L == 0:
                x = xt
                i2h = self.w2h(x) + self.v2h(img_fc)
            else:
                x = hs[-1]
                x = F.dropout(x, self.drop_prob_lm, self.training)
                i2h = self.i2h[L-1](x)

            all_input_sums = i2h+self.h2h[L](prev_h)

            sigmoid_chunk = all_input_sums.narrow(1, 0, 3 * self.rnn_size)
            sigmoid_chunk = F.sigmoid(sigmoid_chunk)
            # decode the gates
            in_gate = sigmoid_chunk.narrow(1, 0, self.rnn_size)
            forget_gate = sigmoid_chunk.narrow(1, self.rnn_size, self.rnn_size)
            out_gate = sigmoid_chunk.narrow(1, self.rnn_size * 2, self.rnn_size)
            # decode the write inputs
            if not self.use_maxout:
                in_transform = F.tanh(all_input_sums.narrow(1, 3 * self.rnn_size, self.rnn_size))
            else:
                in_transform = all_input_sums.narrow(1, 3 * self.rnn_size, 2 * self.rnn_size)
                in_transform = torch.max(\
                    in_transform.narrow(1, 0, self.rnn_size),
                    in_transform.narrow(1, self.rnn_size, self.rnn_size))
            # perform the LSTM update
            next_c = forget_gate * prev_c + in_gate * in_transform
            # gated cells form the output
            tanh_nex_c = F.tanh(next_c)
            next_h = out_gate * tanh_nex_c
            if L == self.num_layers-1:
                if L == 0:
                    i2h = self.r_w2h(x) + self.r_v2h(img_fc)
                else:
                    i2h = self.r_i2h(x)
                n5 = i2h+self.r_h2h(prev_h)
                fake_region = F.sigmoid(n5) * tanh_nex_c

            cs.append(next_c)
            hs.append(next_h)

        # set up the decoder
        top_h = hs[-1]
        top_h = F.dropout(top_h, self.drop_prob_lm, self.training)
        fake_region = F.dropout(fake_region, self.drop_prob_lm, self.training)

        state = (torch.cat([_.unsqueeze(0) for _ in hs], 0), 
                torch.cat([_.unsqueeze(0) for _ in cs], 0))
        return top_h, fake_region, state
Example no. 11
    def forward(self, inputs, actions):
        x = inputs
        x = self.bn0(x)
        x = F.tanh(self.linear1(x))
        a = F.tanh(self.linear_action(actions))
        x = torch.cat((x, a), 1)
        x = F.tanh(self.linear2(x))

        V = self.V(x)
        return V
 def forward(self, input, cell):
     hx, cx = cell
     input = self.i2h_bn(self.i2h(input)) + self.h2h_bn(self.h2h(hx))
     gates = F.sigmoid(input[:, :3*self.hidden_size])
     in_gate = gates[:, :self.hidden_size]
     forget_gate = gates[:, self.hidden_size:2*self.hidden_size]
     out_gate = gates[:, 2*self.hidden_size:3*self.hidden_size]
     input = F.tanh(input[:, 3*self.hidden_size:4*self.hidden_size])
     cx = (forget_gate * cx) + (in_gate * input)
     hx = out_gate * F.tanh(self.cx_bn(cx))
     return hx, cx
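The forward above only shows the cell update; a hedged sketch of a constructor that would make it runnable (layer names matched to the code, class name and sizes assumed):

import torch.nn as nn

class BatchNormLSTMCell(nn.Module):   # hypothetical host for the forward above
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        # projections produce the four stacked chunks used above: in, forget, out gates, then cell input
        self.i2h = nn.Linear(input_size, 4 * hidden_size)
        self.h2h = nn.Linear(hidden_size, 4 * hidden_size)
        # batch norm applied to the two projections and to the cell state
        self.i2h_bn = nn.BatchNorm1d(4 * hidden_size)
        self.h2h_bn = nn.BatchNorm1d(4 * hidden_size)
        self.cx_bn = nn.BatchNorm1d(hidden_size)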
Example no. 13
def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    hx, cx = hidden
    gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)

    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
    ingate = F.sigmoid(ingate)
    forgetgate = F.sigmoid(forgetgate)
    cellgate = F.tanh(cellgate)
    outgate = F.sigmoid(outgate)

    cy = (forgetgate * cx) + (ingate * cellgate)
    hy = outgate * F.tanh(cy)
    return hy, cy
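The cell can be exercised directly; a minimal sketch with assumed sizes (zero biases and random weights are purely illustrative):

import torch
import torch.nn.functional as F

batch, input_size, hidden_size = 4, 10, 20
x = torch.randn(batch, input_size)
hidden = (torch.zeros(batch, hidden_size), torch.zeros(batch, hidden_size))
w_ih = torch.randn(4 * hidden_size, input_size)
w_hh = torch.randn(4 * hidden_size, hidden_size)
b_ih = torch.zeros(4 * hidden_size)
b_hh = torch.zeros(4 * hidden_size)

hy, cy = LSTMCell(x, hidden, w_ih, w_hh, b_ih, b_hh)   # both (batch, hidden_size)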
Example no. 14
 def _get_lstm_features(self, names, lengths):
     self.hidden = self.init_hidden(names.size(-1))
     embeds = self.char_embeds(names)  # Figure 4
     packed_input = pack_padded_sequence(embeds, lengths)  # Figure 5
     packed_output, (ht, ct) = self.lstm(packed_input, self.hidden)  # Figure 6
     lstm_out, _ = pad_packed_sequence(packed_output)  # Figure 7
     lstm_out = torch.transpose(lstm_out, 0, 1)
     lstm_out = torch.transpose(lstm_out, 1, 2)
     lstm_out = F.tanh(lstm_out)  # Figure 8
     lstm_out, indices = F.max_pool1d(lstm_out, lstm_out.size(2), return_indices=True)  # Figure 9
     lstm_out = lstm_out.squeeze(2)  # fix the dimensions so they match the expected input format
     lstm_out = F.tanh(lstm_out)
     lstm_feats = self.fully_connected_layer(lstm_out)
     output = self.softmax(lstm_feats)  # Figure 10
     return output
 def forward(self, x):
     embed = self.embed(x)
     embed = self.dropout_embed(embed)
     x = embed.view(len(x), embed.size(1), -1)
     # lstm
     lstm_out, self.hidden = self.lstm(x, self.hidden)
     lstm_out = torch.transpose(lstm_out, 0, 1)
     lstm_out = torch.transpose(lstm_out, 1, 2)
     # pooling
     lstm_out = F.tanh(lstm_out)
     lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
     lstm_out = F.tanh(lstm_out)
     # linear
     logit = self.hidden2label(lstm_out)
     return logit
 def score(self, hidden, encoder_output):
     
     if self.method == 'dot':            
         # hidden is 1 by 256
         # encoder_output is 22 by 256
         encoder_output = torch.transpose(encoder_output, 0, 1)
         # encoder_output is 256 by 22
         energy = torch.matmul(hidden, encoder_output)
         return energy
     
     elif self.method == 'general':
         # hidden is 1 by 256
         # encoder_output is 256 by 22
         # encoder_output = torch.transpose(encoder_output, 0, 1)
         hidden = hidden.view(1, -1)
         a = self.attn(encoder_output)
         a = torch.transpose(a, 0, 1)
         energy = torch.matmul(hidden, a)
         return energy
     
     elif self.method == 'concat':
         len_encoder_output = encoder_output.size()[1]
         # hidden is 1 by 256
         # encoder_output is 256 by 22
         hidden = torch.transpose(hidden, 0, 1)
         # hidden is 256 by 1
         hidden = hidden.repeat(1, len_encoder_output)
         # hidden is 256 by 22
         concat = torch.cat((hidden, encoder_output), dim=0)
         # concat is 512 by 22
         # self.attn(concat) --> 256 by 22
         energy = torch.matmul(self.v, F.tanh(self.attn(concat)))
         return energy
Example no. 17
    def step(self, x, h_tm1, src_encodings, src_encodings_att_linear, src_token_mask=None, return_att_weight=False):
        """Perform a single time-step of computation in decoder LSTM

        Args:
            x: variable of shape (batch_size, hidden_size), input
            h_tm1: Tuple[Variable(batch_size, hidden_size), Variable(batch_size, hidden_size)], previous
                   hidden and cell states
            src_encodings: variable of shape (batch_size, src_sent_len, hidden_size * 2), encodings of source utterances
            src_encodings_att_linear: linearly transformed source encodings
            src_token_mask: mask over source tokens (Note: unused entries are masked to **one**)
            return_att_weight: return attention weights

        Returns:
            The new LSTM hidden state and cell state
        """

        # h_t: (batch_size, hidden_size)
        h_t, cell_t = self.decoder_lstm(x, h_tm1)

        ctx_t, alpha_t = nn_utils.dot_prod_attention(h_t,
                                                     src_encodings, src_encodings_att_linear,
                                                     mask=src_token_mask)

        att_t = F.tanh(self.att_vec_linear(torch.cat([h_t, ctx_t], 1)))  # E.q. (5)
        att_t = self.dropout(att_t)

        if return_att_weight:
            return (h_t, cell_t), att_t, alpha_t
        else: return (h_t, cell_t), att_t
Example no. 18
    def forward(self, x):
        """
        :param x: tensor with shape [batch_size, max_seq_len, max_word_len, char_embed_size]

        :return: tensor with shape [batch_size, max_seq_len, depth_sum]

        applies a multi-kernel 1d-conv layer along every word in the input with max-over-time pooling
            to emit fixed-size output
        """

        input_size = x.size()
        input_size_len = len(input_size)

        assert input_size_len == 4, \
            'Wrong input rank, must be equal to 4, but {} found'.format(input_size_len)

        [batch_size, seq_len, _, embed_size] = input_size

        assert embed_size == self.params.char_embed_size, \
            'Wrong embedding size, must be equal to {}, but {} found'.format(self.params.char_embed_size, embed_size)

        # leaps with shape
        x = x.view(-1, self.params.max_word_len, self.params.char_embed_size).transpose(1, 2).contiguous()

        xs = [F.tanh(F.conv1d(x, kernel, bias=self.biases[i])) for i, kernel in enumerate(self.kernels)]
        xs = [x.max(2)[0].squeeze(2) for x in xs]

        x = t.cat(xs, 1)
        x = x.view(batch_size, seq_len, -1)

        return x
Example no. 19
def readout(h, h2):
  catted_reads = map(lambda x: torch.cat([h[x[0]], h2[x[1]]], 1), zip(h2.keys(), h.keys()))
  activated_reads = map(lambda x: F.selu( R(x) ), catted_reads)
  readout = Variable(torch.zeros(1, 128))
  for read in activated_reads:
    readout = readout + read
  return F.tanh( readout )
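readout relies on a module R and on Variable that are defined elsewhere in that file; a hedged stand-in showing the shapes it expects (R, the 64/128 sizes, and the dict layout are assumptions):

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

R = nn.Linear(2 * 64, 128)                        # stand-in for the shared readout layer
h  = {k: torch.randn(1, 64) for k in range(5)}    # per-node hidden states
h2 = {k: torch.randn(1, 64) for k in range(5)}
out = readout(h, h2)                              # (1, 128), squashed by tanh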
Example no. 20
 def forward(self, x):
     x = F.leaky_relu(self.fc1(x), 0.2, inplace=True)
     x = F.leaky_relu(self.fc11(x), 0.2, inplace=True)
     x = F.leaky_relu(self.fc2(x), 0.2, inplace=True)
     x = F.leaky_relu(self.fc3(x), 0.2, inplace=True)
     x = F.tanh(self.out(x))
     return x
Example no. 21
    def init_decoder_state(self, enc_last_state, enc_last_cell):
        """Compute the initial decoder hidden state and cell state"""

        h_0 = self.decoder_cell_init(enc_last_cell)
        h_0 = F.tanh(h_0)

        return h_0, Variable(self.new_tensor(h_0.size()).zero_())
Example no. 22
    def forward(self, s_t_hat, h, enc_padding_mask, coverage):
        b, t_k, n = list(h.size())
        h = h.view(-1, n)  # B * t_k x 2*hidden_dim
        encoder_feature = self.W_h(h)

        dec_fea = self.decode_proj(s_t_hat) # B x 2*hidden_dim
        dec_fea_expanded = dec_fea.unsqueeze(1).expand(b, t_k, n).contiguous() # B x t_k x 2*hidden_dim
        dec_fea_expanded = dec_fea_expanded.view(-1, n)  # B * t_k x 2*hidden_dim

        att_features = encoder_feature + dec_fea_expanded # B * t_k x 2*hidden_dim
        if config.is_coverage:
            coverage_input = coverage.view(-1, 1)  # B * t_k x 1
            coverage_feature = self.W_c(coverage_input)  # B * t_k x 2*hidden_dim
            att_features = att_features + coverage_feature

        e = F.tanh(att_features) # B * t_k x 2*hidden_dim
        scores = self.v(e)  # B * t_k x 1
        scores = scores.view(-1, t_k)  # B x t_k

        attn_dist_ = F.softmax(scores, dim=1)*enc_padding_mask # B x t_k
        normalization_factor = attn_dist_.sum(1, keepdim=True)
        attn_dist = attn_dist_ / normalization_factor

        attn_dist = attn_dist.unsqueeze(1)  # B x 1 x t_k
        h = h.view(-1, t_k, n)  # B x t_k x 2*hidden_dim
        c_t = torch.bmm(attn_dist, h)  # B x 1 x n
        c_t = c_t.view(-1, config.hidden_dim * 2)  # B x 2*hidden_dim

        attn_dist = attn_dist.view(-1, t_k)  # B x t_k

        if config.is_coverage:
            coverage = coverage.view(-1, t_k)
            coverage = coverage + attn_dist

        return c_t, attn_dist, coverage
Example no. 23
 def forward(self, x):
     x = F.relu(self.fc1(x))
     x = F.relu(self.fc11(x))
     x = F.relu(self.fc2(x))
     x = F.relu(self.fc3(x))
     x = F.tanh(self.out(x))
     return x
 def forward(self, x):
     x = self.fc1(x).view(-1, self.channels, self.rows, self.rows)
     x = F.relu(self.batch_norm1(x))
     x = F.relu(self.batch_norm2(self.conv1(x)))
     x = F.relu(self.batch_norm3(self.conv2(x)))
     x = F.relu(self.batch_norm4(self.conv3(x)))
     return F.tanh(self.conv4(x))
Example no. 25
    def forward(self, input_seq, last_hidden, encoder_outputs):
        # Note: we run this one step at a time

        # Get the embedding of the current input word (last output word)
        embedded = self.embedding(input_seq)
        embedded = self.embedding_dropout(embedded) #[1, 64, 512]
        if(embedded.size(0) != 1):
            raise ValueError('Decoder input sequence length should be 1')

        # Get current hidden state from input word and last hidden state
        rnn_output, hidden = self.gru(embedded, last_hidden)

        # Calculate attention from current RNN state and all encoder outputs;
        # apply to encoder outputs to get weighted average
        attn_weights = self.attn(rnn_output, encoder_outputs) #[64, 1, 14]
        # encoder_outputs [14, 64, 512]
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1)) #[64, 1, 512]

        # Attentional vector using the RNN hidden state and context vector
        # concatenated together (Luong eq. 5)
        rnn_output = rnn_output.squeeze(0) #[64, 512]
        context = context.squeeze(1) #[64, 512]
        concat_input = torch.cat((rnn_output, context), 1) #[64, 1024]
        concat_output = F.tanh(self.concat(concat_input)) #[64, 512]

        # Finally predict next token (Luong eq. 6, without softmax)
        output = self.out(concat_output) #[64, output_size]
        output = F.softmax(output)

        # Return final output, hidden state, and attention weights (for visualization)
        return output, hidden, attn_weights
Example no. 26
 def forward(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
     """
         input:
             word_inputs: (batch_size, sent_len)
             word_seq_lengths: list of batch_size, (batch_size,1)
             char_inputs: (batch_size*sent_len, word_length)
             char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
             char_seq_recover: variable which records the char order information, used to recover char order
         output:
             Variable(batch_size, sent_len, hidden_dim)
     """
     word_represent = self.wordrep(word_inputs,feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
     ## word_embs (batch_size, seq_len, embed_size)
     if self.word_feature_extractor == "CNN":
         word_in = F.tanh(self.word2cnn(word_represent)).transpose(2,1).contiguous()
         for idx in range(self.cnn_layer):
             if idx == 0:
                 cnn_feature = F.relu(self.cnn_list[idx](word_in))
             else:
                 cnn_feature = F.relu(self.cnn_list[idx](cnn_feature))
             cnn_feature = self.cnn_drop_list[idx](cnn_feature)
             cnn_feature = self.cnn_batchnorm_list[idx](cnn_feature)
         feature_out = cnn_feature.transpose(2,1).contiguous()
     else:
         packed_words = pack_padded_sequence(word_represent, word_seq_lengths.cpu().numpy(), True)
         hidden = None
         lstm_out, hidden = self.lstm(packed_words, hidden)
         lstm_out, _ = pad_packed_sequence(lstm_out)
         ## lstm_out (seq_len, batch_size, hidden_size)
         feature_out = self.droplstm(lstm_out.transpose(1,0))
     ## feature_out (batch_size, seq_len, hidden_size)
     outputs = self.hidden2tag(feature_out)
     return outputs
    def forward(self, xt, fc_feats, att_feats, state):
        att_size = att_feats.numel() // att_feats.size(0) // self.att_feat_size
        att = att_feats.view(-1, self.att_feat_size)
        if self.att_hid_size > 0:
            att = self.ctx2att(att)                             # (batch * att_size) * att_hid_size
            att = att.view(-1, att_size, self.att_hid_size)     # batch * att_size * att_hid_size
            att_h = self.h2att(state[0][-1])                    # batch * att_hid_size
            att_h = att_h.unsqueeze(1).expand_as(att)           # batch * att_size * att_hid_size
            dot = att + att_h                                   # batch * att_size * att_hid_size
            dot = F.tanh(dot)                                   # batch * att_size * att_hid_size
            dot = dot.view(-1, self.att_hid_size)               # (batch * att_size) * att_hid_size
            dot = self.alpha_net(dot)                           # (batch * att_size) * 1
            dot = dot.view(-1, att_size)                        # batch * att_size
        else:
            att = self.ctx2att(att)                             # (batch * att_size) * 1
            att = att.view(-1, att_size)                        # batch * att_size
            att_h = self.h2att(state[0][-1])                    # batch * 1
            att_h = att_h.expand_as(att)                        # batch * att_size
            dot = att_h + att                                   # batch * att_size
        
        weight = F.softmax(dot)
        att_feats_ = att_feats.view(-1, att_size, self.att_feat_size) # batch * att_size * att_feat_size
        att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1) # batch * att_feat_size

        output, state = self.rnn(torch.cat([xt, att_res], 1).unsqueeze(0), state)
        return output.squeeze(0), state
Example no. 28
    def baseline(self, samples, enc_states):
        # compute baseline, which is an MLP
        # (sample_size) FIXME: reward is log-likelihood, shall we use activation here?

        b_x = self.b_x_l2(F.tanh(self.b_x_l1(enc_states.detach()))).view(-1)

        return b_x + self.b
    def forward(self, output, context):
        batch_size = output.size(0)
        hidden_size = output.size(2)
        input_size = context.size(1)

        # (batch, out_len, dim) * (batch, in_len, dim) -> (batch, out_len, in_len)
        attn = torch.bmm(output, context.transpose(1, 2))
        mask = torch.eq(attn, 0).data.byte()
        attn.data.masked_fill_(mask, -float('inf'))
        attn = F.softmax(attn.view(-1, input_size), dim=1).view(batch_size, -1, input_size)

        # (batch, out_len, in_len) * (batch, in_len, dim) -> (batch, out_len, dim)
        mix = torch.bmm(attn, context)

        # concat -> (batch, out_len, 2*dim)
        combined = torch.cat((mix, output), dim=2)

        # output -> (batch, out_len, dim)
        output = F.tanh(self.linear_out(combined.view(-1, 2 * hidden_size))).view(batch_size, -1, hidden_size)


        if not output.is_contiguous():
            output = output.contiguous()

        return output, attn
    def forward(self, x):
        x = self.embed(x)
        x = self.dropout(x)
        # x = x.view(len(x), x.size(1), -1)
        # x = embed.view(len(x), embed.size(1), -1)
        bilstm_out, self.hidden = self.bilstm(x, self.hidden)

        bilstm_out = torch.transpose(bilstm_out, 0, 1)
        bilstm_out = torch.transpose(bilstm_out, 1, 2)
        # bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
        bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2))
        bilstm_out = bilstm_out.squeeze(2)

        hidden2lable = self.hidden2label1(F.tanh(bilstm_out))

        gate_layer = F.sigmoid(self.gate_layer(bilstm_out))
        # calculate highway layer values
        gate_hidden_layer = torch.mul(hidden2lable, gate_layer)
        # the commented variant below also runs, but it does not match the Highway Networks formula
        # gate_input = torch.mul((1 - gate_layer), hidden2lable)
        gate_input = torch.mul((1 - gate_layer), bilstm_out)
        highway_output = torch.add(gate_hidden_layer, gate_input)

        logit = self.logit_layer(highway_output)

        return logit
Example no. 31
 def _select_function(h, function_id):
     h = torch.stack([F.tanh(h), F.relu(h), F.sigmoid(h), h], dim=0)
     h = h[function_id]
     return h
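If defined at module level (as in the nested copy in the next example), the selector just indexes into the stacked activations; a quick check, illustrative only:

import torch
import torch.nn.functional as F

h = torch.randn(3, 5)
assert torch.equal(_select_function(h, 0), torch.tanh(h))   # id 0 -> tanh
assert torch.equal(_select_function(h, 1), torch.relu(h))   # id 1 -> relu
assert torch.equal(_select_function(h, 3), h)               # id 3 -> identity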
Example no. 32
    def _nas_cell(self, sample_arc, x, prev_s, input_mask, layer_mask):
        """Multi-layer LSTM.

        Args:
            sample_arc: [num_layers * 2], sequence of tokens representing architecture.
            x: [batch_size, num_steps, hidden_size].
            prev_s: [batch_size, hidden_size].
            w_prev: [2 * hidden_size, 2 * hidden_size].
            w_skip: [None, [hidden_size, 2 * hidden_size] * (num_layers-1)].
            input_mask: `[batch_size, hidden_size]`.
            layer_mask: `[batch_size, hidden_size]`.
            params: hyper-params object.

        Returns:
            next_s: [batch_size, hidden_size].
            all_s: [[batch_size, num_steps, hidden_size] * num_layers].
        """
        num_layers = len(sample_arc) // 2


        # extract the relevant variables, so that you only do L2-reg on them.
        # u_skip = []
        # start_idx = 0
        # for layer_id in range(1, num_layers):
        #     prev_idx = sample_arc[start_idx]
        #     func_idx = sample_arc[start_idx + 1]
        #     u_skip.append(self.w_combined[prev_idx][layer_id][func_idx])
        #     start_idx += 2
        # w_skip = u_skip
        # var_s = [self.w_prev] + w_skip[1:]

        def _select_function(h, function_id):
            h = torch.stack([F.tanh(h), F.relu(h), F.sigmoid(h), h], dim=0)
            h = h[function_id]
            return h

        """Body function."""
        # important change: first input uses a tanh()
        if layer_mask is not None:
            assert input_mask is not None
            # self.w_prev.weight.data = self.w_prev.weight.data * self.w_prev_mask
            ht = self.w_prev(torch.cat([x * input_mask, prev_s * layer_mask],
                                        dim=1))
        else:
            ht = self.w_prev(torch.cat([x, prev_s], dim=1))
        h, t = torch.split(ht, self.args.shared_hid, dim=1)
        h = F.tanh(h)
        t = F.sigmoid(t)
        s = prev_s + t * (h - prev_s)
        layers = [s]

        start_idx = 0
        used = []
        for layer_id in range(1, num_layers):
            prev_idx = sample_arc[start_idx].item()
            func_idx = sample_arc[start_idx + 1].item()
            # used.append(tf.one_hot(prev_idx, depth=num_layers, dtype=tf.int32)) not used?
            prev_s = torch.stack(layers, dim=0)[prev_idx]
            if layer_mask is not None:
                # self.w_combined[prev_idx][layer_id][func_idx].weight.data =\
                #     self.w_combined[prev_idx][layer_id][func_idx].weight.data * self.weight_mask
                ht = self.w_combined[prev_idx][layer_id][func_idx](prev_s * layer_mask)

            else:
                ht = self.w_combined[prev_idx][layer_id][func_idx](prev_s)
            h, t = torch.split(ht, self.args.shared_hid, dim=1)

            h = _select_function(h, func_idx)
            t = F.sigmoid(t)
            s = prev_s + t * (h - prev_s)
            # s.set_shape([batch_size, self.hidden_size])
            # s = s.view(batch_size, self.hidden_size)
            layers.append(s)
            start_idx += 2

        next_s = torch.sum(torch.stack(layers[1:]), dim=0) / num_layers

        return next_s
Example no. 33
 def forward(self, state):
     """Build an actor (policy) network that maps states -> actions."""
     x = F.relu(self.fc1(self.bn1(state)))
     x = F.relu(self.fc2(self.bn2(x)))
     return F.tanh(self.fc3(self.bn3(x)))
def test_tom(opt, test_loader, model, board):
    model.cuda()
    model.eval()

    base_name = os.path.basename(opt.checkpoint)
    # save_dir = os.path.join(opt.result_dir, base_name, opt.datamode)
    save_dir = os.path.join(opt.result_dir, opt.name, opt.datamode)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    try_on_dir = os.path.join(save_dir, 'try-on')
    if not os.path.exists(try_on_dir):
        os.makedirs(try_on_dir)
    p_rendered_dir = os.path.join(save_dir, 'p_rendered')
    if not os.path.exists(p_rendered_dir):
        os.makedirs(p_rendered_dir)
    m_composite_dir = os.path.join(save_dir, 'm_composite')
    if not os.path.exists(m_composite_dir):
        os.makedirs(m_composite_dir)
    im_pose_dir = os.path.join(save_dir, 'im_pose')
    if not os.path.exists(im_pose_dir):
        os.makedirs(im_pose_dir)
    shape_dir = os.path.join(save_dir, 'shape')
    if not os.path.exists(shape_dir):
        os.makedirs(shape_dir)
    im_h_dir = os.path.join(save_dir, 'im_h')
    if not os.path.exists(im_h_dir):
        os.makedirs(im_h_dir)  # for test data

    print('Dataset size: %05d!' % (len(test_loader.dataset)), flush=True)
    for step, inputs in enumerate(test_loader.data_loader):
        iter_start_time = time.time()

        im_names = inputs['im_name']
        im = inputs['image'].cuda()
        im_pose = inputs['pose_image']
        im_h = inputs['head']
        shape = inputs['shape']

        agnostic = inputs['agnostic'].cuda()
        c = inputs['cloth'].cuda()
        cm = inputs['cloth_mask'].cuda()

        # outputs = model(torch.cat([agnostic, c], 1))  # CP-VTON
        outputs = model(torch.cat([agnostic, c, cm], 1))  # CP-VTON+
        p_rendered, m_composite = torch.split(outputs, 3, 1)
        p_rendered = F.tanh(p_rendered)
        m_composite = F.sigmoid(m_composite)
        p_tryon = c * m_composite + p_rendered * (1 - m_composite)

        visuals = [[im_h, shape, im_pose], [c, 2 * cm - 1, m_composite],
                   [p_rendered, p_tryon, im]]

        save_images(p_tryon, im_names, try_on_dir)
        save_images(im_h, im_names, im_h_dir)
        save_images(shape, im_names, shape_dir)
        save_images(im_pose, im_names, im_pose_dir)
        save_images(m_composite, im_names, m_composite_dir)
        save_images(p_rendered, im_names, p_rendered_dir)  # For test data

        if (step + 1) % opt.display_count == 0:
            board_add_images(board, 'combine', visuals, step + 1)
            t = time.time() - iter_start_time
            print('step: %8d, time: %.3f' % (step + 1, t), flush=True)
 def forward(self, original_value, to_update_value):
     x = torch.cat((original_value, to_update_value), dim=-1)
     update_value = F.tanh(self.update_value(x))
     update_gate = F.sigmoid(self.update_gate(x))
     return original_value * (1 - update_gate) + update_value * update_gate
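A hedged sketch of a module this gated-update forward could sit in, assuming both inputs share a feature size d (the class name and sizes are illustrative):

import torch
import torch.nn as nn

class GatedUpdate(nn.Module):   # hypothetical host for the forward above
    def __init__(self, d):
        super().__init__()
        # both layers see the concatenation [original_value, to_update_value]
        self.update_value = nn.Linear(2 * d, d)   # candidate update, squashed by tanh
        self.update_gate = nn.Linear(2 * d, d)    # interpolation gate, squashed by sigmoid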
Example no. 36
def train_tom(opt, train_loader, model, board):
    model  #.cuda()
    model.train()

    # criterion
    criterionL1 = nn.L1Loss()
    criterionVGG = VGGLoss()
    criterionMask = nn.L1Loss()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt.lr,
                                 betas=(0.5, 0.999))
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda step: 1.0 - max(0, step - opt.keep_step) / float(
            opt.decay_step + 1))

    for step in range(opt.keep_step + opt.decay_step):
        iter_start_time = time.time()
        inputs = train_loader.next_batch()

        im = inputs['image']  #.cuda()
        im_pose = inputs['pose_image']
        im_h = inputs['head']
        shape = inputs['shape']

        agnostic = inputs['agnostic']  #.cuda()
        c = inputs['cloth']  #.cuda()
        cm = inputs['cloth_mask']  #.cuda()
        pcm = inputs['parse_cloth_mask']  #.cuda()

        # outputs = model(torch.cat([agnostic, c], 1))  # CP-VTON
        outputs = model(torch.cat([agnostic, c, cm], 1))  # CP-VTON+
        p_rendered, m_composite = torch.split(outputs, 3, 1)
        p_rendered = F.tanh(p_rendered)
        m_composite = F.sigmoid(m_composite)
        p_tryon = c * m_composite + p_rendered * (1 - m_composite)

        """visuals = [[im_h, shape, im_pose],
                   [c, cm*2-1, m_composite*2-1],
                   [p_rendered, p_tryon, im]]"""  # CP-VTON

        visuals = [[im_h, shape, im_pose],
                   [c, pcm * 2 - 1, m_composite * 2 - 1],
                   [p_rendered, p_tryon, im]]  # CP-VTON+

        loss_l1 = criterionL1(p_tryon, im)
        loss_vgg = criterionVGG(p_tryon, im)
        # loss_mask = criterionMask(m_composite, cm)  # CP-VTON
        loss_mask = criterionMask(m_composite, pcm)  # CP-VTON+
        loss = loss_l1 + loss_vgg + loss_mask
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (step + 1) % opt.display_count == 0:
            board_add_images(board, 'combine', visuals, step + 1)
            board.add_scalar('metric', loss.item(), step + 1)
            board.add_scalar('L1', loss_l1.item(), step + 1)
            board.add_scalar('VGG', loss_vgg.item(), step + 1)
            board.add_scalar('MaskL1', loss_mask.item(), step + 1)
            t = time.time() - iter_start_time
            print(
                'step: %8d, time: %.3f, loss: %.4f, l1: %.4f, vgg: %.4f, mask: %.4f'
                % (step + 1, t, loss.item(), loss_l1.item(), loss_vgg.item(),
                   loss_mask.item()),
                flush=True)

        if (step + 1) % opt.save_count == 0:
            save_checkpoint(
                model,
                os.path.join(opt.checkpoint_dir, opt.name,
                             'step_%06d.pth' % (step + 1)))
Example no. 37
    def forward(self, inputs, triples, lengths, elmo_embedding, id2_ids_batch):
        if self.args.pretrain_model_type == 'elmo':
            elmo_inputs = torch.Tensor().cuda()
            for i in range(len(inputs)):
                elmo_input = torch.from_numpy(elmo_embedding[' '.join(map(str, inputs[i].cpu().numpy()))].value).type(torch.cuda.FloatTensor)
                try:
                    elmo_inputs = torch.cat((elmo_inputs, elmo_input.unsqueeze(dim=0)))
                except:
                    elmo_inputs = torch.cat((elmo_inputs, elmo_input.unsqueeze(dim=0)[:,:128,:]), dim=0)
            inputs = elmo_inputs
        else:
            inputs = self.embedding(inputs)

        # Introducing external knowledge in different ways.
        t = torch.zeros(inputs.size(0), self.seq_length, self.input_dim + self.triples_embedding_dim).cuda()
        if self.args.concat_mode=="graph_attention":
            for i in range(len(inputs)):
                b = torch.full([self.seq_length, self.triples_number], -1, dtype=torch.long).cuda()
                bb = torch.zeros(self.seq_length, self.triples_embedding_dim).cuda()
                if (torch.equal(id2_ids_batch[i], b)):
                    t[i] = torch.cat((inputs[i], bb), dim=-1)
                else:
                    for k in range(len(id2_ids_batch[i])):
                        c = torch.full([self.triples_number], -1, dtype=torch.long).cuda()
                        cc = torch.zeros(self.triples_embedding_dim).cuda()
                        if (torch.equal(id2_ids_batch[i][k], c)):
                            t[i][k] = torch.cat((inputs[i][k], cc), dim=-1)
                        else:
                            list1 = torch.Tensor().cuda()
                            list2 = torch.Tensor().cuda()
                            head_id, tail_id, relation_id = torch.chunk(triples[i][k], 3, dim=1)
                            t2 = self.embeddings_entity(head_id).cuda()
                            t21 = self.embeddings_entity(tail_id).cuda()
                            t22 = self.embeddings_relation(relation_id).cuda()
                            head_tail = torch.cat((t2, t21), dim=2)
                            list1 = torch.cat((list1, head_tail), dim=0)
                            list2 = torch.cat((list2, t22), dim=0)
                            head_tail_transformed = self.entity_transformed(list1)
                            head_tail_transformed_final = F.tanh(head_tail_transformed)
                            relation_transformed1 = F.tanh(list2)
                            e_weight = (head_tail_transformed_final * relation_transformed1).sum(dim=2)
                            alpha_weight = F.softmax(e_weight, dim=0)
                            graph_embed = (alpha_weight.unsqueeze(1) * head_tail).sum(dim=0)
                            aa = torch.cat((inputs[i][k], graph_embed.squeeze(0)))
                            t[i][k] = aa
        else:
            for i in range(len(inputs)):
                dict = {}
                b = torch.full([self.seq_length, self.triples_number], -1, dtype=torch.long).cuda()
                bb = torch.zeros(self.seq_length, self.triples_embedding_dim).cuda()
                if (torch.equal(id2_ids_batch[i], b)):
                    t[i] = torch.cat((inputs[i], bb), dim=-1)
                else:
                    for k in range(len(id2_ids_batch[i])):
                        a = 0
                        input = torch.Tensor().cuda()
                        c = torch.full([self.triples_number], -1, dtype=torch.long).cuda()
                        cc = torch.zeros(self.triples_embedding_dim).cuda()
                        if (torch.equal(id2_ids_batch[i][k], c)):
                            t[i][k] = torch.cat((inputs[i][k], cc), dim=-1)
                        else:
                            for j in range(len(id2_ids_batch[i][k])):
                                if id2_ids_batch[i][k][j].cpu().numpy() == 1:
                                    inputs_triples = torch.cat(
                                        (inputs[i][k], self.embeddings_entity(triples[i][k][j][1])))
                                elif id2_ids_batch[i][k][j].cpu().numpy() == 2:
                                    inputs_triples = torch.cat(
                                        (inputs[i][k], self.embeddings_entity(triples[i][k][j][0])))
                                else:
                                    continue

                                if a == 0:
                                    a = a + 1
                                    input = torch.cat((inputs_triples, input))
                                else:
                                    a = a + 1
                                    input = input + inputs_triples

                        if a != 0:
                            input = input / a
                            dict[k] = input

                    for k in dict:
                        t[i][k] = dict[k]


        # 1. input
        embedded_input = self.dropout_on_input_to_LSTM(t)
        (sorted_input, sorted_lengths, input_unsort_indices, _) = sort_batch_by_length(embedded_input, lengths)
        packed_input = pack_padded_sequence(sorted_input, sorted_lengths.data.tolist(), batch_first=True)
        packed_sorted_output, _ = self.rnn(packed_input)
        sorted_output, _ = pad_packed_sequence(packed_sorted_output, batch_first=True)
        output = sorted_output[input_unsort_indices]

        # 2. use attention
        if self.args.attention_layer == 'att':
            attention_logits = self.attention_weights(output).squeeze(-1)
            mask_attention_logits = (attention_logits != 0).type(
                torch.cuda.FloatTensor if inputs.is_cuda else torch.FloatTensor)
            softmax_attention_logits = last_dim_softmax(attention_logits, mask_attention_logits)
            softmax_attention_logits0 = softmax_attention_logits.unsqueeze(dim=1)
            input_encoding = torch.bmm(softmax_attention_logits0, output)
            input_encoding0 = input_encoding.squeeze(dim=1)
        else:
            input_encoding = torch.Tensor().cuda()
            querys = self.query_embedding(torch.arange(0,self.args.num_classes,1).cuda())
            attention_weights = torch.Tensor(self.args.num_classes, len(output), len(output[0])).cuda()
            for i in range(self.args.num_classes):
                attention_logits = self.proquery_weights_mp(output)
                attention_logits = torch.bmm(attention_logits, querys[i].unsqueeze(dim=1).repeat(len(output),1,1)).squeeze(dim=-1)
                mask_attention_logits = (attention_logits != 0).type(
                    torch.cuda.FloatTensor if inputs.is_cuda else torch.FloatTensor)
                softmax_attention_logits = last_dim_softmax(attention_logits, mask_attention_logits)
                input_encoding_part = torch.bmm(softmax_attention_logits.unsqueeze(dim=1), output)
                input_encoding = torch.cat((input_encoding,input_encoding_part.squeeze(dim=1)), dim=-1)
                attention_weights[i] = softmax_attention_logits

        # 3. run linear layer
        if self.args.attention_layer == 'att':
            input_encodings = self.dropout_on_input_to_linear_layer(input_encoding0)
            unattized_output = self.output_projection(input_encodings)
            output_distribution = F.log_softmax(unattized_output, dim=-1)
            return output_distribution, softmax_attention_logits.squeeze(dim=1)
        else:
            input_encodings = self.dropout_on_input_to_linear_layer(input_encoding)
            unattized_output = self.multi_output_projection(input_encodings)
            output_distribution = F.log_softmax(unattized_output, dim=-1)
            cos = torch.nn.CosineSimilarity(dim=0, eps=1e-16)
            attention_loss = abs(cos(querys[0], querys[1])) + abs(cos(querys[1], querys[2])) \
                                                            + abs(cos(querys[0], querys[2]))
            return output_distribution, attention_weights, attention_loss
 def forward(self, x):
     x.cuda(self.device)
     x = F.relu(self.fc1(x)).to(self.device)
     x = F.tanh(self.fc2(x)).to(self.device)  #[-1,1]
     return x.cpu().data
 def forward(self, input):
     return F.tanh(self.fc(input)) * F.sigmoid(self.gate_fc(input))
Example no. 40
def first_pooler(x, w, b, train, dropout_prob):
    x = x[:, 0]
    x = F.linear(x, w, b)
    x = F.tanh(x)
    return x
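first_pooler behaves like a BERT-style pooler: it takes the first token of each sequence, applies a linear layer, and squashes with tanh (the train/dropout_prob arguments are accepted but unused in the body). A usage sketch with assumed shapes:

import torch
import torch.nn.functional as F

batch, seq_len, hidden = 2, 16, 768
x = torch.randn(batch, seq_len, hidden)
w = torch.randn(hidden, hidden)
b = torch.zeros(hidden)
pooled = first_pooler(x, w, b, train=False, dropout_prob=0.1)   # (batch, hidden)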
Example no. 41
    def forward(self, input, mapping_layers=[]):
        seg = input

        ret_acts = {}

        x = F.interpolate(seg, size=(self.sh, self.sw))
        x = self.fc(x)
        x = self.fc_norm(x)

        if 'fc' in mapping_layers:
            ret_acts['fc'] = x

        x = self.head_0(x, seg)
        if 'head_0' in mapping_layers:
            ret_acts['head_0'] = x

        x = self.up(x)
        x = self.G_middle_0(x, seg)
        if 'G_middle_0' in mapping_layers:
            ret_acts['G_middle_0'] = x

        if self.opt.num_upsampling_layers == 'more' or \
                self.opt.num_upsampling_layers == 'most':
            x = self.up(x)

        x = self.G_middle_1(x, seg)
        if 'G_middle_1' in mapping_layers:
            ret_acts['G_middle_1'] = x

        x = self.up(x)
        x = self.up_0(x, seg)
        if 'up_0' in mapping_layers:
            ret_acts['up_0'] = x

        x = self.up(x)
        x = self.up_1(x, seg)
        if 'up_1' in mapping_layers:
            ret_acts['up_1'] = x

        x = self.up(x)
        x = self.up_2(x, seg)
        if 'up_2' in mapping_layers:
            ret_acts['up_2'] = x

        x = self.up(x)
        x = self.up_3(x, seg)
        if 'up_3' in mapping_layers:
            ret_acts['up_3'] = x

        if self.opt.num_upsampling_layers == 'most':
            x = self.up(x)
            x = self.up_4(x, seg)
            if 'up_4' in mapping_layers:
                ret_acts['up_4'] = x

        x = self.conv_img(F.leaky_relu(x, 2e-1))
        x = F.tanh(x)

        if len(mapping_layers) == 0:
            return x
        else:
            return x, ret_acts
Example no. 42
 def forward(self, X):
     return F.sigmoid(
         self.linear3(
             self.dropout(
                 F.tanh(self.linear2(self.dropout(F.tanh(
                     self.linear1(X))))))))
Example no. 43
    def forward(self, input_seq, last_hidden, encoder_outputs):
        '''
        :param input_seq:               (B,)
        :param last_hidden:   a tuple of two elem; (num_layers, batch, hidden_size)
        :param encoder_outputs:  (seq_len, batch, hidden_size * num_directions); num_dir = 1
        :return:
        '''
        # Note: we run this one step at a time     
        # Get the embedding of the current input word (last output word)
        max_len = encoder_outputs.size(0)
        batch_size = input_seq.size(0)
        input_seq = input_seq
        encoder_outputs = encoder_outputs.transpose(0,1)    # shape (B,max_len, H*num_dir)
            
        word_embedded = self.embedding(input_seq)  # S=1 x B x N; the leading 1 dim is not there yet; need to unsqueeze()
        word_embedded = self.embedding_dropout(word_embedded)

        # ATTENTION CALCULATION                     last_hidden (H_n, c_n)
        s_t = last_hidden[0][-1].unsqueeze(0)       # shape (1,B,H)
        H = s_t.repeat(max_len,1,1).transpose(0,1)  # shape (B,max_len, H)

        energy = F.tanh(self.W1(torch.cat([H,encoder_outputs], 2)))     # (B,max_len,H)
        energy = energy.transpose(2,1)                                  # (B,H,max_len)
        v = self.v.repeat(encoder_outputs.data.shape[0],1).unsqueeze(1) # [B*1*H]
        p_ptr = torch.bmm(v,energy)   # [B*1*T]
        
        a = F.softmax(p_ptr)    # dim = len(p_ptr.data.size())-1
        context = a.bmm(encoder_outputs)    # [B*1*T] * [B,T,H] ---> [B,1,H]

        # Combine embedded input word and attended context, run through RNN
        # (1,B,2*H)
        # TODO: for the case of B = 1
        rnn_input = torch.cat((word_embedded, context.squeeze(1)), 1).unsqueeze(0)
        '''
            Inputs: input, (h_0, c_0)
                - **input** (seq_len, batch, input_size): tensor containing the features
                  of the input sequence.
                  The input can also be a packed variable length sequence.
                  See :func:`torch.nn.utils.rnn.pack_padded_sequence` for details.
                - **h_0** (num_layers \* num_directions, batch, hidden_size): tensor
                  containing the initial hidden state for each element in the batch.
                - **c_0** (num_layers \* num_directions, batch, hidden_size): tensor
                  containing the initial cell state for each element in the batch.

                  If (h_0, c_0) is not provided, both **h_0** and **c_0** default to zero.

            Outputs: output, (h_n, c_n)
                - **output** (seq_len, batch, hidden_size * num_directions): tensor
                  containing the output features `(h_t)` from the last layer of the RNN,
                  for each t. If a :class:`torch.nn.utils.rnn.PackedSequence` has been
                  given as the input, the output will also be a packed sequence.
                - **h_n** (num_layers * num_directions, batch, hidden_size): tensor
                  containing the hidden state for t=seq_len
                - **c_n** (num_layers * num_directions, batch, hidden_size): tensor
                  containing the cell state for t=seq_len
        '''
        # output shape (1,B,H); why is squeeze never applied here??
        output, hidden = self.lstm(rnn_input, last_hidden)
        p_vacab = self.U(output)        # (1,B,Out_size)  ???
        
        gate = F.sigmoid(self.W(hidden[0][-1]))     # (B,1)
        # # (B*1*T) (1,B.Out_size)  (B,1)   a tuple of two elem; (num_layers, batch, hidden_size)
        return p_ptr, p_vacab, gate, hidden
Example no. 44
 def forward(self, x):
     x = self.nonlin(self.fc1(x))
     x = self.nonlin(self.fc2(x))
     x = (self.fc3(x))
     return f.tanh(x)
Example no. 45
def vectorize_question(args, batch, model, vocab_map, embeddings, padding_id):

    if args.model == 'lstm':
        lstm = model
    else:
        cnn = model

    titles, bodies, triples = batch
    title_length, title_num_questions = titles.shape
    body_length, body_num_questions = bodies.shape
    title_embeddings, body_embeddings = corpus.get_embeddings(
        titles, bodies, vocab_map, embeddings)

    # title
    if args.model == 'lstm':
        if args.cuda:
            title_inputs = [
                autograd.Variable(torch.FloatTensor(title_embeddings).cuda())
            ]
            title_inputs = torch.cat(title_inputs).view(
                title_length, title_num_questions, -1)
            # title_inputs = torch.cat(title_inputs).view(title_num_questions, title_length, -1)

            title_hidden = (autograd.Variable(
                torch.zeros(1, title_num_questions, args.hidden_size).cuda()),
                            autograd.Variable(
                                torch.zeros((1, title_num_questions,
                                             args.hidden_size)).cuda()))
            # title_hidden = (autograd.Variable(torch.zeros(1, title_length, args.hidden_size)),
            #       autograd.Variable(torch.zeros((1, title_length, args.hidden_size))))
        else:
            title_inputs = [
                autograd.Variable(torch.FloatTensor(title_embeddings))
            ]
            title_inputs = torch.cat(title_inputs).view(
                title_length, title_num_questions, -1)
            # title_inputs = torch.cat(title_inputs).view(title_num_questions, title_length, -1)

            title_hidden = (autograd.Variable(
                torch.zeros(1, title_num_questions, args.hidden_size)),
                            autograd.Variable(
                                torch.zeros((1, title_num_questions,
                                             args.hidden_size))))
    else:
        if args.cuda:
            title_inputs = [
                autograd.Variable(torch.FloatTensor(title_embeddings).cuda())
            ]
            #title_inputs = torch.cat(title_inputs).view(title_num_questions, 200, -1)
        else:
            title_inputs = [
                autograd.Variable(torch.FloatTensor(title_embeddings))
            ]
        title_inputs = torch.cat(title_inputs).transpose(0, 1).transpose(1, 2)

    if args.model == 'lstm':
        title_out, title_hidden = lstm(title_inputs, title_hidden)
    else:
        title_out = cnn(title_inputs)
        title_out = F.tanh(title_out)
        title_out = title_out.transpose(1, 2).transpose(0, 1)

    # average all words of each question from title_out
    # title_out (max sequence length) x (batch size) x (hidden size)
    average_title_out = average_questions(title_out, titles, padding_id)

    # body
    if args.model == 'lstm':
        if args.cuda:
            body_inputs = [
                autograd.Variable(torch.FloatTensor(body_embeddings).cuda())
            ]
            body_inputs = torch.cat(body_inputs).view(body_length,
                                                      body_num_questions, -1)

            body_hidden = (autograd.Variable(
                torch.zeros(1, body_num_questions, args.hidden_size).cuda()),
                           autograd.Variable(
                               torch.zeros((1, body_num_questions,
                                            args.hidden_size)).cuda()))
        else:
            body_inputs = [
                autograd.Variable(torch.FloatTensor(body_embeddings))
            ]
            body_inputs = torch.cat(body_inputs).view(body_length,
                                                      body_num_questions, -1)

            body_hidden = (autograd.Variable(
                torch.zeros(1, body_num_questions, args.hidden_size)),
                           autograd.Variable(
                               torch.zeros(
                                   (1, body_num_questions, args.hidden_size))))
    else:
        if args.cuda:
            body_inputs = [
                autograd.Variable(torch.FloatTensor(body_embeddings).cuda())
            ]
        else:
            body_inputs = [
                autograd.Variable(torch.FloatTensor(body_embeddings))
            ]
        body_inputs = torch.cat(body_inputs).transpose(0, 1).transpose(1, 2)

    if args.model == 'lstm':
        body_out, body_hidden = lstm(body_inputs, body_hidden)
    else:
        body_out = cnn(body_inputs)
        body_out = F.tanh(body_out)
        body_out = body_out.transpose(1, 2).transpose(0, 1)

    average_body_out = average_questions(body_out, bodies, padding_id)

    # average body and title
    # representations of the questions as found by the LSTM
    hidden = (average_title_out + average_body_out) * 0.5

    return hidden
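
The helper average_questions is not shown in this snippet. A minimal sketch of such a masked mean over the sequence dimension, assuming hidden states shaped (seq_len, batch, hidden) and token ids shaped (seq_len, batch); the function and argument names below are hypothetical:

import torch

def average_questions_sketch(hidden_states, word_ids, padding_id):
    # hidden_states: (seq_len, batch, hidden); word_ids: (seq_len, batch)
    mask = (word_ids != padding_id).float().unsqueeze(2)  # (seq_len, batch, 1)
    summed = (hidden_states * mask).sum(dim=0)            # (batch, hidden)
    counts = mask.sum(dim=0).clamp(min=1.0)               # guard against all-padding rows
    return summed / counts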
Esempio n. 46
0
 def forward(self, input):
     out = F.leaky_relu(self.fc1(input), LEAK)
     out = F.leaky_relu(self.fc2(out), LEAK)
     out = F.leaky_relu(self.fc3(out), LEAK)
     out = F.tanh(self.fc4(out))
     return out
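
The surrounding module is not included; a minimal self-contained sketch with hypothetical layer sizes (the fc1-fc4 dimensions and the LEAK slope are assumptions, not taken from the original):

import torch
import torch.nn as nn
import torch.nn.functional as F

LEAK = 0.2  # assumed negative slope

class GeneratorSketch(nn.Module):
    def __init__(self, in_dim=100, hidden=256, out_dim=784):
        super().__init__()
        self.fc1 = nn.Linear(in_dim, hidden)
        self.fc2 = nn.Linear(hidden, hidden)
        self.fc3 = nn.Linear(hidden, hidden)
        self.fc4 = nn.Linear(hidden, out_dim)

    def forward(self, input):
        out = F.leaky_relu(self.fc1(input), LEAK)
        out = F.leaky_relu(self.fc2(out), LEAK)
        out = F.leaky_relu(self.fc3(out), LEAK)
        return torch.tanh(self.fc4(out))  # squash outputs into [-1, 1]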
Esempio n. 47
0
    def forward(self, input, context):
        """
        input (FloatTensor): batch x tgt_len x dim: the decoder RNN's output.
        context (FloatTensor): batch x src_len x dim: the source hidden states.
        """

        # one step input
        if isinstance(context, tuple):
            context, tree_context = context

        if input.dim() == 2:
            one_step = True
            input = input.unsqueeze(1)
        else:
            one_step = False

        batch, sourceL, dim = context.size()
        batch_, targetL, dim_ = input.size()
        aeq(batch, batch_)
        aeq(dim, dim_)
        aeq(self.dim, dim)

        # compute attention scores, as in Luong et al.
        align = self.score(input, context)

        if self.mask is not None:
            mask_ = self.mask[:, None, :]
            align.data.masked_fill_(mask_, -math.inf)

        # Softmax to normalize attention weights
        align_vectors = F.softmax(align, dim=-1)
        # each context vector c_t is the weighted average
        # over all the source hidden states
        c = torch.bmm(align_vectors, context)
        if self.multi_key:
            # sharing attention weight
            if self.share_attn:
                sc = torch.bmm(align_vectors, tree_context)
            else:
                # computing attention scores for syntax
                tree_align = self.score(input, tree_context, True)
                if self.mask is not None:
                    tree_align.data.masked_fill_(self.mask[:, None, :],
                                                 -math.inf)
                tree_align_vectors = F.softmax(tree_align, dim=-1)
                sc = torch.bmm(tree_align_vectors, tree_context)

            z = F.sigmoid(self.gate(input))  # batch x tgt_len x dim
            self.z = z  # for visualization
            sc = sc * z
            concat_c = torch.cat([c, input, sc],
                                 2).view(batch * targetL, dim * 3)
        else:
            concat_c = torch.cat([c, input], 2).view(batch * targetL, dim * 2)

        attn_h = self.linear_out(concat_c).view(batch, targetL, dim)
        attn_h = F.tanh(attn_h)

        if one_step:
            attn_h = attn_h.squeeze(1)
            align_vectors = align_vectors.squeeze(1)

            # Check output sizes
            batch_, dim_ = attn_h.size()
            aeq(batch, batch_)
            aeq(dim, dim_)
            batch_, sourceL_ = align_vectors.size()
            aeq(batch, batch_)
            aeq(sourceL, sourceL_)
        else:
            attn_h = attn_h.transpose(0, 1).contiguous()
            align_vectors = align_vectors.transpose(0, 1).contiguous()

            # Check output sizes
            targetL_, batch_, dim_ = attn_h.size()
            aeq(targetL, targetL_)
            aeq(batch, batch_)
            aeq(dim, dim_)
            targetL_, batch_, sourceL_ = align_vectors.size()
            aeq(targetL, targetL_)
            aeq(batch, batch_)
            aeq(sourceL, sourceL_)

        return attn_h, align_vectors
Esempio n. 48
0
 def forward(self, lefts, rights, tracking=None):
     batch_size = len(lefts)
     ret = torch.cat(lefts, 0) + F.tanh(torch.cat(rights, 0))
     return torch.chunk(ret, batch_size, 0)
Esempio n. 49
0
 def forward(self, state):
     x = F.relu(self.bn1(self.fc1(state)))
     x = F.relu(self.fc2(x))
     return F.tanh(self.fc3(x))  
Esempio n. 50
0
 def forward(self, x):
     s = F.sigmoid(x)
     t = F.tanh(x)
     result = t + s
     return result
Esempio n. 51
0
 def forward(self, x, fusions):
     r_f = torch.cat([x, fusions], 2)
     r = F.tanh(self.linear_r(r_f))
     g = F.sigmoid(self.linear_g(r_f))
     o = g * r + (1 - g) * x
     return o
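
A self-contained version of this gated fusion unit, assuming x and fusions are (batch, seq_len, hidden_size) tensors sharing the last dimension; hidden_size and the class name are hypothetical:

import torch
import torch.nn as nn

class GatedFusionSketch(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        # both projections read the concatenation [x; fusions]
        self.linear_r = nn.Linear(2 * hidden_size, hidden_size)
        self.linear_g = nn.Linear(2 * hidden_size, hidden_size)

    def forward(self, x, fusions):
        r_f = torch.cat([x, fusions], 2)
        r = torch.tanh(self.linear_r(r_f))     # candidate representation
        g = torch.sigmoid(self.linear_g(r_f))  # gate in (0, 1)
        return g * r + (1 - g) * x             # blend candidate and original input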
Esempio n. 52
0
 def forward(self, state):
     """Build an actor (policy) network that maps states -> actions."""
     x = state
     for i_f, f in enumerate(self.hidden):
         x = F.relu(f(x)) if i_f < len(self.hidden) - 1 else f(x)
     return F.tanh(x)
Esempio n. 53
0
    def forward(self, x):
        x = F.tanh(self.affine1(x))
        x = F.tanh(self.affine2(x))

        state_values = self.value_head(x)
        return state_values
Esempio n. 54
0
    def forward_glimpse_clouds(self, final_fm, pose_fm):
        # Size of the feature maps
        B, D, T, W, H = final_fm.size()

        # For storing attention weights of the workers
        self.list_attention_worker = [[] for _ in range(self.nb_glimpses)]

        # List of attention points
        list_v = []
        list_attention_points_glimpses = []

        # Init the hidden state of the zoomer
        h = torch.zeros(1, B, self.rnn_zoomer_size)
        h = h.cuda() if CUDA else h

        # Init the hidden state of the workers
        list_r = [
            torch.zeros(1, B, int(D / 4.)) for _ in range(self.nb_workers)
        ]
        list_r = [x.cuda() if CUDA else x for x in list_r]

        # Loop over time
        list_logits = []
        for t in range(T):
            # Extract the feature maps and the pose features
            # (B, 2048, 7, 7) and (B, 1024, 14, 14)
            final_fm_t, pose_fm_t = final_fm[:, :, t], pose_fm[:, :, t]
            c = self.avgpool_14x14(pose_fm_t).view(B, int(D / 2.))  # (B, 1024)

            # Hidden state of the workers
            r_all_workers = list_r[0]
            for r_w in list_r[1:]:
                r_all_workers = r_all_workers + r_w
            r_all_workers = r_all_workers.transpose(0, 1)  # (B, 1, D/4)

            # Loop over the glimpses
            for g in range(self.nb_glimpses):
                # Input of the RNN zoomer
                input_loc_params = torch.cat([c, h.view(B, int(D / 4.))],
                                             1)  # (B, 1536)

                # Estimate (x,y,scale_x,scale_y) of the glimpse
                loc = self.mlp_glimpse_location(input_loc_params)  # (B, 4)
                # ipdb.set_trace()
                loc_xy = F.tanh(loc[:, :2])  # keep the coordinates in [-1, 1]
                loc_zooms = F.sigmoid(loc[:, 2:] + 3.)  # keep the zooms in (0, 1); the +3 starts the zoom near 1

                # Extract the corresponding features map with Spatial Transformer
                Z = zoom_ST(final_fm_t, loc_xy, loc_zooms, W, H,
                            CUDA)  # (B, 2048, 7, 7)

                # Get the visual and location features and finally append
                z = self.avgpool_7x7(Z).view(B, D)  # (B, 2048)
                v = z * self.mlp_embedding_location(loc)  # (B, 2048)

                # Store glimpse features and attention points
                list_v.append(v)
                list_attention_points_glimpses.append(
                    torch.cat([loc_xy, loc_zooms], 1))

                # Update the zoomer
                _, h = self.rnn_zoomer(
                    torch.cat([v.view(B, 1, D), r_all_workers], 2), h)

            # Compute the similarity matrix
            all_v = torch.stack(list_v, 1).view(B, t + 1, self.nb_glimpses,
                                                D)  # (B,t,C,D)
            similarity_matrix = self.compute_similarity_matrix(all_v)

            # Create the input for each worker
            list_v_tild = []
            # Distribute the features over the workers
            for w in range(self.nb_workers):
                # Get the input for the worker
                input_worker = self.get_worker_input(similarity_matrix, all_v,
                                                     w, t)
                list_v_tild.append(input_worker)

                # Fetch this worker and its previous hidden state
                rnn, hidden = self.list_worker[w], list_r[w]

                # Run the rnn
                out, hidden = rnn(input_worker.unsqueeze(1), hidden)

                # Update the list of hidden state
                list_r[w] = hidden

                # And finally classify
                fc = self.list_fc[w]
                logits = fc(out.view(B, int(D / 4.)))
                list_logits.append(logits)

        # Stack
        all_logits = torch.stack(list_logits, 1)  # (B,T,60)

        # Average the logits
        logits = torch.mean(all_logits, 1)  # (B, 60)

        # Stack attention points
        attention_points_glimpses = torch.stack(list_attention_points_glimpses,
                                                1).view(
                                                    B, T, self.nb_glimpses, 4)

        return logits, attention_points_glimpses
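
zoom_ST is not defined in this snippet; a plausible crop-and-resize implemented with a spatial transformer (the name zoom_st_sketch and its exact argument handling are assumptions, and the real helper may differ) looks like:

import torch
import torch.nn.functional as F

def zoom_st_sketch(feature_map, loc_xy, loc_zooms, out_w, out_h):
    # feature_map: (B, C, H, W); loc_xy in [-1, 1]; loc_zooms in (0, 1)
    B, C = feature_map.size(0), feature_map.size(1)
    theta = feature_map.new_zeros(B, 2, 3)
    theta[:, 0, 0] = loc_zooms[:, 0]  # horizontal scale
    theta[:, 1, 1] = loc_zooms[:, 1]  # vertical scale
    theta[:, 0, 2] = loc_xy[:, 0]     # horizontal offset
    theta[:, 1, 2] = loc_xy[:, 1]     # vertical offset
    grid = F.affine_grid(theta, (B, C, out_h, out_w), align_corners=False)
    return F.grid_sample(feature_map, grid, align_corners=False)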
Esempio n. 55
0
 def score(self, query, key):
     input = tr.cat([query, key], dim=-1)
     return self.linear2(F.tanh(self.linear1(input)))
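
The surrounding module is not shown; a minimal additive (MLP) scorer with hypothetical dimensions, followed by the usual softmax over key positions:

import torch
import torch.nn as nn

class AdditiveScorerSketch(nn.Module):
    def __init__(self, query_dim, key_dim, hidden_dim):
        super().__init__()
        self.linear1 = nn.Linear(query_dim + key_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, 1)

    def score(self, query, key):
        merged = torch.cat([query, key], dim=-1)
        return self.linear2(torch.tanh(self.linear1(merged)))

# usage sketch: broadcast one query (B, 1, Dq) against L keys (B, L, Dk)
# scores = scorer.score(query.expand(-1, keys.size(1), -1), keys)  # (B, L, 1)
# weights = torch.softmax(scores, dim=1)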
Esempio n. 56
0
    def forward(self, xes, hidden, attn_params):
        """
        Compute attention over attn_params given input and hidden states.

        :param xes:         input state. will be combined with applied
                            attention.
        :param hidden:      hidden state from the model. will be used to select
                            states to attend to from the attn_params.
        :param attn_params: tuple of encoder output states and a mask showing
                            which input indices are nonzero.

        :returns: output, attn_weights
                  output is a new state of same size as input state `xes`.
                  attn_weights are the weights given to each state in the
                  encoder outputs.
        """
        if self.attention == 'none':
            # do nothing, no attention
            return xes, None

        if type(hidden) == tuple:
            # for lstms use the "hidden" state not the cell state
            hidden = hidden[0]
        last_hidden = hidden[-1]  # select hidden state from last RNN layer

        enc_out, attn_mask = attn_params
        bsz, seqlen, hszXnumdir = enc_out.size()
        numlayersXnumdir = last_hidden.size(1)

        if self.attention == 'local':
            # local attention weights aren't based on encoder states
            h_merged = torch.cat((xes.squeeze(1), last_hidden), 1)
            attn_weights = F.softmax(self.attn(h_merged), dim=1)

            # adjust state sizes to the fixed window size
            if seqlen > self.max_length:
                offset = seqlen - self.max_length
                enc_out = enc_out.narrow(1, offset, self.max_length)
                seqlen = self.max_length
            if attn_weights.size(1) > seqlen:
                attn_weights = attn_weights.narrow(1, 0, seqlen)
        else:
            hid = last_hidden.unsqueeze(1)
            if self.attention == 'concat':
                # concat hidden state and encoder outputs
                hid = hid.expand(bsz, seqlen, numlayersXnumdir)
                h_merged = torch.cat((enc_out, hid), 2)
                # then do linear combination of them with activation
                active = F.tanh(self.attn(h_merged))
                attn_w_premask = self.attn_v(active).squeeze(2)
            elif self.attention == 'dot':
                # dot product between hidden and encoder outputs
                if numlayersXnumdir != hszXnumdir:
                    # enc_out has two directions, so double hid
                    hid = torch.cat([hid, hid], 2)
                enc_t = enc_out.transpose(1, 2)
                attn_w_premask = torch.bmm(hid, enc_t).squeeze(1)
            elif self.attention == 'general':
                # before doing dot product, transform hidden state with linear
                # same as dot if linear is identity
                hid = self.attn(hid)
                enc_t = enc_out.transpose(1, 2)
                attn_w_premask = torch.bmm(hid, enc_t).squeeze(1)

            # calculate activation scores, apply mask if needed
            if attn_mask is not None:
                # remove activation from NULL symbols
                attn_w_premask.masked_fill_((1 - attn_mask), -NEAR_INF)
            attn_weights = F.softmax(attn_w_premask, dim=1)

        # apply the attention weights to the encoder states
        attn_applied = torch.bmm(attn_weights.unsqueeze(1), enc_out)
        # concatenate the input and encoder states
        merged = torch.cat((xes.squeeze(1), attn_applied.squeeze(1)), 1)
        # combine them with a linear layer and tanh activation
        output = torch.tanh(self.attn_combine(merged).unsqueeze(1))

        return output, attn_weights
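
For reference, the three content-based scorings this module switches between ('dot', 'general', 'concat') can be written as standalone functions; a hedged sketch, assuming hid is (bsz, 1, dim), enc_out is (bsz, seqlen, dim), and the linear layers are passed in:

import torch

def dot_score(hid, enc_out):
    # (bsz, 1, dim) x (bsz, dim, seqlen) -> (bsz, seqlen)
    return torch.bmm(hid, enc_out.transpose(1, 2)).squeeze(1)

def general_score(hid, enc_out, linear):
    # transform the hidden state first; equals dot_score when linear is the identity
    return torch.bmm(linear(hid), enc_out.transpose(1, 2)).squeeze(1)

def concat_score(hid, enc_out, attn, attn_v):
    # concatenate, pass through a tanh layer, then project each position to a scalar
    bsz, seqlen, dim = enc_out.size()
    merged = torch.cat((enc_out, hid.expand(bsz, seqlen, hid.size(2))), 2)
    return attn_v(torch.tanh(attn(merged))).squeeze(2)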
Esempio n. 57
0
    def forward(self, rep1, len1, mask1, rep2, len2):

        # Compute context vectors using attention.
        def context_vector(h_t):
            WhH = torch.matmul(h_t, self.Wh)

            # Use mask to ignore the outputs of the padding part in premise
            shape = WhH.size()
            WhH = WhH.view(shape[0], 1, shape[1])
            WhH = WhH.expand(shape[0], max_seq_len, shape[1])

            M1 = mask1.type(self.float_type)
            shape = M1.size()
            M = M1.view(shape[0], shape[1], 1).type(self.float_type)
            M = M.expand(shape[0], shape[1], self.lstm_size)

            WhH = WhH * M
            M = torch.tanh(WyY + WhH)
            aW = self.aW.view(1, 1, -1)
            aW = aW.expand(batch_size, max_seq_len, aW.size()[2])

            # Compute batch dot: the first step of a softmax
            batch_dot = M * aW
            batch_dot = torch.sum(batch_dot, 2)

            # Avoid overflow
            max_by_column, _ = torch.max(batch_dot, 1)
            max_by_column = max_by_column.view(-1, 1)
            max_by_column = max_by_column.expand(max_by_column.size()[0],
                                                 max_seq_len)

            batch_dot = torch.exp(batch_dot - max_by_column) * M1

            # Partition function and attention:
            # the second step of a softmax, use mask to ignore the padding
            partition = torch.sum(batch_dot, 1)
            partition = partition.view(-1, 1)
            partition = partition.expand(partition.size()[0], max_seq_len)
            attention = batch_dot / partition

            # compute context vector
            shape = attention.size()
            attention = attention.view(shape[0], shape[1], 1)
            attention = attention.expand(shape[0], shape[1], self.lstm_size)

            cv_t = outputs_1 * attention
            cv_t = torch.sum(cv_t, 1)

            return cv_t

        # ################# Forward Propagation code ###################

        # Set batch size
        batch_size = rep1.size()[0]

        # Representation of input sentences
        sent1 = self.embedding(rep1)
        sent2 = self.embedding(rep2)

        # Transform the sentence representations to:
        # (sequence length * batch size * feature size)
        sent1 = sent1.transpose(1, 0)
        sent2 = sent2.transpose(1, 0)

        # ----------------- YOUR CODE HERE ----------------------
        # Run the two LSTM's, compute the context vectors,
        # compute the final representation of the sentence pair,
        # and run it through the fully connected layer, then
        # through the softmax layer.
        rep = torch.cat((rep1, rep2), 0)
        #length = torch.cat((len1, len2), 0)

        # Representation for input sentences
        batch_size = rep1.size()[0]
        sents = self.embedding(rep)
        (sents_premise, sents_hypothesis) = torch.split(sents, batch_size)

        # (sequence length * batch size * feature size)
        sents_premise = sents_premise.transpose(1, 0)
        sents_hypothesis = sents_hypothesis.transpose(1, 0)

        # Initialize hidden states and cell states
        (hx, cx) = self.init_hidden(batch_size)
        hx = hx.view(batch_size, -1)
        cx = cx.view(batch_size, -1)
        hidden = (hx, cx)

        # Output of LSTM: (sequence length x mini batch x lstm size)
        outp = []
        hidden_states = []
        for inp in range(sents_premise.size(0)):
            hidden = self.lstm1(sents_premise[inp], hidden)
            outp += [hidden[0]]
            hidden_states += [hidden[1]]

        outp = torch.stack(outp).transpose(0, 1)
        len1 = (len1 - 1).view(-1, 1, 1).expand(outp.size(0), 1, outp.size(2))
        out = torch.gather(outp, 1, len1).transpose(1, 0)

        hidden_states = torch.stack(hidden_states).transpose(0, 1)
        #len1 = (len1-1).view(-1, 1, 1).expand(hidden_states.size(0), 1, hidden_states.size(2))
        hidden_state = torch.gather(hidden_states, 1, len1).transpose(1, 0)

        lstm_outs, hidden_hypothesis = self.lstm2(sents_hypothesis,
                                                  (out, hidden_state))
        lstm_outs = lstm_outs.transpose(0, 1)

        len2 = (len2 - 1).view(-1, 1, 1).expand(lstm_outs.size(0), 1,
                                                lstm_outs.size(2))
        lstm_out = torch.gather(lstm_outs, 1, len2)
        lstm_out = lstm_out.view(lstm_out.size(0), -1)

        #############################################
        outputs_1 = lstm_outs
        max_seq_len = rep1.size()[1]
        WyY = torch.matmul(outputs_1, self.Wy)
        context_vec = context_vector(lstm_out)
        final = torch.tanh(
            torch.matmul(context_vec, self.Wp) +
            torch.matmul(lstm_out, self.Wh))
        #############################################

        # Apply dropout to the final sentence-pair representation
        final = F.dropout(final, p=self.drop_out)

        # Output of fully connected layers
        fc_out = F.dropout(F.tanh(self.linear1(lstm_out)), p=self.drop_out)
        #fc_out = F.dropout(F.tanh(self.linear2(fc_out)), p=self.drop_out)
        #fc_out = F.dropout(F.tanh(self.linear3(fc_out)), p=self.drop_out)

        # Output of Softmax
        fc_out = self.linear2(fc_out)

        return F.log_softmax(fc_out, dim=1)
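
The two-step masked softmax inside context_vector (subtract the row-wise max, exponentiate, zero out padding, then normalise) can be written more compactly; a hedged equivalent over scores shaped (batch, seq_len) with a 0/1 float mask of the same shape:

import torch

def masked_softmax_sketch(scores, mask, eps=1e-13):
    scores = scores - scores.max(dim=1, keepdim=True)[0]  # numerical stability
    exp = torch.exp(scores) * mask                        # drop padded positions
    return exp / (exp.sum(dim=1, keepdim=True) + eps)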
Esempio n. 58
0
    def forward(self, input, z=None):
        seg = input
        
        if self.opt.use_vae:
            # we sample z from unit normal and reshape the tensor
            if z is None:
                z = torch.randn(input.size(0), self.opt.z_dim,
                                dtype=torch.float32, device=input.get_device())
            x = self.fc(z)
            x = x.view(-1, 16*self.opt.ngf, self.sh, self.sw)
        else:
            # we downsample segmap and run convolution
            x = F.interpolate(seg, size=(self.sh, self.sw))
            x = self.fc(x)

        # encode segmentation labels
        seg1 = self.labelenc1(seg) # 256
        seg2 = self.labelenc2(seg1) # 128
        seg3 = self.labelenc3(seg2) # 64
        seg4 = self.labelenc4(seg3) # 32
        seg5 = self.labelenc5(seg4) # 16
        seg6 = self.labelenc6(seg5) # 8
        if self.num_upsampling_layers == 'more':
            seg7 = self.labelenc7(seg6)
            segout1 = seg7
            segout2 = self.up(segout1) + self.labellat1(seg6) 
            segout2 = self.labeldec1(segout2) 
            segout3 = self.up(segout2) + self.labellat2(seg5) 
            segout3 = self.labeldec2(segout3) 
            segout4 = self.up(segout3) + self.labellat3(seg4) 
            segout4 = self.labeldec3(segout4) 
            segout5 = self.up(segout4) + self.labellat4(seg3) 
            segout5 = self.labeldec4(segout5) 
            segout6 = self.up(segout5) + self.labellat5(seg2) 
            segout6 = self.labeldec5(segout6) 
            segout7 = self.up(segout6) + self.labellat6(seg1) 
            segout7 = self.labeldec6(segout7)
        else:
            segout1 = seg6
            segout2 = self.up(segout1) + self.labellat1(seg5)
            segout2 = self.labeldec1(segout2) 
            segout3 = self.up(segout2) + self.labellat2(seg4) 
            segout3 = self.labeldec2(segout3) 
            segout4 = self.up(segout3) + self.labellat3(seg3)
            segout4 = self.labeldec3(segout4) 
            segout5 = self.up(segout4) + self.labellat4(seg2)
            segout5 = self.labeldec4(segout5) 
            segout6 = self.up(segout5) + self.labellat5(seg1) 
            segout6 = self.labeldec5(segout6) 

        x = self.head_0(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout1), dim=1)) # 8

        x = self.up(x)
        x = self.G_middle_0(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout2), dim=1)) # 16
        if self.num_upsampling_layers == 'more':
            x = self.up(x)
            x = self.G_middle_1(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout3), dim=1)) 
        else:
            x = self.G_middle_1(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout2), dim=1)) # 16

        x = self.up(x)
        if self.num_upsampling_layers == 'more':
            x = self.up_0(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout4), dim=1)) # 32
        else:
            x = self.up_0(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout3), dim=1)) # 32

        x = self.up(x)
        if self.num_upsampling_layers == 'more':
            x = self.up_1(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout5), dim=1)) # 64
        else:
            x = self.up_1(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout4), dim=1)) # 64

        x = self.up(x)
        if self.num_upsampling_layers == 'more':
            x = self.up_2(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout6), dim=1)) # 128
        else:
            x = self.up_2(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout5), dim=1)) # 128

        x = self.up(x)
        if self.num_upsampling_layers == 'more':
            x = self.up_3(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout7), dim=1)) # 256
        else:
            x = self.up_3(x, torch.cat((F.interpolate(seg, size=x.size()[2:], mode='nearest'), segout6), dim=1)) # 256


        x = self.conv_img(F.leaky_relu(x, 2e-1))
        x = F.tanh(x)

        return x
Esempio n. 59
0
 def forward(self, x):
     x = F.relu(self.fc1(x))
     x = F.relu(self.fc2(x))
     out = F.tanh(self.out(x))
     return out
Esempio n. 60
0
    def forward(self, low, high):
        low = F.leaky_relu(self.low_conv1(low), negative_slope=0.05)
        low = F.leaky_relu(self.low_conv2(low), negative_slope=0.05)
        low = self.low_block1(low)
        low = F.leaky_relu(self.low_down1(low), negative_slope=0.05)

        low = self.low_channel_wise(low)
        # low = self.low_spatial_wise(low)

        low = self.low_block2(low)
        low = F.leaky_relu(self.low_down2(low), negative_slope=0.05)

        low = self.low_channel_wise2(low)

        low = self.low_block3(low)
        low = F.leaky_relu(self.low_down3(low), negative_slope=0.05)

        low = self.low_channel_wise3(low)

        low = self.low_block4(low)
        low = F.leaky_relu(self.low_down4(low), negative_slope=0.05)

        low = self.low_channel_wise4(low)

        high = F.leaky_relu(self.high_conv1(high), negative_slope=0.05)
        high = F.leaky_relu(self.high_conv2(high), negative_slope=0.05)
        high = self.high_block1(high)
        high = F.leaky_relu(self.high_down1(high), negative_slope=0.05)

        high = self.high_channel_wise(high)
        # high = self.high_spatial_wise(high)

        high = self.high_block2(high)
        high = F.leaky_relu(self.high_down2(high), negative_slope=0.05)

        high = self.high_channel_wise2(high)

        high = self.high_block3(high)
        high = F.leaky_relu(self.high_down3(high), negative_slope=0.05)

        high = self.high_channel_wise3(high)

        high = self.high_block4(high)
        high = F.leaky_relu(self.high_down4(high), negative_slope=0.05)

        high = self.high_channel_wise4(high)

        lstm_input = torch.cat([low, high], 1)

        #print(lstm_input.shape)
        fuse = self.fuse(lstm_input)

        h = torch.zeros(low.size(0), 32, low.size(2), low.size(3)).type(torch.cuda.FloatTensor)
        c = torch.zeros(low.size(0), 32, low.size(2), low.size(3)).type(torch.cuda.FloatTensor)
        lstm_seq = []
        for _ in range(10):
            z = torch.cat([lstm_input, h], 1)
            # ConvLSTM gates; the conv_* blocks are assumed to include their
            # own sigmoid/tanh activations
            i = self.conv_i(z)  # input gate
            f = self.conv_f(z)  # forget gate
            g = self.conv_g(z)  # candidate cell state
            o = self.conv_o(z)  # output gate
            c = f * c + i * g
            h = o * F.tanh(c)
            output_lstm = self.conv_lstm_output(h)
            lstm_seq.append(output_lstm)

        final = fuse + lstm_seq[-1]

        # return final, lstm_seq[-1]
        return final
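
The conv_i/conv_f/conv_g/conv_o blocks used in the recurrence above are assumed to bundle their own sigmoid/tanh activations; a minimal self-contained ConvLSTM cell along those lines (the kernel size and padding are assumptions; the 32 hidden channels follow the state tensors above):

import torch
import torch.nn as nn

class ConvLSTMCellSketch(nn.Module):
    def __init__(self, in_channels, hidden_channels=32, kernel_size=3):
        super().__init__()
        pad = kernel_size // 2

        def gate(act):
            return nn.Sequential(
                nn.Conv2d(in_channels + hidden_channels, hidden_channels,
                          kernel_size, padding=pad),
                act)

        self.conv_i = gate(nn.Sigmoid())  # input gate
        self.conv_f = gate(nn.Sigmoid())  # forget gate
        self.conv_g = gate(nn.Tanh())     # candidate cell state
        self.conv_o = gate(nn.Sigmoid())  # output gate

    def forward(self, x, h, c):
        z = torch.cat([x, h], 1)
        i, f, g, o = self.conv_i(z), self.conv_f(z), self.conv_g(z), self.conv_o(z)
        c = f * c + i * g
        h = o * torch.tanh(c)
        return h, c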