def forward(self, input_, hx):
    """
    Args:
        input_: A (batch, input_size) tensor containing input features.
        hx: A tuple (h_0, c_0), which contains the initial hidden
            and cell state, where the size of both states is
            (batch, hidden_size).

    Returns:
        h_1, c_1: Tensors containing the next hidden and cell state.
    """
    h_0, c_0 = hx
    batch_size = h_0.size(0)
    bias_batch = (self.bias.unsqueeze(0)
                  .expand(batch_size, *self.bias.size()))
    wh = torch.mm(h_0, self.weight_hh)
    wi = torch.mm(input_, self.weight_ih)
    bn_wh = self.bn_hh(wh)
    bn_wi = self.bn_ih(wi)
    f, i, o, g = torch.split(bn_wh + bn_wi + bias_batch,
                             split_size_or_sections=self.hidden_size, dim=1)
    c_1 = torch.sigmoid(f) * c_0 + torch.sigmoid(i) * torch.tanh(g)
    h_1 = torch.sigmoid(o) * torch.tanh(self.bn_c(c_1))
    return h_1, c_1
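# Hedged usage sketch (not from the original module): the cell above assumes
# weight_ih is (input_size, 4*hidden_size) and weight_hh is
# (hidden_size, 4*hidden_size), inferred from how they are used. One step of
# the same gate arithmetic with plain tensors, batch norms omitted:
import torch

batch, input_size, hidden_size = 4, 8, 16
x   = torch.randn(batch, input_size)
h_0 = torch.zeros(batch, hidden_size)
c_0 = torch.zeros(batch, hidden_size)
weight_ih = torch.randn(input_size, 4 * hidden_size) * 0.1
weight_hh = torch.randn(hidden_size, 4 * hidden_size) * 0.1
bias = torch.zeros(4 * hidden_size)

pre = x @ weight_ih + h_0 @ weight_hh + bias        # (batch, 4*hidden)
f, i, o, g = torch.split(pre, hidden_size, dim=1)   # four (batch, hidden) gates
c_1 = torch.sigmoid(f) * c_0 + torch.sigmoid(i) * torch.tanh(g)
h_1 = torch.sigmoid(o) * torch.tanh(c_1)            # next hidden state, (4, 16)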
def forward(self, x):
    x = torch.tanh(self.fc1(x))
    x = torch.tanh(self.fc2(x))
    mu = self.fc3(x)
    logstd = torch.zeros_like(mu)
    std = torch.exp(logstd)
    return mu, std
def test_cse(self):
    x = Variable(torch.Tensor([0.4, 0.3]), requires_grad=True)
    y = Variable(torch.Tensor([0.7, 0.5]), requires_grad=True)

    trace = torch._C._tracer_enter((x, y), 0)
    w = (x + y) * (x + y) * (x + y)
    t = torch.tanh(w) + torch.tanh(w)
    z = (x + y) * (x + y) * (x + y) + t
    torch._C._tracer_exit((z,))
    torch._C._jit_pass_lint(trace)
    torch._C._jit_pass_cse(trace)

    self.assertExpected(str(trace))
def forward(self, input, doc_lens):
    """
    :param input: (B*S, L)
    :param doc_lens: (B)
    :return:
    """
    sent_lens = torch.sum(torch.sign(input), dim=1).data  # (B*S); word ids are positive and pad_id is 0
    input = self.embed(input)  # (B*S, L, D)

    # word level GRU
    input = self.word_RNN(input)[0]  # (B*S, L, D) -> ((B*S, L, 2*H), (B*S, 1, 2*H)); [0] -> (B*S, L, 2*H)
    # word_out = self.avg_pool1d(x, sent_lens)
    word_out = self.max_pool1d(input, sent_lens)  # (B*S, L, 2*H) -> (B*S, 2*H)

    # make sent features (pad with zeros)
    input = self.pad_doc(word_out, doc_lens)  # (B*S, 2*H) -> (B, max_doc_len, 2*H)

    # sent level GRU
    sent_out = self.sent_RNN(input)[0]  # (B, max_doc_len, 2*H) -> (B, max_doc_len, 2*H)
    # docs = self.avg_pool1d(sent_out, doc_lens)  # (B, 2*H)
    docs = self.max_pool1d(sent_out, doc_lens)  # (B, 2*H)

    batch_probs = []
    for index, doc_len in enumerate(doc_lens):  # iterate over the B docs
        valid_hidden = sent_out[index, :doc_len, :]  # (doc_len, 2*H)
        doc = torch.tanh(self.fc(docs[index])).unsqueeze(0)  # (1, 2*H)
        s = torch.zeros(1, 2 * self.args.hidden_dim).to(opt.device)  # (1, 2*H)
        probs = []
        for position, h in enumerate(valid_hidden):
            h = h.view(1, -1)  # (1, 2*H)

            # get position embeddings
            abs_index = torch.LongTensor([[position]]).to(opt.device)
            abs_features = self.abs_pos_embed(abs_index).squeeze(0)

            rel_index = int((position + 1) * 9.0 / doc_len)
            rel_index = torch.LongTensor([[rel_index]]).to(opt.device)
            rel_features = self.rel_pos_embed(rel_index).squeeze(0)

            # classification layer
            content = self.content(h)  # (1, 2*H) -> (1, 1)
            salience = self.salience(h, doc)  # (1, 2*H), (1, 2*H) -> (1, 1)
            novelty = -1 * self.novelty(h, torch.tanh(s))  # (1, 2*H), (1, 2*H) -> (1, 1)
            abs_p = self.abs_pos(abs_features)  # (1, 1)
            rel_p = self.rel_pos(rel_features)  # (1, 1)
            prob = torch.sigmoid(content + salience + novelty + abs_p + rel_p + self.bias)  # (1, 1), e.g. [[0.35]]
            s = s + torch.mm(prob, h)  # (1, 2*H) + (1, 1) x (1, 2*H) -> (1, 2*H)
            probs.append(prob)  # S * (1, 1)
        batch_probs.append(torch.cat(probs).squeeze())  # (S, 1) -> (S) -> B * (S)

    # return torch.stack(batch_probs).squeeze()  # B * (S) -> (B, S)
    return torch.cat(batch_probs).squeeze()  # B * (S) -> (B * S)
def forward(self, data, last_hidden):
    hx, cx = last_hidden
    m = self.wmx(data) * self.wmh(hx)
    gates = self.wx(data) + self.wh(m)
    i, f, o, u = gates.chunk(4, 1)

    i = torch.sigmoid(i)
    f = torch.sigmoid(f)
    u = torch.tanh(u)
    o = torch.sigmoid(o)

    cy = f * cx + i * u
    hy = o * torch.tanh(cy)

    return hy, cy
def forward(self, input, hidden_state):
    hidden, c = hidden_state  # hidden and c are images with several channels
    # print('hidden', hidden.size())
    # print('input', input.size())
    combined = torch.cat((input, hidden), 1)  # concatenate along the channel axis
    # print('combined', combined.size())
    A = self.conv(combined)
    (ai, af, ao, ag) = torch.split(A, self.num_features, dim=1)  # it should return 4 tensors
    i = torch.sigmoid(ai)
    f = torch.sigmoid(af)
    o = torch.sigmoid(ao)
    g = torch.tanh(ag)

    next_c = f * c + i * g
    next_h = o * torch.tanh(next_c)
    return next_h, next_c
def encode(self, src_sents_var: torch.Tensor, src_sent_lens: List[int]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    """
    Use a GRU/LSTM to encode source sentences into hidden states

    Args:
        src_sents_var: tensor of source sentence tokens

    Returns:
        src_encodings: hidden states of tokens in source sentences, this could be a variable
            with shape (batch_size, source_sentence_length, encoding_dim), or in other formats
        decoder_init_state: decoder GRU/LSTM's initial state, computed from source encodings
    """
    # (src_sent_len, batch_size, embed_size)
    src_word_embeds = self.src_embed(src_sents_var)
    packed_src_embed = pack_padded_sequence(src_word_embeds, src_sent_lens)

    # src_encodings: (src_sent_len, batch_size, hidden_size * 2)
    src_encodings, (last_state, last_cell) = self.encoder_lstm(packed_src_embed)
    src_encodings, _ = pad_packed_sequence(src_encodings)

    # (batch_size, src_sent_len, hidden_size * 2)
    src_encodings = src_encodings.permute(1, 0, 2)

    dec_init_cell = self.decoder_cell_init(torch.cat([last_cell[0], last_cell[1]], dim=1))
    dec_init_state = torch.tanh(dec_init_cell)

    return src_encodings, (dec_init_state, dec_init_cell)
def forward(self, s_t_hat, h, enc_padding_mask, coverage):
    b, t_k, n = list(h.size())
    h = h.view(-1, n)  # B * t_k x 2*hidden_dim
    encoder_feature = self.W_h(h)
    dec_fea = self.decode_proj(s_t_hat)  # B x 2*hidden_dim
    dec_fea_expanded = dec_fea.unsqueeze(1).expand(b, t_k, n).contiguous()  # B x t_k x 2*hidden_dim
    dec_fea_expanded = dec_fea_expanded.view(-1, n)  # B * t_k x 2*hidden_dim

    att_features = encoder_feature + dec_fea_expanded  # B * t_k x 2*hidden_dim
    if self.args.is_coverage:
        coverage_input = coverage.view(-1, 1)  # B * t_k x 1
        coverage_feature = self.W_c(coverage_input)  # B * t_k x 2*hidden_dim
        att_features = att_features + coverage_feature

    e = torch.tanh(att_features)  # B * t_k x 2*hidden_dim
    scores = self.v(e)  # B * t_k x 1
    scores = scores.view(-1, t_k)  # B x t_k

    attn_dist_ = F.softmax(scores, dim=1) * enc_padding_mask  # B x t_k
    normalization_factor = attn_dist_.sum(1, keepdim=True)
    attn_dist = attn_dist_ / normalization_factor

    attn_dist = attn_dist.unsqueeze(1)  # B x 1 x t_k
    h = h.view(-1, t_k, n)  # B x t_k x 2*hidden_dim
    c_t = torch.bmm(attn_dist, h)  # B x 1 x n
    c_t = c_t.view(-1, self.args.hidden_dim * 2)  # B x 2*hidden_dim

    attn_dist = attn_dist.view(-1, t_k)  # B x t_k

    if self.args.is_coverage:
        coverage = coverage.view(-1, t_k)
        coverage = coverage + attn_dist

    return c_t, attn_dist, coverage
def forward(self, input_seq, last_hidden, encoder_outputs):
    # Note: we run this one step at a time
    # Get the embedding of the current input word (last output word)
    batch_size = input_seq.size(0)
    embedded = self.embedding(input_seq)
    embedded = embedded.view(1, batch_size, self.hidden_size)  # S=1 x B x N
    # Get current hidden state from input word and last hidden state
    rnn_output, hidden = self.gru(embedded, last_hidden)
    # Calculate attention from current RNN state and all encoder outputs;
    # apply to encoder outputs to get weighted average
    context = self.attn(rnn_output, encoder_outputs)
    context = context.squeeze(1)  # context is B x N
    # Attentional vector using the RNN hidden state and context vector
    # concatenated together (Luong eq. 5)
    rnn_output = rnn_output.squeeze(0)  # S=1 x B x N -> B x N
    concat_input = torch.cat((rnn_output, context), 1)
    concat_output = torch.tanh(self.concat(concat_input))
    # Finally predict next token (Luong eq. 6, without softmax)
    output = self.out(concat_output)  # output is B x vocab_size
    output = self.LogSoftmax(output)
    # Return final output, hidden state
    return output, hidden
def decode_step(self, enc_hs, enc_mask, input_, hidden):
    '''
    enc_hs: tuple(enc_hs, scale_enc_hs)
    '''
    src_seq_len = enc_hs[0].size(0)
    h_t, hidden = self.dec_rnn(input_, hidden)
    # h_t: batch x trg_hid_dim
    # enc_hs: seq_len x batch x src_hid_dim*2
    # attns: batch x 1 x seq_len
    _, attns = self.attn(h_t, enc_hs, enc_mask, weighted_ctx=False)
    # Concatenate the ht and hs
    # ctx: batch x seq_len x (trg_hid_siz+src_hid_size*2)
    ctx = torch.cat(
        (h_t.unsqueeze(1).expand(-1, src_seq_len, -1),
         enc_hs[0].transpose(0, 1)),
        dim=2)
    # ctx: batch x seq_len x out_dim
    ctx = self.linear_out(ctx)
    ctx = torch.tanh(ctx)
    # word_prob: batch x seq_len x nb_vocab
    word_prob = F.softmax(self.final_out(ctx), dim=-1)
    # word_prob: batch x nb_vocab
    word_prob = torch.bmm(attns, word_prob).squeeze(1)
    return torch.log(word_prob), hidden, attns
def decode_step(self, enc_hs, enc_mask, input_, hidden):
    src_seq_len, bat_siz = enc_mask.shape
    h_t, hidden = self.dec_rnn(input_, hidden)

    # Concatenate the ht and hs
    # ctx_trans: batch x seq_len x (trg_hid_siz*2)
    ctx_trans = torch.cat(
        (h_t.unsqueeze(1).expand(-1, src_seq_len, -1),
         enc_hs[1].transpose(0, 1)),
        dim=2)
    trans = F.softmax(self.trans(ctx_trans), dim=-1)
    trans_list = trans.split(1, dim=1)
    ws = (self.wid_siz - 1) // 2
    trans_shift = [
        F.pad(t, (-ws + i, src_seq_len - (ws + 1) - i))
        for i, t in enumerate(trans_list)
    ]
    trans = torch.cat(trans_shift, dim=1)
    trans = trans * enc_mask.transpose(0, 1).unsqueeze(1) + EPSILON
    trans = trans / trans.sum(-1, keepdim=True)
    trans = trans.log()

    # Concatenate the ht and hs
    # ctx_emiss: batch x seq_len x (trg_hid_siz+src_hid_size*2)
    ctx_emiss = torch.cat(
        (h_t.unsqueeze(1).expand(-1, src_seq_len, -1),
         enc_hs[0].transpose(0, 1)),
        dim=2)
    ctx = torch.tanh(self.linear_out(ctx_emiss))
    # emiss: batch x seq_len x nb_vocab
    emiss = F.log_softmax(self.final_out(ctx), dim=-1)
    return trans, emiss, hidden
def forward(self, input_, c_input, hx):
    """
    Args:
        batch = 1
        input_: A (batch, input_size) tensor containing input features.
        c_input: A list of size c_num; each element is the input c_t from a
            skip word, with shape (batch, hidden_size).
        hx: A tuple (h_0, c_0), which contains the initial hidden
            and cell state, where the size of both states is
            (batch, hidden_size).

    Returns:
        h_1, c_1: Tensors containing the next hidden and cell state.
    """
    h_0, c_0 = hx
    batch_size = h_0.size(0)
    # assert batch_size == 1
    bias_batch = (self.bias.unsqueeze(0).expand(batch_size, *self.bias.size()))
    wh_b = torch.addmm(bias_batch, h_0, self.weight_hh)
    wi = torch.mm(input_, self.weight_ih)
    i, o, g = torch.split(wh_b + wi, split_size_or_sections=self.hidden_size, dim=1)
    i = torch.sigmoid(i)
    g = torch.tanh(g)
    o = torch.sigmoid(o)
    c_num = len(c_input)
    if c_num == 0:
        f = 1 - i
        c_1 = f * c_0 + i * g
        h_1 = o * torch.tanh(c_1)
    else:
        c_input_var = torch.cat(c_input, 0)
        alpha_bias_batch = (self.alpha_bias.unsqueeze(0).expand(batch_size, *self.alpha_bias.size()))
        c_input_var = c_input_var.squeeze(1)  # (c_num, hidden_dim)
        alpha_wi = torch.addmm(self.alpha_bias, input_, self.alpha_weight_ih).expand(c_num, self.hidden_size)
        alpha_wh = torch.mm(c_input_var, self.alpha_weight_hh)
        alpha = torch.sigmoid(alpha_wi + alpha_wh)
        # alpha = i concat alpha
        alpha = torch.exp(torch.cat([i, alpha], 0))
        alpha_sum = alpha.sum(0)
        # alpha = softmax for each hidden element
        alpha = torch.div(alpha, alpha_sum)
        merge_i_c = torch.cat([g, c_input_var], 0)
        c_1 = merge_i_c * alpha
        c_1 = c_1.sum(0).unsqueeze(0)
        h_1 = o * torch.tanh(c_1)
    return h_1, c_1
def f(x, y):
    out = x + y
    with torch.jit.scope('Foo', out):
        out = x * out
        with torch.jit.scope('Bar', out):
            out = torch.tanh(out)
        out = torch.sigmoid(out)
    return out
def forward(self, input_tensor, cur_state):
    h_cur, c_cur = cur_state

    combined = torch.cat([input_tensor, h_cur], dim=1)  # concatenate along channel axis

    combined_conv = self.conv(combined)
    cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
    i = torch.sigmoid(cc_i)
    f = torch.sigmoid(cc_f)
    o = torch.sigmoid(cc_o)
    g = torch.tanh(cc_g)

    c_next = f * c_cur + i * g
    h_next = o * torch.tanh(c_next)

    return h_next, c_next
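# Hedged sketch (illustrative, not the original module): the ConvLSTM step
# above relies on one convolution that emits 4*hidden_dim channels, split
# into the i/f/o/g gates. Dimensions below are assumptions.
import torch
import torch.nn as nn

input_dim, hidden_dim = 3, 8
conv = nn.Conv2d(input_dim + hidden_dim, 4 * hidden_dim, kernel_size=3, padding=1)

x     = torch.randn(2, input_dim, 16, 16)   # (B, C_in, H, W)
h_cur = torch.zeros(2, hidden_dim, 16, 16)
c_cur = torch.zeros(2, hidden_dim, 16, 16)

combined = torch.cat([x, h_cur], dim=1)                  # stack input and state channels
cc_i, cc_f, cc_o, cc_g = torch.split(conv(combined), hidden_dim, dim=1)
c_next = torch.sigmoid(cc_f) * c_cur + torch.sigmoid(cc_i) * torch.tanh(cc_g)
h_next = torch.sigmoid(cc_o) * torch.tanh(c_next)        # both (2, hidden_dim, 16, 16)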
def forward(self, words):
    emb = self.embedding(words)
    emb_sum = torch.sum(emb, dim=0)  # size(emb_sum) = emb_size
    h = emb_sum.view(1, -1)  # size(h) = 1 x emb_size
    for i in range(self.nlayers):
        h = torch.tanh(self.linears[i](h))
    out = self.output_layer(h)
    return out
def norm_flow_reverse(self, params, z1, z2):
    h = torch.tanh(params[1][0](z2))
    mew_ = params[1][1](h)
    sig_ = torch.sigmoid(params[1][2](h))  # [PB,Z]
    z1 = (z1 - mew_) / sig_
    logdet2 = torch.sum(torch.log(sig_), 1)

    h = torch.tanh(params[0][0](z1))
    mew_ = params[0][1](h)
    sig_ = torch.sigmoid(params[0][2](h))  # [PB,Z]
    z2 = (z2 - mew_) / sig_
    logdet = torch.sum(torch.log(sig_), 1)

    # [PB]
    logdet = logdet + logdet2

    # [PB,Z], [PB]
    return z1, z2, logdet
def _attention(self, query_t, keys_bth, keys_mask):
    B, T, H = keys_bth.shape
    q = self.W_a(query_t.view(-1, self.hsz)).view(B, 1, H)
    u = self.E_a(keys_bth.contiguous().view(-1, self.hsz)).view(B, T, H)
    z = torch.tanh(q + u)
    a = self.v(z.view(-1, self.hsz)).view(B, T)
    a = a.masked_fill(keys_mask == 0, -1e9)
    a = F.softmax(a, dim=-1)
    return a
def forward(self, input):
    x = F.relu(self.linear_bn(self.linear(input)))
    x = x.view(-1, self.d * 8, 2, 2)
    x = F.relu(self.deconv1_bn(self.deconv1(x)))
    x = x[:, :, :-1, :-1]  # hacky way to get shapes right (like "SAME" in tf)
    x = F.relu(self.deconv2_bn(self.deconv2(x)))
    x = F.relu(self.deconv3_bn(self.deconv3(x)))
    x = x[:, :, :-1, :-1]
    x = torch.tanh(self.deconv4(x))
    x = x[:, :, :-1, :-1]
    return x
def test_disabled_traced_function(self):
    x = Variable(torch.Tensor([0.4]), requires_grad=True)
    y = Variable(torch.Tensor([0.7]), requires_grad=True)

    @torch.jit.compile(enabled=False)
    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    z = doit(x, y)
    z2 = doit(x, y)
    self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
    self.assertEqual(z, z2)
def step(self, x: torch.Tensor,
         h_tm1: Tuple[torch.Tensor, torch.Tensor],
         src_encodings: torch.Tensor,
         src_encoding_att_linear: torch.Tensor,
         src_sent_masks: torch.Tensor) -> Tuple[Tuple, torch.Tensor, torch.Tensor]:
    # h_t: (batch_size, hidden_size)
    h_t, cell_t = self.decoder_lstm(x, h_tm1)

    ctx_t, alpha_t = self.dot_prod_attention(h_t, src_encodings, src_encoding_att_linear, src_sent_masks)

    att_t = torch.tanh(self.att_vec_linear(torch.cat([h_t, ctx_t], 1)))  # Eq. (5)
    att_t = self.dropout(att_t)

    return (h_t, cell_t), att_t, alpha_t
def tanh_quantize(input, bits):
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input)
    input = torch.tanh(input)  # [-1, 1]
    input_rescale = (input + 1.0) / 2  # [0, 1]
    n = math.pow(2.0, bits) - 1
    v = torch.floor(input_rescale * n + 0.5) / n
    v = 2 * v - 1  # [-1, 1]
    v = 0.5 * torch.log((1 + v) / (1 - v))  # arctanh
    return v
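# Hedged usage sketch: tanh_quantize squashes values to (-1, 1), snaps them to
# a uniform grid of 2**bits levels, then maps back through arctanh, so
# tanh(tanh_quantize(x, b)) takes at most 2**b distinct values. The check
# below is illustrative, assuming `math` and `torch` are imported as above.
import math
import torch

x = torch.linspace(-1, 1, 1001)
q = tanh_quantize(x, bits=3)
print(torch.unique(torch.tanh(q)))  # at most 2**3 distinct tanh-domain levels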
def forward(self, inp, h0_l0, c0_l0, h0_l1, c0_l1):
    # Look up the embeddings of the input words
    if inp in self.w2i:
        inp = self.encoder[self.w2i[inp]]
    else:
        inp = self.encoder[self.unk_idx]

    # LAYER 0
    # forget gate
    f_g_l0 = torch.sigmoid((torch.matmul(self.w_if_l0, inp) + self.b_if_l0)
                           + (torch.matmul(self.w_hf_l0, h0_l0) + self.b_hf_l0))
    # input gate
    i_g_l0 = torch.sigmoid((torch.matmul(self.w_ii_l0, inp) + self.b_ii_l0)
                           + (torch.matmul(self.w_hi_l0, h0_l0) + self.b_hi_l0))
    # output gate
    o_g_l0 = torch.sigmoid((torch.matmul(self.w_io_l0, inp) + self.b_io_l0)
                           + (torch.matmul(self.w_ho_l0, h0_l0) + self.b_ho_l0))
    # intermediate cell state
    c_tilde_l0 = torch.tanh((torch.matmul(self.w_ig_l0, inp) + self.b_ig_l0)
                            + (torch.matmul(self.w_hg_l0, h0_l0) + self.b_hg_l0))
    # current cell state
    cx_l0 = f_g_l0 * c0_l0 + i_g_l0 * c_tilde_l0
    # hidden state
    hx_l0 = o_g_l0 * torch.tanh(cx_l0)

    # LAYER 1
    # forget gate
    f_g_l1 = torch.sigmoid((torch.matmul(self.w_if_l1, hx_l0) + self.b_if_l1)
                           + (torch.matmul(self.w_hf_l1, h0_l1) + self.b_hf_l1))
    # input gate
    i_g_l1 = torch.sigmoid((torch.matmul(self.w_ii_l1, hx_l0) + self.b_ii_l1)
                           + (torch.matmul(self.w_hi_l1, h0_l1) + self.b_hi_l1))
    # output gate
    o_g_l1 = torch.sigmoid((torch.matmul(self.w_io_l1, hx_l0) + self.b_io_l1)
                           + (torch.matmul(self.w_ho_l1, h0_l1) + self.b_ho_l1))
    # intermediate cell state
    c_tilde_l1 = torch.tanh((torch.matmul(self.w_ig_l1, hx_l0) + self.b_ig_l1)
                            + (torch.matmul(self.w_hg_l1, h0_l1) + self.b_hg_l1))
    # current cell state
    cx_l1 = f_g_l1 * c0_l1 + i_g_l1 * c_tilde_l1
    # hidden state
    hx_l1 = o_g_l1 * torch.tanh(cx_l1)

    out = torch.matmul(self.w_decoder, hx_l1) + self.b_decoder
    return out, [hx_l0, cx_l0, f_g_l0, i_g_l0, o_g_l0, c_tilde_l0], [hx_l1, cx_l1, f_g_l1, i_g_l1, o_g_l1, c_tilde_l1]
def forward(self, sent_tuple):
    # sent_len: [max_len, ..., min_len] (batch)
    # sent: Variable(seqlen x batch x worddim)
    sent, sent_len = sent_tuple
    bsize = sent.size(1)

    self.init_lstm = self.init_lstm if bsize == self.init_lstm.size(1) else \
        Variable(torch.FloatTensor(2, bsize, self.enc_lstm_dim).zero_()).cuda()

    # Sort by length (keep idx)
    sent_len, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
    sent = sent.index_select(1, Variable(torch.cuda.LongTensor(idx_sort)))

    # Handling padding in Recurrent Networks
    sent_packed = nn.utils.rnn.pack_padded_sequence(sent, sent_len)
    sent_output = self.enc_lstm(sent_packed, (self.init_lstm, self.init_lstm))[0]
    # seqlen x batch x 2*nhid
    sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]

    # Un-sort by length
    idx_unsort = np.argsort(idx_sort)
    sent_output = sent_output.index_select(1, Variable(torch.cuda.LongTensor(idx_unsort)))

    sent_output = sent_output.transpose(0, 1).contiguous()

    sent_output_proj = self.proj_lstm(
        sent_output.view(-1, 2 * self.enc_lstm_dim)).view(bsize, -1, 2 * self.enc_lstm_dim)
    sent_keys = self.proj_enc(
        sent_output.view(-1, 2 * self.enc_lstm_dim)).view(bsize, -1, 2 * self.enc_lstm_dim)

    sent_max = torch.max(sent_output, 1)[0].squeeze(1)  # (bsize, 2*nhid)
    sent_summary = self.proj_query(
        sent_max).unsqueeze(1).expand_as(sent_keys)  # (bsize, seqlen, 2*nhid)

    sent_M = torch.tanh(sent_keys + sent_summary)  # (bsize, seqlen, 2*nhid) YANG: M = tanh(Wh_i + Wh_avg)
    sent_w = self.query_embedding(Variable(torch.LongTensor(
        bsize * [0]).cuda())).unsqueeze(2)  # (bsize, 2*nhid, 1)
    sent_alphas = self.softmax(sent_M.bmm(sent_w).squeeze(2)).unsqueeze(1)  # (bsize, 1, seqlen)

    if int(time.time()) % 200 == 0:
        print('w', torch.max(sent_w[0]), torch.min(sent_w[0]))
        print('alphas', sent_alphas[0][0][0:sent_len[0]])

    # Get attention vector
    emb = sent_alphas.bmm(sent_output_proj).squeeze(1)

    return emb
def test_traced_function(self):
    x = Variable(torch.Tensor([0.4]), requires_grad=True)
    y = Variable(torch.Tensor([0.7]), requires_grad=True)

    @torch.jit.compile(nderivs=0)
    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    z = doit(x, y)
    with self.assertCompiled(doit):
        z2 = doit(x, y)
    self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
    self.assertEqual(z, z2)
def norm_flow(self, params, z1, z2):
    # print(z.size())
    h = torch.tanh(params[0][0](z1))
    mew_ = params[0][1](h)
    # sig_ = F.sigmoid(params[0][2](h) + 5.)  # [PB,Z]
    sig_ = torch.sigmoid(params[0][2](h))  # [PB,Z]
    z2 = z2 * sig_ + mew_
    logdet = torch.sum(torch.log(sig_), 1)

    h = torch.tanh(params[1][0](z2))
    mew_ = params[1][1](h)
    # sig_ = F.sigmoid(params[1][2](h) + 5.)  # [PB,Z]
    sig_ = torch.sigmoid(params[1][2](h))  # [PB,Z]
    z1 = z1 * sig_ + mew_
    logdet2 = torch.sum(torch.log(sig_), 1)

    # [PB]
    logdet = logdet + logdet2

    # [PB,Z], [PB]
    return z1, z2, logdet
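# Hedged round-trip sketch: norm_flow and the norm_flow_reverse shown earlier
# are mutual inverses. From how `params` is indexed, it is assumed to be a
# nested list [[h_net, mu_net, sig_net], [h_net, mu_net, sig_net]]; the
# nn.Linear stand-ins below are illustrative, not the original model. Neither
# method uses `self`, so passing None works here.
import torch
import torch.nn as nn

Z = 4

def make_step():
    # three linear maps standing in for params[i][0..2]: h, mu, and sigma nets
    return [nn.Linear(Z, Z), nn.Linear(Z, Z), nn.Linear(Z, Z)]

params = [make_step(), make_step()]
z1, z2 = torch.randn(2, Z), torch.randn(2, Z)

z1f, z2f, logdet = norm_flow(None, params, z1, z2)            # forward transform
z1r, z2r, logdet_r = norm_flow_reverse(None, params, z1f, z2f)  # invert it
assert torch.allclose(z1r, z1, atol=1e-5) and torch.allclose(z2r, z2, atol=1e-5)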
def test_compile_run_twice(self):
    x = Variable(torch.Tensor([0.4]), requires_grad=True)
    y = Variable(torch.Tensor([0.7]), requires_grad=True)

    @torch.jit.compile(nderivs=0, optimize=False)
    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    z = doit(x, y)
    with self.assertCompiled(doit):
        z2 = doit(x, y)
    self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
    self.assertEqual(z, z2)
def forward(self, tensor, c=None):
    h_filter = self.filter_conv(tensor)
    h_gate = self.gate_conv(tensor)

    if self.local_conditioning:
        h_filter += self.filter_conv_c(c)
        h_gate += self.gate_conv_c(c)

    out = torch.tanh(h_filter) * torch.sigmoid(h_gate)

    res = self.res_conv(out)
    skip = self.skip_conv(out) if self.skip else None

    return (tensor + res) * math.sqrt(0.5), skip
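# Hedged sketch of the gated activation used above (WaveNet-style): tanh acts
# as the filter and sigmoid as the gate, multiplied elementwise. The channel
# sizes and convolutions below are illustrative assumptions.
import torch
import torch.nn as nn

channels = 8
filter_conv = nn.Conv1d(channels, channels, kernel_size=3, padding=1)
gate_conv = nn.Conv1d(channels, channels, kernel_size=3, padding=1)

x = torch.randn(2, channels, 100)
out = torch.tanh(filter_conv(x)) * torch.sigmoid(gate_conv(x))  # same shape as x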
def test_compile_addc(self):
    x = Variable(torch.Tensor([0.4]), requires_grad=True).float().cuda()
    y = Variable(torch.Tensor([0.7]), requires_grad=True).float().cuda()

    @torch.jit.compile(nderivs=0)
    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y) + 1))

    z = doit(x, y)
    with self.assertCompiled(doit):
        z2 = doit(x, y)
    self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y) + 1)))
    self.assertEqual(z, z2)
def memModel(contxtWords, aspectWords, position, sentLength):
    vaspect = aspectWords
    for i in range(hopNumber):
        Vi = 1.0 - position / sentLength - (i / vectorLength) * (1.0 - 2.0 * (position / sentLength))
        Mi = Vi.expand_as(contxtWords) * contxtWords

        attentionContxt = torch.mm(attention_w, Mi)
        alphaContxt = softmax(torch.mm(attention_wh, torch.tanh(attentionContxt)))
        g0 = torch.sum(alphaContxt.expand_as(Mi) * Mi, 1)

        attentionE = torch.mm(attention_w, vaspect) + torch.mm(attention_wg, g0).expand(k, 1)
        alphaE = softmax(torch.mm(attention_wh, torch.tanh(attentionE)))
        ge = torch.sum(alphaE.expand_as(vaspect) * vaspect, 1)

        attentionEContxt = torch.mm(attention_w, Mi) + torch.mm(attention_wg, ge).expand(k, sentLength)
        alphaEContxt = softmax(torch.mm(attention_wh, torch.tanh(attentionEContxt)))
        gContxt = torch.sum(alphaEContxt.expand_as(Mi) * Mi, 1)

        linearLayerOut = torch.mm(linearLayer_W, vaspect) + linearLayer_b
        vaspect = gContxt + linearLayerOut

    finallinearLayerOut = torch.mm(softmaxLayer_W, vaspect) + softmaxLayer_b
    return finallinearLayerOut
def seqAttention(sequence, weights):
    """
    Applies attention to the given sequence
    """
    # compute the importance over the sequence
    importance = t.tanh(t.matmul(sequence, weights))
    # compute the attention
    attention = f.softmax(importance, 0)
    tSeq = sequence.permute(1, 0)
    # compute and return the representation
    return t.matmul(tSeq, attention)
import torch
import numpy as np

data = np.arange(15) - 5
np.random.shuffle(data)
a = torch.Tensor(data)
print(a)
print(torch.tanh(a))
print(torch.tanh(a).view(3, 5))
print(torch.tanh(a).view(3, 5).prod(0))
def forward(self, Y, Mi, Ymask, Mmask):
    # Y=[B,1xd]  Ymask=[B,1x1]  Mi=[B,mxd]  Mmask=[B,mx1]
    Mhat = torch.tanh(self.W1(Y.expand(-1, Mi.size(1), -1)) + self.W2(Mi))
    g = torch.sigmoid(self.W3(Y.expand(-1, Mi.size(1), -1)) + self.W4(Mi))
    Mnext = torch.mul(1 - g, Mi) + torch.mul(g, Mhat)
    return F.normalize(1e-7 + Mnext, dim=-1)
matplotlib """ import torch import torch.nn.functional as F from torch.autograd import Variable import matplotlib.pyplot as plt # fake data x = torch.linspace(-5, 5, 200) # x data (tensor), shape=(100, 1) x = Variable(x) x_np = x.data.numpy() # numpy array for plotting # following are popular activation functions y_relu = torch.relu(x).data.numpy() y_sigmoid = torch.sigmoid(x).data.numpy() y_tanh = torch.tanh(x).data.numpy() y_softplus = F.softplus(x).data.numpy() # there's no softplus in torch y_softmax = torch.softmax(x, dim=0).data.numpy( ) #softmax is a special kind of activation function, it is about probability # plt to visualize these activation function plt.figure(1, figsize=(8, 6)) plt.subplot(231) plt.plot(x_np, y_relu, c='red', label='relu') plt.ylim((-1, 5)) plt.legend(loc='best') plt.subplot(232) plt.plot(x_np, y_sigmoid, c='red', label='sigmoid') plt.ylim((-0.2, 1.2)) plt.legend(loc='best')
def get_matrix(self, encoderp):
    tp = torch.tanh(self.wp(encoderp))
    tc = torch.tanh(self.wc(encoderp))
    f = tp.bmm(self.wa(tc).transpose(1, 2))
    return F.softmax(f, dim=2)
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, args):
    """Training loop for one epoch."""
    losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()),
                          (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())])

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    # For Early Exit, we define statistics for each exit,
    # so exiterrors is analogous to classerr for the non-Early Exit case
    if args.earlyexit_lossweights:
        args.exiterrors = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)', total_samples, batch_size)

    epoch_frac = args.partial_epoch
    steps_per_frac_epoch = math.ceil((total_samples * epoch_frac) / batch_size)

    # Switch to train mode
    model.train()
    end = time.time()

    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)
        inputs, target = inputs.to('cuda'), target.to('cuda')
        if train_step == steps_per_frac_epoch:
            break

        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step, steps_per_epoch, optimizer)

        if args.kd_policy is None:
            output = model(inputs)
        else:
            output = args.kd_policy.forward(inputs)

        if not args.earlyexit_lossweights:
            # ------------------------------------------------- AHMED edit sin2-reg - April19
            # adding sin2 regularization here
            qbits_dict = {}
            sin2_reg_loss = 0
            # print('weights:', model.module.conv2.weight.size())
            bw = 3
            qbits_dict['conv1'] = bw
            qbits_dict['conv2'] = bw
            qbits_dict['fc1'] = bw
            qbits_dict['fc2'] = bw
            qbits_dict['fc3'] = bw

            # ---------------------
            # kernel = model.module.features[0].float_weight
            kernel1 = model.module.conv1.weight
            kernel2 = model.module.conv2.weight
            kernel3 = model.module.fc1.weight
            kernel4 = model.module.fc2.weight
            kernel5 = model.module.fc3.weight

            last_epoch = 999
            if train_step == last_epoch:
                w1 = kernel1.data.cpu().numpy()
                w2 = kernel2.data.cpu().numpy()
                w3 = kernel3.data.cpu().numpy()
                w4 = kernel4.data.cpu().numpy()
                w5 = kernel5.data.cpu().numpy()
                np.save('weights_sin2Reg/cifar10_L1_weights' + str(last_epoch), w1)
                np.save('weights_sin2Reg/cifar10_L2_weights' + str(last_epoch), w2)
                np.save('weights_sin2Reg/cifar10_L3_weights' + str(last_epoch), w3)
                np.save('weights_sin2Reg/cifar10_L4_weights' + str(last_epoch), w4)
                np.save('weights_sin2Reg/cifar10_L5_weights' + str(last_epoch), w5)
                print('++++saving weights+++++++++++++++++++++++++++')
            # ---------------------

            # ----------------------------------
            q = 2
            power = 2
            step = 1 / (2 ** q - 0.5)  # dorefa
            shift = step / 2
            # step = 1 / (2 ** q - 1)  # wrpn
            # shift = 0
            # amplitude = (np.sin(pi * (weight + step / 2) / step)) ** 2

            step = 1 / (2 ** (model.module.B1.clone()) - 0.5)  # dorefa
            # step = 1 / (2 ** 5 - 0.5)  # dorefa
            shift = step / 2
            # kernel = model.module.conv1.float_weight
            kernel = model.module.conv1.weight
            # sin2_func_1 = torch.mean(torch.pow(torch.sin(pi * kernel / (2 ** (-qbits_dict['conv1']) - 1)), 2))
            sin2_func_1 = torch.mean((torch.sin(pi * (kernel + shift) / step)) ** power)  # dorefa
            # print(sin2_func_1.data[0])

            step = 1 / (2 ** (model.module.B2.clone()) - 0.5)  # dorefa
            # step = 1 / (2 ** 3 - 0.5)  # dorefa
            shift = step / 2
            # kernel = model.module.conv2.float_weight
            kernel = model.module.conv2.weight
            # sin2_func_2 = torch.mean(torch.pow(torch.sin(pi * kernel / (2 ** (-qbits_dict['conv2']) - 1)), 2))
            sin2_func_2 = torch.mean(torch.pow(torch.sin(pi * (kernel + shift) / step), power))  # dorefa

            step = 1 / (2 ** (model.module.B3.clone()) - 0.5)  # dorefa
            # step = 1 / (2 ** 3 - 0.5)  # dorefa
            shift = step / 2
            # kernel = model.module.fc1.float_weight
            kernel = model.module.fc1.weight
            # sin2_func_3 = torch.mean(torch.pow(torch.sin(pi * kernel / (2 ** (-qbits_dict['fc1']) - 1)), 2))
            sin2_func_3 = torch.mean(torch.pow(torch.sin(pi * (kernel + shift) / step), power))  # dorefa

            step = 1 / (2 ** (model.module.B4.clone()) - 0.5)  # dorefa
            # step = 1 / (2 ** 3 - 0.5)  # dorefa
            shift = step / 2
            # kernel = model.module.fc2.float_weight
            kernel = model.module.fc2.weight
            # sin2_func_4 = torch.mean(torch.pow(torch.sin(pi * kernel / (2 ** (-qbits_dict['fc2']) - 1)), 2))
            sin2_func_4 = torch.mean(torch.pow(torch.sin(pi * (kernel + shift) / step), power))  # dorefa

            step = 1 / (2 ** (model.module.B5.clone()) - 0.5)  # dorefa
            # step = 1 / (2 ** 4 - 0.5)  # dorefa
            shift = step / 2
            # kernel = model.module.fc3.float_weight
            kernel = model.module.fc3.weight
            # sin2_func_5 = torch.mean(torch.pow(torch.sin(pi * kernel / (2 ** (-qbits_dict['fc3']) - 1)), 2))
            sin2_func_5 = torch.mean(torch.pow(torch.sin(pi * (kernel + shift) / step), power))  # dorefa
            # ----------------------------------

            sin2_reg_loss = sin2_func_1 + sin2_func_2 + sin2_func_3 + sin2_func_4 + sin2_func_5
            freq_loss = model.module.B1 + model.module.B2 + model.module.B3 + model.module.B4 + model.module.B5
            # sin2_reg_loss = sin2_func_1 + sin2_func_3 + sin2_func_4
            # loss = criterion(output, target)

            # settings 0
            # if train_step > 100:
            #     lambda_q = 1
            #     lambda_f = 0.05
            # else:
            #     lambda_q = 0
            #     lambda_f = 0

            # settings 1
            # lambda_q = (1 / torch.exp(torch.tensor(4.0))).to('cuda') * torch.exp(torch.tensor(4 * int(epoch) / 1000)).to('cuda')  # rising1
            # lambda_f = 0.05
            # lambda_qp = (1 / np.exp(4)) * torch.exp(torch.from_numpy(np.array(4 * epoch / 500))).cpu().numpy().data  # rising1
            # lambda_fp = lambda_f

            # settings 2: step-like lambda
            r = 0.2 * args.epochs
            d = 0.8 * args.epochs
            s = 20
            f1 = 0.5 * (1 + torch.tanh(torch.tensor((epoch - r) / s).to('cuda')))
            f2 = 0.5 * (1 + torch.tanh(torch.tensor((epoch - d) / s).to('cuda')))
            lambda_q = f1
            # lambda_f_value = 0.02 * (f1 - f2)
            lambda_f = 0.03

            reg_loss = lambda_q * sin2_reg_loss
            loss = criterion(output, target) + reg_loss + (lambda_f * freq_loss)
            # print('sin2_reg_LOSS:', sin2_reg_loss.data[0])
            # print('total_LOSS:', loss.data[0])
            # print('MODEL:', model.state_dict())
            # ------------------------------------------------- AHMED edit sin2-reg - April19

            # Measure accuracy and record loss
            classerr.add(output.data, target)
        else:
            # Measure accuracy and record loss
            loss = earlyexit_loss(output, target, criterion, args)

        losses[OBJECTIVE_LOSS_KEY].add(loss.item())
        # print('sin2_reg_LOSS:', sin2_reg_loss.data[0])

        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            agg_loss = compression_scheduler.before_backward_pass(epoch, train_step, steps_per_epoch, loss,
                                                                  optimizer=optimizer, return_loss_components=True)
            loss = agg_loss.overall_loss
            losses[OVERALL_LOSS_KEY].add(loss.item())
            for lc in agg_loss.loss_components:
                if lc.name not in losses:
                    losses[lc.name] = tnt.AverageValueMeter()
                losses[lc.name].add(lc.value.item())

        # Compute the gradient and do the SGD step
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step, steps_per_epoch, optimizer)

        # Measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = train_step + 1

        if steps_completed % args.print_freq == 0:
            # Log some statistics
            errs = OrderedDict()
            if not args.earlyexit_lossweights:
                errs['Top1'] = classerr.value(1)
                errs['Top5'] = classerr.value(5)
            else:
                # for the Early Exit case, the Top1 and Top5 stats are computed for each exit
                for exitnum in range(args.num_exits):
                    errs['Top1_exit' + str(exitnum)] = args.exiterrors[exitnum].value(1)
                    errs['Top5_exit' + str(exitnum)] = args.exiterrors[exitnum].value(5)

            stats_dict = OrderedDict()
            for loss_name, meter in losses.items():
                stats_dict[loss_name] = meter.mean
            stats_dict.update(errs)
            stats_dict['LR'] = optimizer.param_groups[0]['lr']
            stats_dict['Time'] = batch_time.mean
            stats = ('Performance/Training/', stats_dict)

            params = model.named_parameters() if args.log_params_histograms else None
            distiller.log_training_progress(stats, params, epoch, steps_completed,
                                            steps_per_epoch, args.print_freq, loggers)
        end = time.time()

    kernel = model.module.conv1.weight
    # kernel = model.module.conv1.float_weight
    print('00000000000000000000')
    w1 = kernel.data.cpu().numpy()
    np.save('w1_cifar', w1)
    print('======================================', reg_loss.data[0])
    print('learned bitwidths',
          model.module.B1.data.cpu().numpy()[0],
          model.module.B2.data.cpu().numpy()[0],
          model.module.B3.data.cpu().numpy()[0],
          model.module.B4.data.cpu().numpy()[0],
          model.module.B5.data.cpu().numpy()[0])
def mish(x):
    return x * torch.tanh(nn.functional.softplus(x))
def val(epoch, writer_val):
    # define meters
    loss_meter, iou_meter = AverageMeter(), AverageMeter()

    # put model into eval mode
    model.eval()

    with torch.no_grad():
        for i, sample in enumerate(tqdm(val_dataset_it)):
            im = sample['image']
            instances = sample['instance'].squeeze()
            class_labels = sample['label'].squeeze()

            output = model(im)
            loss = criterion(output, instances, class_labels, **args['loss_w'],
                             iou=True, iou_meter=iou_meter)
            loss = loss.mean()

            if args['display'] and i % args['display_it'] == 0:
                with torch.no_grad():
                    visualizer.display(im[0], 'image')

                    predictions = cluster.cluster_with_gt(
                        output[0], instances[0], n_sigma=args['loss_opts']['n_sigma'])
                    visualizer.display([predictions.cpu(), instances[0].cpu()], 'pred')

                    sigma = output[0][2].cpu()
                    sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min())
                    sigma[instances[0] == 0] = 0
                    visualizer.display(sigma, 'sigma')

                    seed = torch.sigmoid(output[0][3]).cpu()
                    visualizer.display(seed, 'seed')

            loss_meter.update(loss.item())

    if args['tensorboard']:
        with torch.no_grad():
            color_map = draw_flow(torch.tanh(output[0][0:2]))
            seed = torch.sigmoid(output[0][3:11]).cpu()
            sigma = output[0][2].cpu()
            sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min())
            sigma[instances[0] == 0] = 0
            # predictions = cluster.cluster_with_gt(output[0], instances[0], n_sigma=args['loss_opts']['n_sigma'])
            color_map = color_map.transpose(2, 0, 1)
            seed_visual = seed.unsqueeze(1)
            seed_show = vutils.make_grid(seed_visual, nrow=8, normalize=True, scale_each=True)
            writer_val.add_image('Input', im[0], epoch)
            writer_val.add_image('InstanceGT', instances[0].unsqueeze(0).cpu().numpy(), epoch)
            writer_val.add_image('ColorMap', color_map, epoch)
            writer_val.add_image('SeedMap', seed_show, epoch)
            writer_val.add_image('SigmaMap', sigma.unsqueeze(0).cpu().numpy(), epoch)
            # writer_val.add_image('Prediction', predictions.unsqueeze(0).cpu().numpy(), epoch)

    return loss_meter.avg, iou_meter.avg
def single_test(self, jpg=None, fls=None, filename=None, prefix='', grey_only=False):
    import time
    st = time.time()
    self.G.eval()

    if jpg is None:
        jpg = glob.glob1(self.opt_parser.single_test, '*.jpg')[0]
        jpg = cv2.imread(os.path.join(self.opt_parser.single_test, jpg))
    if fls is None:
        fls = glob.glob1(self.opt_parser.single_test, '*.txt')[0]
        fls = np.loadtxt(os.path.join(self.opt_parser.single_test, fls))
        fls = fls * 95
        fls[:, 0::3] += 130
        fls[:, 1::3] += 80

    writer = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 62.5, (256 * 3, 256))

    for i, frame in enumerate(fls):
        img_fl = np.ones(shape=(256, 256, 3)) * 255
        fl = frame.astype(int)
        img_fl = vis_landmark_on_img(img_fl, np.reshape(fl, (68, 3)))

        frame = np.concatenate((img_fl, jpg), axis=2).astype(np.float32) / 255.0
        image_in, image_out = frame.transpose((2, 0, 1)), np.zeros(shape=(3, 256, 256))
        # image_in, image_out = frame.transpose((2, 1, 0)), np.zeros(shape=(3, 256, 256))
        image_in, image_out = torch.tensor(image_in, requires_grad=False), \
                              torch.tensor(image_out, requires_grad=False)
        image_in, image_out = image_in.reshape(-1, 6, 256, 256), image_out.reshape(-1, 3, 256, 256)
        image_in, image_out = image_in.to(device), image_out.to(device)

        g_out = self.G(image_in)
        g_out = torch.tanh(g_out)
        g_out = g_out.cpu().detach().numpy().transpose((0, 2, 3, 1))
        g_out[g_out < 0] = 0
        ref_in = image_in[:, 3:6, :, :].cpu().detach().numpy().transpose((0, 2, 3, 1))
        fls_in = image_in[:, 0:3, :, :].cpu().detach().numpy().transpose((0, 2, 3, 1))

        # g_out = g_out.cpu().detach().numpy().transpose((0, 3, 2, 1))
        # g_out[g_out < 0] = 0
        # ref_in = image_in[:, 3:6, :, :].cpu().detach().numpy().transpose((0, 3, 2, 1))
        # fls_in = image_in[:, 0:3, :, :].cpu().detach().numpy().transpose((0, 3, 2, 1))

        if grey_only:
            g_out_grey = np.mean(g_out, axis=3, keepdims=True)
            g_out[:, :, :, 0:1] = g_out[:, :, :, 1:2] = g_out[:, :, :, 2:3] = g_out_grey

        for i in range(g_out.shape[0]):
            frame = np.concatenate((ref_in[i], g_out[i], fls_in[i]), axis=1) * 255.0
            writer.write(frame.astype(np.uint8))

    writer.release()
    print('Time - only video:', time.time() - st)

    if filename is None:
        filename = 'v'
    os.system('ffmpeg -loglevel error -y -i out.mp4 -i {} -pix_fmt yuv420p -strict -2 examples/{}_{}.mp4'.format(
        'examples/' + filename[9:-16] + '.wav', prefix, filename[:-4]))
    # os.system('rm out.mp4')
    print('Time - ffmpeg add audio:', time.time() - st)
def test(self):
    if self.opt_parser.use_vox_dataset == 'raw':
        if self.opt_parser.add_audio_in:
            from src.dataset.image_translation.image_translation_dataset import \
                image_translation_raw98_with_audio_test_dataset as image_translation_test_dataset
        else:
            from src.dataset.image_translation.image_translation_dataset import \
                image_translation_raw98_test_dataset as image_translation_test_dataset
    else:
        from src.dataset.image_translation.image_translation_dataset import \
            image_translation_preprocessed98_test_dataset as image_translation_test_dataset

    self.dataset = image_translation_test_dataset(num_frames=self.opt_parser.num_frames)
    self.dataloader = torch.utils.data.DataLoader(self.dataset, batch_size=1, shuffle=True,
                                                  num_workers=self.opt_parser.num_workers)
    self.G.eval()

    for i, batch in enumerate(self.dataloader):
        print(i, 50)
        if i > 50:
            break
        if self.opt_parser.add_audio_in:
            image_in, image_out, audio_in = batch
            audio_in = audio_in.reshape(-1, 1, 256, 256).to(device)
        else:
            image_in, image_out = batch

        # online landmark (AwingNet)
        with torch.no_grad():
            image_in, image_out = \
                image_in.reshape(-1, 3, 256, 256).to(device), image_out.reshape(-1, 3, 256, 256).to(device)
            pred_landmarks = []
            for j in range(image_in.shape[0] // 16):
                inputs = image_out[j * 16:j * 16 + 16]
                outputs, boundary_channels = self.fa_model(inputs)
                pred_heatmap = outputs[-1][:, :-1, :, :].detach().cpu()
                pred_landmark, _ = get_preds_fromhm(pred_heatmap)
                pred_landmarks.append(pred_landmark.numpy() * 4)
            pred_landmarks = np.concatenate(pred_landmarks, axis=0)

            # draw landmark on white bg
            img_fls = []
            for pred_fl in pred_landmarks:
                img_fl = np.ones(shape=(256, 256, 3)) * 255.0
                img_fl = vis_landmark_on_img98(img_fl, pred_fl)  # 98x2
                img_fls.append(img_fl.transpose((2, 0, 1)))
            img_fls = np.stack(img_fls, axis=0).astype(np.float32) / 255.0
            image_fls_in = torch.tensor(img_fls, requires_grad=False).to(device)

            if self.opt_parser.add_audio_in:
                # print(image_fls_in.shape, image_in.shape, audio_in.shape)
                image_in = torch.cat([image_fls_in, image_in[0:image_fls_in.shape[0]],
                                      audio_in[0:image_fls_in.shape[0]]], dim=1)
            else:
                image_in = torch.cat([image_fls_in, image_in[0:image_fls_in.shape[0]]], dim=1)

        # normal 68 test dataset
        # image_in, image_out = image_in.reshape(-1, 6, 256, 256), image_out.reshape(-1, 3, 256, 256)

        # random single frame
        # cv2.imwrite('random_img_{}.jpg'.format(i), np.swapaxes(image_out[5].numpy(), 0, 2) * 255.0)

        image_in, image_out = image_in.to(device), image_out.to(device)

        writer = cv2.VideoWriter('tmp_{:04d}.mp4'.format(i), cv2.VideoWriter_fourcc(*'mp4v'), 25, (256 * 4, 256))
        for j in range(image_in.shape[0] // 16):
            g_out = self.G(image_in[j * 16:j * 16 + 16])
            g_out = torch.tanh(g_out)

            # norm 68 pts
            # g_out = np.swapaxes(g_out.cpu().detach().numpy(), 1, 3)
            # ref_out = np.swapaxes(image_out[j*16:j*16+16].cpu().detach().numpy(), 1, 3)
            # ref_in = np.swapaxes(image_in[j*16:j*16+16, 3:6, :, :].cpu().detach().numpy(), 1, 3)
            # fls_in = np.swapaxes(image_in[j*16:j*16+16, 0:3, :, :].cpu().detach().numpy(), 1, 3)

            g_out = g_out.cpu().detach().numpy().transpose((0, 2, 3, 1))
            g_out[g_out < 0] = 0
            ref_out = image_out[j * 16:j * 16 + 16].cpu().detach().numpy().transpose((0, 2, 3, 1))
            ref_in = image_in[j * 16:j * 16 + 16, 3:6, :, :].cpu().detach().numpy().transpose((0, 2, 3, 1))
            fls_in = image_in[j * 16:j * 16 + 16, 0:3, :, :].cpu().detach().numpy().transpose((0, 2, 3, 1))

            for k in range(g_out.shape[0]):
                frame = np.concatenate((ref_in[k], g_out[k], fls_in[k], ref_out[k]), axis=1) * 255.0
                writer.write(frame.astype(np.uint8))
        writer.release()

        os.system('ffmpeg -y -i tmp_{:04d}.mp4 -pix_fmt yuv420p random_{:04d}.mp4'.format(i, i))
        os.system('rm tmp_{:04d}.mp4'.format(i))
def step(self, Ybar_t: torch.Tensor,
         dec_state: Tuple[torch.Tensor, torch.Tensor],
         enc_hiddens: torch.Tensor,
         enc_hiddens_proj: torch.Tensor,
         enc_masks: torch.Tensor) -> Tuple[Tuple, torch.Tensor, torch.Tensor]:
    """ Compute one forward step of the LSTM decoder, including the attention computation.

    @param Ybar_t (Tensor): Concatenated Tensor of [Y_t o_prev], with shape (b, e + h). The input for the decoder,
            where b = batch size, e = embedding size, h = hidden size.
    @param dec_state (tuple(Tensor, Tensor)): Tuple of tensors both with shape (b, h), where b = batch size, h = hidden size.
            First tensor is decoder's prev hidden state, second tensor is decoder's prev cell.
    @param enc_hiddens (Tensor): Encoder hidden states Tensor, with shape (b, src_len, h * 2), where b = batch size,
            src_len = maximum source length, h = hidden size.
    @param enc_hiddens_proj (Tensor): Encoder hidden states Tensor, projected from (h * 2) to h. Tensor is with shape (b, src_len, h),
            where b = batch size, src_len = maximum source length, h = hidden size.
    @param enc_masks (Tensor): Tensor of sentence masks shape (b, src_len),
            where b = batch size, src_len is maximum source length.

    @returns dec_state (tuple (Tensor, Tensor)): Tuple of tensors both shape (b, h), where b = batch size, h = hidden size.
            First tensor is decoder's new hidden state, second tensor is decoder's new cell.
    @returns combined_output (Tensor): Combined output Tensor at timestep t, shape (b, h), where b = batch size, h = hidden size.
    @returns e_t (Tensor): Tensor of shape (b, src_len). It is attention scores distribution.
            Note: You will not use this outside of this function.
                  We are simply returning this value so that we can sanity check your implementation.
    """
    combined_output = None

    ### YOUR CODE HERE (~3 Lines)
    ### TODO:
    ###     1. Apply the decoder to `Ybar_t` and `dec_state` to obtain the new dec_state.
    ###     2. Split dec_state into its two parts (dec_hidden, dec_cell)
    ###     3. Compute the attention scores e_t, a Tensor shape (b, src_len).
    ###        Note: b = batch_size, src_len = maximum source length, h = hidden size.
    ###
    ###     Hints:
    ###       - dec_hidden is shape (b, h) and corresponds to h^dec_t in the PDF (batched)
    ###       - enc_hiddens_proj is shape (b, src_len, h) and corresponds to W_{attProj} h^enc (batched).
    ###       - Use batched matrix multiplication (torch.bmm) to compute e_t.
    ###       - To get the tensors into the right shapes for bmm, you will need to do some squeezing and unsqueezing.
    ###       - When using the squeeze() function make sure to specify the dimension you want to squeeze over.
    ###         Otherwise, you will remove the batch dimension accidentally, if batch_size = 1.
    ###
    ### Use the following docs to implement this functionality:
    ###     Batch Multiplication:
    ###         https://pytorch.org/docs/stable/torch.html#torch.bmm
    ###     Tensor Unsqueeze:
    ###         https://pytorch.org/docs/stable/torch.html#torch.unsqueeze
    ###     Tensor Squeeze:
    ###         https://pytorch.org/docs/stable/torch.html#torch.squeeze

    dec_state = self.decoder(Ybar_t, dec_state)
    dec_hidden, dec_cell = dec_state
    e_t = torch.squeeze(
        torch.bmm(enc_hiddens_proj, torch.unsqueeze(dec_hidden, 2)), 2)

    ### END YOUR CODE

    # Set e_t to -inf where enc_masks has 1;
    # enc_masks makes the probability of <pad> approach 0
    if enc_masks is not None:
        e_t.data.masked_fill_(enc_masks.byte(), -float('inf'))

    ### YOUR CODE HERE (~6 Lines)
    ### TODO:
    ###     1. Apply softmax to e_t to yield alpha_t
    ###     2. Use batched matrix multiplication between alpha_t and enc_hiddens to obtain the
    ###        attention output vector, a_t.
    ###
    ###     Hints:
    ###       - alpha_t is shape (b, src_len)
    ###       - enc_hiddens is shape (b, src_len, 2h)
    ###       - a_t should be shape (b, 2h)
    ###       - You will need to do some squeezing and unsqueezing.
    ###       Note: b = batch size, src_len = maximum source length, h = hidden size.
    ###
    ###     3. Concatenate dec_hidden with a_t to compute tensor U_t
    ###     4. Apply the combined output projection layer to U_t to compute tensor V_t
    ###     5. Compute tensor O_t by first applying the Tanh function and then the dropout layer.
    ###
    ### Use the following docs to implement this functionality:
    ###     Softmax:
    ###         https://pytorch.org/docs/stable/nn.html#torch.nn.functional.softmax
    ###     Batch Multiplication:
    ###         https://pytorch.org/docs/stable/torch.html#torch.bmm
    ###     Tensor View:
    ###         https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view
    ###     Tensor Concatenation:
    ###         https://pytorch.org/docs/stable/torch.html#torch.cat
    ###     Tanh:
    ###         https://pytorch.org/docs/stable/torch.html#torch.tanh

    alpha_t = F.softmax(e_t, dim=1)  # attention weights
    a_t = torch.squeeze(torch.bmm(torch.unsqueeze(alpha_t, 1), enc_hiddens), 1)  # context vector
    U_t = torch.cat((a_t, dec_hidden), 1)
    V_t = self.combined_output_projection(U_t)
    O_t = self.dropout(torch.tanh(V_t))

    ### END YOUR CODE

    combined_output = O_t
    return dec_state, combined_output, e_t
def mattanh(*args):
    return torch.tanh(args[0])
def forward(self, in_modalities):
    umask = in_modalities[-1]
    in_modalities = in_modalities[:-2]
    batch_size = in_modalities[0].shape[0]
    time_stamps = in_modalities[0].shape[1]

    # Unimodal
    all_h = []
    for modality, dim, lstm, dropout, fc in zip(in_modalities, self.hidden_dims,
                                                self.lstms, self.drop_outs, self.fcs):
        self.h = torch.zeros(batch_size, dim).to(self.device)
        self.c = torch.zeros(batch_size, dim).to(self.device)
        h = []
        for t in range(time_stamps):
            # Apply the mask directly on the data
            input_u = modality[:, t, :] * umask[:, t].unsqueeze(dim=-1)
            self.h, self.c = lstm(input_u, (self.h, self.c))
            self.h = torch.tanh(self.h)
            self.h = dropout(self.h)
            h.append(torch.tanh(fc(self.h)))
        all_h.append(h)

    # Multimodal
    utterance_features = [torch.stack(h, dim=-2) for h in all_h]
    dialogue_utterance_feature = torch.cat(utterance_features, dim=-1)

    self.h_dialogue = torch.zeros(batch_size, self.dialogue_hidden_dim).to(self.device)
    self.c_dialogue = torch.zeros(batch_size, self.dialogue_hidden_dim).to(self.device)

    all_h_dialogue = []
    for t in range(time_stamps):
        input_m = dialogue_utterance_feature[:, t, :] * umask[:, t].unsqueeze(dim=-1)
        self.h_dialogue, self.c_dialogue = self.dialogue_lstm(
            input_m, (self.h_dialogue, self.c_dialogue))
        self.h_dialogue = self.drop_out(self.h_dialogue)
        all_h_dialogue.append(torch.tanh(self.fc_out(self.h_dialogue)))

    output_emo = [self.smax_fc_emo(_h) for _h in all_h_dialogue]
    output_act = [self.smax_fc_act(_h) for _h in all_h_dialogue]

    # Stack hidden states
    output_emo = torch.stack(output_emo, dim=-2)
    output_act = torch.stack(output_act, dim=-2)

    log_prob_emo = F.log_softmax(output_emo, 2)  # batch, seq_len, n_classes
    log_prob_act = F.log_softmax(output_act, 2)  # batch, seq_len, n_classes
    return log_prob_emo, log_prob_act
def forward(self, state):
    x = F.relu(self.linear1(state))
    x = F.relu(self.linear2(x))
    x = torch.tanh(self.output(x))
    return x
def gelu(x):
    return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
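# Hedged check (illustrative): the tanh form above approximates the exact
# GELU, x * Phi(x) with Phi the standard normal CDF; the two agree to within
# roughly 1e-3 on a small range. Assumes `math` and `torch` are imported.
import math
import torch

x = torch.linspace(-3, 3, 101)
approx = gelu(x)
exact = x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
print((approx - exact).abs().max())  # small, on the order of 1e-3 or less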
def forward(self, state):
    a = F.relu(self.l1(state))
    a = F.relu(self.l2(a))
    return self.max_action * torch.tanh(self.l3(a))
def forward(self, x):
    return 0.5 * x * (1 + torch.tanh(
        math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
def forward(ctx, x):
    ctx.save_for_backward(x)
    return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
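# Hedged companion sketch (an assumption, not the original class): wrapping
# the Mish forward above in a complete autograd.Function with its analytic
# backward, using d/dx[x * tanh(softplus(x))] = tanh(sp) + x * sigmoid(x) * (1 - tanh(sp)^2).
import torch
import torch.nn.functional as F

class MishSketch(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))

    @staticmethod
    def backward(ctx, grad_output):
        x, = ctx.saved_tensors
        tsp = torch.tanh(F.softplus(x))
        return grad_output * (tsp + x * torch.sigmoid(x) * (1 - tsp * tsp))

# numeric check of the analytic gradient against finite differences
x = torch.randn(5, dtype=torch.double, requires_grad=True)
assert torch.autograd.gradcheck(MishSketch.apply, (x,))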
def forward(self, batch):
    # shape: (batch_size, img_feature_size) - CNN fc7 features
    # shape: (batch_size, num_proposals, img_feature_size) - RCNN features
    img = batch["img_feat"]
    # shape: (batch_size, 10, max_sequence_length)
    ques = batch["ques"]
    # shape: (batch_size, 10, max_sequence_length * 2 * 10)
    # concatenated qa * 10 rounds
    hist = batch["hist"]
    # num_rounds = 10, even for test (padded dialog rounds at the end)
    batch_size, num_rounds, max_sequence_length = ques.size()

    # embed questions
    ques = ques.view(batch_size * num_rounds, max_sequence_length)
    ques_embed = self.word_embed(ques)

    # shape: (batch_size * num_rounds, max_sequence_length, lstm_hidden_size)
    _, (ques_embed, _) = self.ques_rnn(ques_embed, batch["ques_len"])

    # project down image features and ready for attention
    # shape: (batch_size, num_proposals, lstm_hidden_size)
    projected_image_features = self.image_features_projection(img)

    # repeat image feature vectors to be provided for every round
    # shape: (batch_size * num_rounds, num_proposals, lstm_hidden_size)
    projected_image_features = (
        projected_image_features.view(
            batch_size, 1, -1, self.config["lstm_hidden_size"]
        )
        .repeat(1, num_rounds, 1, 1)
        .view(batch_size * num_rounds, -1, self.config["lstm_hidden_size"])
    )

    # computing attention weights
    # shape: (batch_size * num_rounds, num_proposals)
    projected_ques_features = ques_embed.unsqueeze(1).repeat(
        1, img.shape[1], 1
    )
    projected_ques_image = (
        projected_ques_features * projected_image_features
    )
    projected_ques_image = self.dropout(projected_ques_image)
    image_attention_weights = self.attention_proj(
        projected_ques_image
    ).squeeze()
    image_attention_weights = F.softmax(image_attention_weights, dim=-1)
    image_attention_scores = image_attention_weights

    # shape: (batch_size * num_rounds, num_proposals, img_feature_size)
    img = (
        img.view(batch_size, 1, -1, self.config["img_feature_size"])
        .repeat(1, num_rounds, 1, 1)
        .view(batch_size * num_rounds, -1, self.config["img_feature_size"])
    )

    # multiply image features with their attention weights
    # shape: (batch_size * num_rounds, num_proposals, img_feature_size)
    image_attention_weights = image_attention_weights.unsqueeze(-1).repeat(
        1, 1, self.config["img_feature_size"]
    )
    # shape: (batch_size * num_rounds, img_feature_size)
    attended_image_features = (image_attention_weights * img).sum(1)
    img = attended_image_features

    # embed history
    hist = hist.view(batch_size * num_rounds, max_sequence_length * 20)
    hist_embed = self.word_embed(hist)

    # shape: (batch_size * num_rounds, lstm_hidden_size)
    _, (hist_embed, _) = self.hist_rnn(hist_embed, batch["hist_len"])

    fused_vector = torch.cat((img, ques_embed, hist_embed), 1)
    fused_vector = self.dropout(fused_vector)

    fused_embedding = torch.tanh(self.fusion(fused_vector))
    # shape: (batch_size, num_rounds, lstm_hidden_size)
    fused_embedding = fused_embedding.view(batch_size, num_rounds, -1)

    # old code
    # return fused_embedding
    return fused_embedding, image_attention_scores
def __train_pass__(self, epoch, is_training=True):
    st_epoch = time.time()
    if is_training:
        self.G.train()
        status = 'TRAIN'
    else:
        self.G.eval()
        status = 'EVAL'

    g_time = 0.0
    for i, batch in enumerate(self.dataloader):
        if i >= len(self.dataloader) - 2:
            break
        st_batch = time.time()

        if self.opt_parser.comb_fan_awing:
            image_in, image_out, fan_pred_landmarks = batch
            fan_pred_landmarks = fan_pred_landmarks.reshape(-1, 68, 3).detach().cpu().numpy()
        elif self.opt_parser.add_audio_in:
            image_in, image_out, audio_in = batch
            audio_in = audio_in.reshape(-1, 1, 256, 256).to(device)
        else:
            image_in, image_out = batch

        with torch.no_grad():
            # online landmark (AwingNet)
            image_in, image_out = \
                image_in.reshape(-1, 3, 256, 256).to(device), image_out.reshape(-1, 3, 256, 256).to(device)
            inputs = image_out
            outputs, boundary_channels = self.fa_model(inputs)
            pred_heatmap = outputs[-1][:, :-1, :, :].detach().cpu()
            pred_landmarks, _ = get_preds_fromhm(pred_heatmap)
            pred_landmarks = pred_landmarks.numpy() * 4

            # online landmark (FAN) -> replace jaw + eye brow in AwingNet
            if self.opt_parser.comb_fan_awing:
                fl_jaw_eyebrow = fan_pred_landmarks[:, 0:27, 0:2]
                fl_rest = pred_landmarks[:, 51:, :]
                pred_landmarks = np.concatenate([fl_jaw_eyebrow, fl_rest], axis=1).astype(int)

            # draw landmark on white bg
            img_fls = []
            for pred_fl in pred_landmarks:
                img_fl = np.ones(shape=(256, 256, 3)) * 255.0
                if self.opt_parser.comb_fan_awing:
                    img_fl = vis_landmark_on_img74(img_fl, pred_fl)  # 74x2
                else:
                    img_fl = vis_landmark_on_img98(img_fl, pred_fl)  # 98x2
                img_fls.append(img_fl.transpose((2, 0, 1)))
            img_fls = np.stack(img_fls, axis=0).astype(np.float32) / 255.0
            image_fls_in = torch.tensor(img_fls, requires_grad=False).to(device)

            if self.opt_parser.add_audio_in:
                # print(image_fls_in.shape, image_in.shape, audio_in.shape)
                image_in = torch.cat([image_fls_in, image_in, audio_in], dim=1)
            else:
                image_in = torch.cat([image_fls_in, image_in], dim=1)

            # image_in, image_out = \
            #     image_in.reshape(-1, 6, 256, 256).to(device), image_out.reshape(-1, 3, 256, 256).to(device)

        # image2image net fp
        g_out = self.G(image_in)
        g_out = torch.tanh(g_out)

        loss_l1 = self.criterionL1(g_out, image_out)
        loss_vgg, loss_style = self.criterionVGG(g_out, image_out, style=True)
        loss_vgg, loss_style = torch.mean(loss_vgg), torch.mean(loss_style)
        loss = loss_l1 + loss_vgg + loss_style

        if is_training:
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        # log
        if self.opt_parser.write:
            self.writer.add_scalar('loss', loss.cpu().detach().numpy(), self.count)
            self.writer.add_scalar('loss_l1', loss_l1.cpu().detach().numpy(), self.count)
            self.writer.add_scalar('loss_vgg', loss_vgg.cpu().detach().numpy(), self.count)
            self.count += 1

        # save image to track training process
        if i % self.opt_parser.jpg_freq == 0:
            vis_in = np.concatenate([image_in[0, 3:6].cpu().detach().numpy().transpose((1, 2, 0)),
                                     image_in[0, 0:3].cpu().detach().numpy().transpose((1, 2, 0))], axis=1)
            vis_out = np.concatenate([image_out[0].cpu().detach().numpy().transpose((1, 2, 0)),
                                      g_out[0].cpu().detach().numpy().transpose((1, 2, 0))], axis=1)
            vis = np.concatenate([vis_in, vis_out], axis=0)
            try:
                os.makedirs(os.path.join(self.opt_parser.jpg_dir, self.opt_parser.name))
            except OSError:
                pass
            cv2.imwrite(os.path.join(self.opt_parser.jpg_dir, self.opt_parser.name,
                                     'e{:03d}_b{:04d}.jpg'.format(epoch, i)), vis * 255.0)

        # save ckpt
        if i % self.opt_parser.ckpt_last_freq == 0:
            self.__save_model__('last', epoch)

        print("Epoch {}, Batch {}/{}, loss {:.4f}, l1 {:.4f}, vggloss {:.4f}, styleloss {:.4f} time {:.4f}".format(
            epoch, i, len(self.dataset) // self.opt_parser.batch_size,
            loss.cpu().detach().numpy(), loss_l1.cpu().detach().numpy(),
            loss_vgg.cpu().detach().numpy(), loss_style.cpu().detach().numpy(),
            time.time() - st_batch))
        g_time += time.time() - st_batch

        if self.opt_parser.test_speed:
            if i >= 100:
                break

    print('Epoch time usage:', time.time() - st_epoch,
          'I/O time usage:', time.time() - st_epoch - g_time,
          '\n=========================')
    if self.opt_parser.test_speed:
        exit(0)
    if epoch % self.opt_parser.ckpt_epoch_freq == 0:
        self.__save_model__('{:02d}'.format(epoch), epoch)
def forward(self, inputs):
    x = inputs
    x = torch.tanh(self.linear1(x))
    action_scores = self.linear2(x)
    return F.softmax(action_scores, dim=-1)  # explicit dim avoids the implicit-dim deprecation warning
def forward(self, node_stacks, left_childs, encoder_outputs, num_pades, padding_hidden, seq_mask, mask_nums):
    current_embeddings = []

    for st in node_stacks:
        if len(st) == 0:
            current_embeddings.append(padding_hidden)
        else:
            current_node = st[-1]
            current_embeddings.append(current_node.embedding)

    current_node_temp = []
    for l, c in zip(left_childs, current_embeddings):
        if l is None:
            c = self.dropout(c)
            g = torch.tanh(self.concat_l(c))
            t = torch.sigmoid(self.concat_lg(c))
            current_node_temp.append(g * t)
        else:
            ld = self.dropout(l)
            c = self.dropout(c)
            g = torch.tanh(self.concat_r(torch.cat((ld, c), 1)))
            t = torch.sigmoid(self.concat_rg(torch.cat((ld, c), 1)))
            current_node_temp.append(g * t)

    current_node = torch.stack(current_node_temp)
    current_embeddings = self.dropout(current_node)

    current_attn = self.attn(current_embeddings.transpose(0, 1), encoder_outputs, seq_mask)
    current_context = current_attn.bmm(encoder_outputs.transpose(0, 1))  # B x 1 x N

    # the information to get the current quantity
    batch_size = current_embeddings.size(0)
    # predict the output (this node corresponding to output (number or operator)) with PADE
    repeat_dims = [1] * self.embedding_weight.dim()
    repeat_dims[0] = batch_size
    embedding_weight = self.embedding_weight.repeat(*repeat_dims)  # B x input_size x N
    embedding_weight = torch.cat((embedding_weight, num_pades), dim=1)  # B x O x N

    leaf_input = torch.cat((current_node, current_context), 2)
    leaf_input = leaf_input.squeeze(1)
    leaf_input = self.dropout(leaf_input)

    # p_leaf = nn.functional.softmax(self.is_leaf(leaf_input), 1)
    # max pooling the embedding_weight
    embedding_weight_ = self.dropout(embedding_weight)
    num_score = self.score(leaf_input.unsqueeze(1), embedding_weight_, mask_nums)

    # num_score = nn.functional.softmax(num_score, 1)
    op = self.ops(leaf_input)

    # return p_leaf, num_score, op, current_embeddings, current_attn
    return num_score, op, current_node, current_context, embedding_weight
def forward(self, input_features):
    pre_out = self.pre_net(input_features)
    q_in = torch.tanh(pre_out) * np.pi / 2.0
    q_out = self.q_net(q_in)
    return self.post_net(q_out)
def forward(self, B, S):
    h = self.func(self.line_1(torch.cat((B, S), dim=1)))
    h = self.func(self.line_2(h))
    return torch.tanh(self.line_3(h))
def tanh2(x, min_y, max_y):
    scale_x = 1 / ((max_y - min_y) / 2)
    return (max_y - min_y) / 2 * (torch.tanh(x * scale_x) + 1.0) + min_y
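# Hedged usage sketch: tanh2 rescales tanh so outputs lie in (min_y, max_y),
# with the midpoint at x = 0; scale_x is chosen so the range factors cancel,
# giving roughly unit slope near the origin. Assumes `torch` is imported.
import torch

x = torch.tensor([-100.0, 0.0, 100.0])
print(tanh2(x, min_y=0.0, max_y=10.0))  # approximately [0, 5, 10]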
def forward(self, x):
    x = x * (torch.tanh(F.softplus(x)))
    return x
def forward(self, x):
    cdf = 0.5 * (1.0 + torch.tanh(
        math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
    return x * cdf
def forward(self, input):
    output = self.conv(input)
    output = torch.tanh(output)
    output = smooth_binary(output)
    return output
def forward(self, prediction):
    for layer in self.layers:
        prediction = torch.tanh(layer(prediction))
    return prediction
def forward(self, input):
    in_len = input.size(3)
    if in_len < self.receptive_field:
        x = nn.functional.pad(input, (self.receptive_field - in_len, 0, 0, 0))
    else:
        x = input
    x = self.start_conv(x)
    skip = 0

    # calculate the current adaptive adj matrix once per iteration
    new_supports = None
    if self.gcn_bool and self.addaptadj and self.supports is not None:
        adp = F.softmax(F.relu(torch.mm(self.nodevec1, self.nodevec2)), dim=1)
        new_supports = self.supports + [adp]

    # WaveNet layers
    for i in range(self.blocks * self.layers):

        #            |----------------------------------------|     *residual*
        #            |                                         |
        #            |    |-- conv -- tanh --|                 |
        # -> dilate -|----|                  * ----|-- 1x1 -- + -->  *input*
        #                 |-- conv -- sigm --|     |
        #                                         1x1
        #                                          |
        # ---------------------------------------> + ------------->  *skip*

        # (dilation, init_dilation) = self.dilations[i]
        # residual = dilation_func(x, dilation, init_dilation, i)
        residual = x
        # dilated convolution
        filter = self.filter_convs[i](residual)
        filter = torch.tanh(filter)
        gate = self.gate_convs[i](residual)
        gate = torch.sigmoid(gate)
        x = filter * gate

        # parametrized skip connection
        s = x
        s = self.skip_convs[i](s)
        try:
            skip = skip[:, :, :, -s.size(3):]
        except:
            skip = 0
        skip = s + skip

        if self.gcn_bool and self.supports is not None:
            if self.addaptadj:
                x = self.gconv[i](x, new_supports)
                x = self.pn[i](x)
                if self.att_bool:
                    x = self.att_conv[i](x)
            else:
                x = self.gconv[i](x, self.supports)
                x = self.pn[i](x)
                if self.att_bool:
                    x = self.att_conv[i](x)
        else:
            x = self.residual_convs[i](x)

        x = x + residual[:, :, :, -x.size(3):]
        x = self.bn[i](x)

    x = F.relu(skip)
    x = F.relu(self.end_conv_1(x))
    x = self.end_conv_2(x)
    return x
def train(epoch, writer):
    # define meters
    loss_meter = AverageMeter()

    # put model into training mode
    model.train()

    # set this only when it is finetuning
    # for module in model.modules():
    #     if isinstance(module, torch.nn.modules.BatchNorm1d):
    #         module.eval()
    #     if isinstance(module, torch.nn.modules.BatchNorm2d):
    #         module.eval()
    #     if isinstance(module, torch.nn.modules.BatchNorm3d):
    #         module.eval()

    for param_group in optimizer.param_groups:
        print('learning rate: {}'.format(param_group['lr']))

    for i, sample in enumerate(tqdm(train_dataset_it)):
        im = sample['image']
        instances = sample['instance'].squeeze()
        class_labels = sample['label'].squeeze()

        output = model(im)
        loss = criterion(output, instances, class_labels, **args['loss_w'])
        loss = loss.mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # output.detach().cpu()
        # torch.cuda.empty_cache()

        if args['display'] and i % args['display_it'] == 0:
            with torch.no_grad():
                visualizer.display(im[0], 'image')

                predictions = cluster.cluster_with_gt(
                    output[0], instances[0], n_sigma=args['loss_opts']['n_sigma'])
                visualizer.display([predictions.cpu(), instances[0].cpu()], 'pred')

                sigma = output[0][2].cpu()
                sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min())
                sigma[instances[0] == 0] = 0
                visualizer.display(sigma, 'sigma')

                seed = torch.sigmoid(output[0][3]).cpu()
                visualizer.display(seed, 'seed')

        loss_meter.update(loss.item())

    if args['tensorboard']:
        with torch.no_grad():
            color_map = draw_flow(torch.tanh(output[0][0:2]))
            seed = torch.sigmoid(output[0][3:11]).cpu()
            sigma = output[0][2].cpu()
            sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min())
            sigma[instances[0] == 0] = 0
            # predictions = cluster.cluster_with_gt(output[0], instances[0], n_sigma=args['loss_opts']['n_sigma'])
            color_map = color_map.transpose(2, 0, 1)
            seed_visual = seed.unsqueeze(1)
            seed_show = vutils.make_grid(seed_visual, nrow=8, normalize=True, scale_each=True)
            writer.add_image('Input', im[0], epoch)
            writer.add_image('InstanceGT', instances[0].unsqueeze(0).cpu().numpy(), epoch)
            writer.add_image('ColorMap', color_map, epoch)
            writer.add_image('SeedMap', seed_show, epoch)
            writer.add_image('SigmaMap', sigma.unsqueeze(0).cpu().numpy(), epoch)
            # writer.add_image('Prediction', predictions.unsqueeze(0).cpu().numpy(), epoch)

    return loss_meter.avg