def pointwise_loss(positive_predictions, negative_predictions, mask=None):
    """
    Logistic loss function.

    Parameters
    ----------

    positive_predictions: tensor
        Tensor containing predictions for known positive items.
    negative_predictions: tensor
        Tensor containing predictions for sampled negative items.
    mask: tensor, optional
        A binary tensor used to zero the loss from some entries
        of the loss tensor.

    Returns
    -------

    loss, float
        The mean value of the loss function.
    """
    # Push positive scores towards 1 and negative scores towards 0.
    # Tensor.sigmoid() replaces the long-deprecated F.sigmoid.
    positives_loss = (1.0 - positive_predictions.sigmoid())
    negatives_loss = negative_predictions.sigmoid()

    loss = (positives_loss + negatives_loss)

    if mask is not None:
        mask = mask.float()
        loss = loss * mask
        # Average only over the unmasked entries.
        return loss.sum() / mask.sum()

    return loss.mean()
def forward(self, title, pg):
    """GRU-style gated fusion of a title encoding with a paragraph encoding.

    r_gate (reset) modulates how much of ``pg`` feeds the candidate state;
    i_gate (update) interpolates between ``pg`` and the candidate ``n_gate``.
    Returns the fused representation (same shape as ``pg``).
    """
    r_gate = torch.sigmoid(self.wrx(title) + self.wrh(pg))
    i_gate = torch.sigmoid(self.wix(title) + self.wih(pg))
    n_gate = torch.tanh(self.wnx(title) + torch.mul(r_gate, self.wnh(pg)))
    # Convex combination driven by the update gate: i * pg + (1 - i) * n.
    result = torch.mul(i_gate, pg) + torch.mul(torch.add(-i_gate, 1), n_gate)
    return result
def PeepholeLSTMCell(input: torch.Tensor,
                     hidden: Tuple[torch.Tensor, torch.Tensor],
                     w_ih: torch.Tensor,
                     w_hh: torch.Tensor,
                     w_ip: torch.Tensor,
                     w_fp: torch.Tensor,
                     w_op: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    An LSTM cell with peephole connections without biases.

    Mostly ripped from the pytorch autograd lstm implementation.

    Returns the new (hidden, cell) state pair.
    """
    hx, cx = hidden
    gates = F.linear(input, w_ih) + F.linear(hx, w_hh)
    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)

    # Peephole connections: input/forget gates also see the previous cell
    # state, the output gate sees the new one.
    peep_i = w_ip.unsqueeze(0).expand_as(cx) * cx
    ingate = ingate + peep_i
    peep_f = w_fp.unsqueeze(0).expand_as(cx) * cx
    forgetgate = forgetgate + peep_f

    ingate = torch.sigmoid(ingate)
    forgetgate = torch.sigmoid(forgetgate)
    cellgate = torch.tanh(cellgate)
    cy = (forgetgate * cx) + (ingate * cellgate)
    peep_o = w_op.unsqueeze(0).expand_as(cy) * cy
    outgate = outgate + peep_o
    # BUG FIX: the output gate was previously used raw; a standard
    # (peephole) LSTM squashes it with a sigmoid like the other gates
    # (cf. LSTMCell elsewhere in this codebase).
    outgate = torch.sigmoid(outgate)
    hy = outgate * torch.tanh(cy)

    return hy, cy
def forward(self, x):
    """Two-stage 3D segmentation: localise RoIs on the coarsest feature map,
    then decode a region mask and a contour mask for each RoI.

    Returns (P_Region, P_Contour, RoIs): the first two are lists of numpy
    arrays (one per RoI), RoIs is the list of crop coordinates.
    """
    x1 = self.inc(x)
    x2 = self.down1(x1)
    x3 = self.down2(x2)
    LocOut = self.LocTop(x3)
    # dim=1 (channels) makes the previously implicit softmax axis explicit.
    LocOut = F.softmax(LocOut, dim=1)
    RoIs = self.Localization(LocOut, Train=False)
    P_Region = []
    P_Contour = []
    for i in range(len(RoIs)):
        Zstart = RoIs[i][0]
        Ystart = RoIs[i][1]
        Xstart = RoIs[i][2]
        Zend = RoIs[i][3]
        Yend = RoIs[i][4]
        Xend = RoIs[i][5]
        # RoI cropping layer: crop each scale with the matching stride.
        x3_RoI = x3[:, :, Zstart:Zend, Ystart:Yend, Xstart:Xend]
        x2_RoI = x2[:, :, Zstart*2:Zend*2, Ystart*2:Yend*2, Xstart*2:Xend*2]
        # NOTE(review): Z uses stride 2 while Y/X use stride 4 here — looks
        # intentional for anisotropic volumes, but confirm against the
        # encoder's pooling factors.
        x1_RoI = x1[:, :, Zstart*2:Zend*2, Ystart*4:Yend*4, Xstart*4:Xend*4]
        p = self.up1(x3_RoI, x2_RoI)
        p = self.up2(p, x1_RoI)
        p_r = self.SegTop1(p)
        p_r = p_r.sigmoid()
        p_r = p_r.to('cpu').detach().numpy()
        P_Region.append(p_r)
        p_c = self.SegTop2(p)
        p_c = p_c.sigmoid()
        p_c = p_c.to('cpu').detach().numpy()
        P_Contour.append(p_c)
    return P_Region, P_Contour, RoIs
def norm_flow(self, params, z, v, logposterior):
    """One Hamiltonian-style normalizing-flow step over (z, v).

    params: two (net, mean-head, scale-head) triples; the first updates v
    using the gradient of the log-posterior at z, the second updates z
    using v. Returns the transformed (z, v) and the log-determinant.
    """
    h = torch.tanh(params[0][0](z))
    mew_ = params[0][1](h)
    # +5 biases the gate towards 1 so the flow starts near the identity.
    sig_ = torch.sigmoid(params[0][2](h) + 5.)  # [PB,Z]

    # d(logposterior)/dz, treated as a constant (detached) inside the flow.
    z_reshaped = z.view(self.P, self.B, self.z_size)
    gradients = torch.autograd.grad(outputs=logposterior(z_reshaped), inputs=z_reshaped,
                                    grad_outputs=self.grad_outputs,
                                    create_graph=True, retain_graph=True, only_inputs=True)[0]
    gradients = gradients.detach()
    gradients = gradients.view(-1, self.z_size)

    v = v*sig_ + mew_*gradients
    logdet = torch.sum(torch.log(sig_), 1)

    h = torch.tanh(params[1][0](v))
    mew_ = params[1][1](h)
    sig_ = torch.sigmoid(params[1][2](h) + 5.)  # [PB,Z]
    z = z*sig_ + mew_*v
    logdet2 = torch.sum(torch.log(sig_), 1)  # [PB]

    # Total log-determinant of both gated affine updates.
    logdet = logdet + logdet2  # [PB]

    return z, v, logdet
def norm_flow(self, params, z, v):
    """One auxiliary-variable flow step: update v from z, then z from v,
    each with a learned shift and a sigmoid gate.

    Returns the transformed (z, v) and the log-determinant of the Jacobian.
    """
    h = torch.tanh(params[0][0](z))
    mew_ = params[0][1](h)
    # +5 biases the gate towards 1 (near identity) at initialisation.
    sig_ = torch.sigmoid(params[0][2](h) + 5.)  # [PB,Z]
    v = v*sig_ + mew_
    logdet = torch.sum(torch.log(sig_), 1)

    h = torch.tanh(params[1][0](v))
    mew_ = params[1][1](h)
    sig_ = torch.sigmoid(params[1][2](h) + 5.)  # [PB,Z]
    z = z*sig_ + mew_
    logdet2 = torch.sum(torch.log(sig_), 1)  # [PB]

    # Total log-determinant of both gated affine updates.
    logdet = logdet + logdet2  # [PB]
    return z, v, logdet
def forward(self, xt, img_fc, state):
    """Multi-layer LSTM step (adaptive-attention style) that also produces
    a visual-sentinel ("fake region") vector from the top layer.

    xt: token embedding; img_fc: image feature; state: (h, c) stacks with
    one slice per layer. Returns (top_h, fake_region, new_state).
    """
    hs = []
    cs = []
    for L in range(self.num_layers):
        # c, h from previous timestep
        prev_h = state[0][L]
        prev_c = state[1][L]
        # the input to this layer
        if L == 0:
            x = xt
            i2h = self.w2h(x) + self.v2h(img_fc)
        else:
            x = hs[-1]
            x = F.dropout(x, self.drop_prob_lm, self.training)
            i2h = self.i2h[L-1](x)

        all_input_sums = i2h + self.h2h[L](prev_h)

        sigmoid_chunk = all_input_sums.narrow(1, 0, 3 * self.rnn_size)
        sigmoid_chunk = torch.sigmoid(sigmoid_chunk)
        # decode the gates
        in_gate = sigmoid_chunk.narrow(1, 0, self.rnn_size)
        forget_gate = sigmoid_chunk.narrow(1, self.rnn_size, self.rnn_size)
        out_gate = sigmoid_chunk.narrow(1, self.rnn_size * 2, self.rnn_size)
        # decode the write inputs
        if not self.use_maxout:
            in_transform = torch.tanh(all_input_sums.narrow(1, 3 * self.rnn_size, self.rnn_size))
        else:
            # max-out: elementwise max over two candidate transforms
            in_transform = all_input_sums.narrow(1, 3 * self.rnn_size, 2 * self.rnn_size)
            in_transform = torch.max(
                in_transform.narrow(1, 0, self.rnn_size),
                in_transform.narrow(1, self.rnn_size, self.rnn_size))
        # perform the LSTM update
        next_c = forget_gate * prev_c + in_gate * in_transform
        # gated cells form the output
        tanh_next_c = torch.tanh(next_c)
        next_h = out_gate * tanh_next_c
        if L == self.num_layers - 1:
            # sentinel gate over the top layer's cell state
            if L == 0:
                i2h = self.r_w2h(x) + self.r_v2h(img_fc)
            else:
                i2h = self.r_i2h(x)
            n5 = i2h + self.r_h2h(prev_h)
            fake_region = torch.sigmoid(n5) * tanh_next_c

        cs.append(next_c)
        hs.append(next_h)

    # set up the decoder
    top_h = hs[-1]
    top_h = F.dropout(top_h, self.drop_prob_lm, self.training)
    fake_region = F.dropout(fake_region, self.drop_prob_lm, self.training)

    state = (torch.cat([_.unsqueeze(0) for _ in hs], 0),
             torch.cat([_.unsqueeze(0) for _ in cs], 0))
    return top_h, fake_region, state
def forward(self, x):
    """MLP head: flatten, three sigmoid+dropout hidden layers, linear out."""
    # reshape input first with batch size tracked
    x = x.view(x.size(0), -1)
    # use required layers (Tensor.sigmoid replaces deprecated F.sigmoid)
    x = self.dropout(self.fc1(x).sigmoid())
    x = self.dropout(self.fc2(x).sigmoid())
    x = self.fc3(x).sigmoid()
    x = self.fc4(x)
    return x
def forward(self, x):
    """CNN text classifier with a highway connection over pooled features."""
    emb = self.emb(x).unsqueeze(1)  # batch_size * 1 * seq_len * emb_dim
    convs = [F.relu(conv(emb)).squeeze(3) for conv in self.convs]  # [batch_size * num_filter * length]
    pools = [F.max_pool1d(conv, conv.size(2)).squeeze(2) for conv in convs]  # [batch_size * num_filter]
    pred = torch.cat(pools, 1)  # batch_size * num_filters_sum
    highway = self.highway(pred)
    # Highway blend: compute the sigmoid gate once instead of twice.
    gate = torch.sigmoid(highway)
    pred = gate * F.relu(highway) + (1. - gate) * pred
    pred = self.softmax(self.lin(self.dropout(pred)))
    return pred
def forward(self, inputs, future=0):
    """Run the stacked recurrent layers over time-major `inputs`, then
    optionally free-run `future` extra steps by feeding back the output.

    NOTE(review): assumes self.layers is an insertion-ordered mapping whose
    non-'sigmoid'/'linear' entries are recurrent cells returning (h, c) —
    iteration order must match the hids/states stacking.
    """
    outputs = []
    hids = []
    states = []
    # zero-initialise (h, c) for every hidden layer
    for _ in range(self.num_hid_layers):
        hids.append(Variable(torch.zeros(1, self.hidden_size).double(), requires_grad=False))
        states.append(Variable(torch.zeros(1, self.hidden_size).double(), requires_grad=False))

    for i in range(inputs.size(0)):
        input_t = inputs[i, :]
        input_t = torch.sigmoid(self.layers['sigmoid'](input_t))
        h = 0
        val = input_t
        for k in self.layers:
            if k != 'linear' and k != 'sigmoid':
                hids[h], states[h] = self.layers[k](val, (hids[h], states[h]))
                val = hids[h]
                h += 1
        output = self.layers['linear'](hids[-1])
        outputs += [output]

    for i in range(future):
        # free-running: feed the previous output back in as the next input
        output = torch.sigmoid(self.layers['sigmoid'](output))
        h = 0
        val = output
        for k in self.layers:
            if k != 'linear' and k != 'sigmoid':
                hids[h], states[h] = self.layers[k](val, (hids[h], states[h]))
                val = hids[h]
                h += 1
        output = self.layers['linear'](hids[-1])
        outputs += [output]

    outputs = torch.stack(outputs, 0).squeeze(2)
    return outputs
def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    """Plain LSTM cell for one time step.

    hidden is the (hx, cx) pair; w_*/b_* hold the stacked gate parameters
    in [input, forget, cell, output] order. Returns (hy, cy).
    """
    hx, cx = hidden
    gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)

    # Tensor.sigmoid/tanh replace the deprecated F.sigmoid/F.tanh.
    ingate = ingate.sigmoid()
    forgetgate = forgetgate.sigmoid()
    cellgate = cellgate.tanh()
    outgate = outgate.sigmoid()

    cy = (forgetgate * cx) + (ingate * cellgate)
    hy = outgate * cy.tanh()

    return hy, cy
def forward(self, x1, x2):
    """Two-input MLP trunk with two sigmoid output heads.

    BUG FIX: F.dropout defaults to training=True, so the original applied
    dropout even at eval time; training=self.training restores the
    standard train/eval behaviour.
    """
    x1 = F.dropout(F.relu(self.layer1_1(x1.view(-1, 784))), self.drop, training=self.training)
    x2 = F.dropout(F.relu(self.layer1_2(x2.view(-1, 784))), self.drop, training=self.training)
    x = F.dropout(F.relu(self.layer2(torch.cat((x1, x2), 1))), self.drop, training=self.training)
    x = F.dropout(F.relu(self.layer3(x)), self.drop, training=self.training)
    x = F.dropout(F.relu(self.layer4(x)), self.drop, training=self.training)
    out1 = F.relu(self.layer5_1(x))
    out1 = self.layer6_1(out1).sigmoid()
    out2 = F.relu(self.layer5_2(x))
    out2 = self.layer6_2(out2).sigmoid()
    return out1, out2
def step(self, real_data, verbose: bool = False):
    """One InfoGAN-style optimisation step: discriminator update, generator
    update, then the mutual-information (reconstruction) update.

    real_data: batch of real samples; verbose prints a loss summary.
    """
    batch_size = real_data.shape[0]
    real_dis_logit, real_hidden = self.model.dis(real_data)
    latent = self.model.sample_latent(batch_size)
    fake_data = self.model.gen(latent)
    # detach so the discriminator loss does not update the generator
    fake_dis_logit, fake_hidden = self.model.dis(fake_data.detach())
    dis_loss = self.loss_type.discriminator_loss(real_dis_logit, fake_dis_logit)
    if self.penalty is not None:
        dis_penalty, grad_norm = self.penalty.penalty(self.model.dis, real_data, fake_data)
    else:
        dis_penalty = 0.
        grad_norm = None
    self.dis_opt.zero_grad()
    (dis_loss + dis_penalty).backward(retain_graph=True)
    self.dis_opt.step()

    # generator update uses non-detached fake data
    fake_dis_logit, fake_hidden = self.model.dis(fake_data)
    gen_loss = self.loss_type.generator_loss(fake_dis_logit)
    self.gen_opt.zero_grad()
    gen_loss.backward(retain_graph=True)
    self.gen_opt.step()

    # mutual-information term updates generator, discriminator and
    # reconstructor together
    info_loss = self._information_loss(self.model, fake_hidden, latent)  # type: torch.Tensor
    info_loss *= self.info_weight
    self.gen_opt.zero_grad()
    self.dis_opt.zero_grad()
    self.rec_opt.zero_grad()
    info_loss.backward()
    self.gen_opt.step()
    self.dis_opt.step()
    self.rec_opt.step()

    if verbose:
        # Tensor.sigmoid replaces the deprecated F.sigmoid.
        real_dis = real_dis_logit.sigmoid()
        fake_dis = fake_dis_logit.sigmoid()
        text = (f"D_loss = {dis_loss.item():.4f}, "
                f"G_loss = {gen_loss.item():.4f}, "
                f"MI = {info_loss.item():.4f}, "
                f"D(x) = {real_dis.mean().item():.4f}, "
                f"D(G(z)) = {fake_dis.mean().item():.4f}")
        if self.penalty is not None:
            text += f", |grad D| = {grad_norm.item():.4f}"
        print(text)
def forward(self, xt, fc_feats, att_feats, p_att_feats, state):
    """One step of an attention LSTM (Att2in style): soft-attend over image
    regions, then run a max-out LSTM cell whose write input also receives
    the attention context. Returns (output, new_state)."""
    # The p_att_feats here is already projected
    att_size = att_feats.numel() // att_feats.size(0) // self.att_feat_size
    att = p_att_feats.view(-1, att_size, self.att_hid_size)

    att_h = self.h2att(state[0][-1])                    # batch * att_hid_size
    att_h = att_h.unsqueeze(1).expand_as(att)           # batch * att_size * att_hid_size
    dot = att + att_h                                   # batch * att_size * att_hid_size
    dot = torch.tanh(dot)                               # batch * att_size * att_hid_size
    dot = dot.view(-1, self.att_hid_size)               # (batch * att_size) * att_hid_size
    dot = self.alpha_net(dot)                           # (batch * att_size) * 1
    dot = dot.view(-1, att_size)                        # batch * att_size

    # dim=1 makes the old implicit softmax axis (dim 1 for 2D) explicit
    weight = F.softmax(dot, dim=1)                      # batch * att_size
    att_feats_ = att_feats.view(-1, att_size, self.att_feat_size)  # batch * att_size * att_feat_size
    att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1)  # batch * att_feat_size

    all_input_sums = self.i2h(xt) + self.h2h(state[0][-1])
    sigmoid_chunk = all_input_sums.narrow(1, 0, 3 * self.rnn_size)
    sigmoid_chunk = torch.sigmoid(sigmoid_chunk)
    in_gate = sigmoid_chunk.narrow(1, 0, self.rnn_size)
    forget_gate = sigmoid_chunk.narrow(1, self.rnn_size, self.rnn_size)
    out_gate = sigmoid_chunk.narrow(1, self.rnn_size * 2, self.rnn_size)
    # max-out write input, with the attention context added in
    in_transform = all_input_sums.narrow(1, 3 * self.rnn_size, 2 * self.rnn_size) + \
        self.a2c(att_res)
    in_transform = torch.max(
        in_transform.narrow(1, 0, self.rnn_size),
        in_transform.narrow(1, self.rnn_size, self.rnn_size))
    next_c = forget_gate * state[1][-1] + in_gate * in_transform
    next_h = out_gate * torch.tanh(next_c)

    output = self.dropout(next_h)
    state = (next_h.unsqueeze(0), next_c.unsqueeze(0))
    return output, state
def forward(self, x):
    """BiLSTM encoder, max-pool over time, then a highway layer before the
    final classification layer."""
    x = self.embed(x)
    x = self.dropout(x)
    bilstm_out, self.hidden = self.bilstm(x, self.hidden)

    # (seq, batch, feat) -> (batch, feat, seq) for temporal max-pooling
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2))
    bilstm_out = bilstm_out.squeeze(2)

    hidden2label = self.hidden2label1(torch.tanh(bilstm_out))
    gate_layer = torch.sigmoid(self.gate_layer(bilstm_out))

    # highway-style blend: gate * H(x) + (1 - gate) * x
    gate_hidden_layer = torch.mul(hidden2label, gate_layer)
    # NOTE(review): the carry path uses the raw bilstm_out rather than the
    # transformed hidden2label (the textbook highway formula) — the
    # original comment says the latter "can run, but is not the formula";
    # dimensions of the two paths must agree either way.
    gate_input = torch.mul((1 - gate_layer), bilstm_out)
    highway_output = torch.add(gate_hidden_layer, gate_input)

    logit = self.logit_layer(highway_output)
    return logit
def execute(points, K, n_samples, sigma2, reg_factor, mode='test'):
    """Run K rounds of GNN edge partitioning on `points`, sampling binary
    splits; in 'train' mode accumulates a baseline-subtracted REINFORCE
    loss with a variance regulariser.

    Returns (e, loss_total or None, show_loss, c).
    """
    bs, N, _ = points.size()
    e = torch.zeros(bs, N).type(dtype_l)
    input, dists = create_input(points.data, sigma2)
    loss_total = Variable(torch.zeros(1).type(dtype))
    for k in range(K):
        scores, _ = gnn(input)
        probs = torch.sigmoid(scores)
        if mode == 'train':
            variance = compute_variance(e, probs)
            variance = variance.sum() / bs
            Lgp = Variable(torch.zeros(n_samples, bs).type(dtype))
            Reward2 = Variable(torch.zeros(n_samples, bs).type(dtype))
            Reward3 = Variable(torch.zeros(n_samples, bs).type(dtype))
            for i in range(n_samples):
                Samplei, Lgp[i] = sample_one(probs, 'train')
                # each round appends one more bit to the cluster code
                Ei = e*2 + Samplei.long()
                Reward2[i], _, _ = compute_reward(Ei, k+1, points)
            # baseline-subtracted REINFORCE estimator
            baseline = Reward2.mean(0, True).expand_as(Reward3)
            loss = 0.0
            if (last and k == K-1) or not last:
                loss = ((Reward2-baseline) * Lgp).sum(1).sum(0) / n_samples / bs
            loss_total = loss_total + loss - reg_factor*variance
            show_loss = Reward2.data.mean()
        # greedy/test sample advances the running partition code
        sample, lgp = sample_one(probs, 'test')
        e = e*2 + sample.long()
        reward, _, c = compute_reward(e, k+1, points)
        if mode == 'test':
            show_loss = reward.data.mean()
        if k < K-1:
            input = update_input(input, dists, sample, sigma2, e, k+1)
    if mode == 'test':
        return e, None, show_loss, c
    else:
        return e, loss_total, show_loss, c
def predict_mask(self, x):
    """Conv encoder -> FC bottleneck -> deconv decoder producing a sigmoid
    mask from the input feature map. Shape comments are the author's."""
    # x: [B,32,480,640]
    x = self.act_func(self.conv1_gp(x))    # [B,32,59,79]
    x = self.act_func(self.conv2_gp(x))    # [B,64,13,18]
    x = self.act_func(self.conv3_gp(x))    # [B,32,6,8]
    x = x.view(-1, self.intermediate_size)
    h1 = self.act_func(self.fc1_gp(x))
    z = self.fc2_gp(h1)
    z = self.act_func(self.fc3_gp(z))
    z = self.act_func(self.fc4_gp(z))      # [B,11264]
    z = z.view(-1, 32, 6, 8)
    z = self.act_func(self.deconv1_gp(z))  # [B,64,13,18]
    z = self.act_func(self.deconv2_gp(z))  # [B,64,59,79]
    z = self.deconv3_gp(z)                 # [B,1,452,580]
    # sigmoid so each output element is a mask probability in (0, 1)
    return z.sigmoid()
def predict_mask(self, x):
    """Conv encoder -> FC bottleneck -> deconv decoder producing a sigmoid
    map the same size as the input frame. Shape comments are the author's."""
    # x: [B,32,210,160]
    x = self.act_func(self.conv1_gp(x))       # [B,32,51,39]
    x = self.act_func(self.conv2_gp(x))       # [B,64,24,18]
    x = self.act_func(self.conv3_gp(x))       # [B,32,22,16]
    x = x.view(-1, self.intermediate_size)
    h1 = self.act_func(self.fc1_gp(x))
    z = self.fc2_gp(h1)
    z = self.act_func(self.fc3_gp(z))
    z = self.act_func(self.fc4_gp(z))         # [B,11264]
    z = z.view(-1, 32, 22, 16)
    z = self.act_func(self.deconv1_gp(z))     # [B,64,24,18]
    z = self.act_func(self.deconv2_gp(z))     # [B,64,51,39]
    grad = self.deconv3_gp(z)                 # [B,32,210,160]
    # sigmoid squashes each output element into (0, 1)
    return grad.sigmoid()
def eval_by_batch(self, Xi, Xv, y, x_size):
    """Evaluate the model over (Xi, Xv, y) in large batches.

    Returns (mean BCE loss per sample, eval metric over sigmoid preds).
    """
    total_loss = 0.0
    y_pred = []
    if self.use_ffm:
        batch_size = 16384*2
    else:
        batch_size = 16384
    batch_iter = x_size // batch_size
    criterion = F.binary_cross_entropy_with_logits
    model = self.eval()
    for i in range(batch_iter+1):
        offset = i * batch_size
        end = min(x_size, offset + batch_size)
        # BUG FIX: the original tested offset == end, which misses the
        # offset > end case (x_size < batch_size) and fed an empty batch
        # through the loss.
        if offset >= end:
            break
        batch_xi = Variable(torch.LongTensor(Xi[offset:end]))
        batch_xv = Variable(torch.FloatTensor(Xv[offset:end]))
        batch_y = Variable(torch.FloatTensor(y[offset:end]))
        if self.use_cuda:
            batch_xi, batch_xv, batch_y = batch_xi.cuda(), batch_xv.cuda(), batch_y.cuda()
        outputs = model(batch_xi, batch_xv)
        pred = torch.sigmoid(outputs).cpu()
        y_pred.extend(pred.data.numpy())
        loss = criterion(outputs, batch_y)
        # loss.item() replaces 0-dim indexing loss.data[0], which raises on
        # modern PyTorch; weight by the batch size for a correct mean.
        total_loss += loss.item()*(end-offset)
    total_metric = self.eval_metric(y, y_pred)
    return total_loss/x_size, total_metric
def forward(self, x):
    """Mask head: classify, optionally upsample, and apply sigmoid only at
    inference (training consumes raw logits, e.g. for BCE-with-logits)."""
    x = self.classify(x)
    if cfg.MRCNN.UPSAMPLE_RATIO > 1:
        x = self.upsample(x)
    if not self.training:
        x = x.sigmoid()
    return x
def adpW(self, x):
    '''
    calculate the pairwise_att of everypair of inputs
    output_size: (x.size(0), x.size(1)/2)
    '''
    x = x.detach()  # attention weights are not back-propagated through
    x = self.adp_metric_embedding1(x)
    x = self.adp_metric_embedding1_bn(x)
    x = F.relu(x)
    x = self.adp_metric_embedding2(x)
    x = self.adp_metric_embedding2_bn(x)
    x = F.relu(x)
    x = self.adp_metric_embedding3(x)
    x = self.adp_metric_embedding3_bn(x)
    x = F.relu(x)
    pairwise_att = torch.sigmoid(self.adp_metric_embedding4(x))
    # integer division: tensor sizes are ints (Python-3 safe indexing)
    half = x.size(1) // 2
    diag_matrix1 = []
    diag_matrix2 = []
    for i in range(x.size(0)):
        diag_matrix1.append(torch.diag(pairwise_att[i, :half]))
    for i in range(x.size(0)):
        diag_matrix2.append(torch.diag(pairwise_att[i, half:]))
    pairwise_att1 = torch.stack(diag_matrix1)
    # BUG FIX: the second stack previously reused diag_matrix1, so the
    # second half of the attention vector was computed but ignored.
    pairwise_att2 = torch.stack(diag_matrix2)
    return pairwise_att1, pairwise_att2
def _call(self, x): shape = x.shape[:-1] + (1 + x.shape[-1],) one = x.new([1]).expand(x.shape[:-1] + (1,)) numer = sigmoid(x) denom = (1 - numer).cumprod(-1) probs = torch.cat([numer, one], -1) * torch.cat([one, denom], -1) return probs
def forward(self, x):
    """
    In the forward function we accept a Variable of input data and we must
    return a Variable of output data.

    Logistic regression: linear layer followed by a sigmoid.
    """
    y_pred = self.linear(x).sigmoid()
    return y_pred
def forward(self, frame, policies):
    """Loss for a learned saliency mask: keep every policy's action
    distribution unchanged under masking (KL term) while pushing the mask
    logits down (sparsity term).

    frame: [B,2,84,84]; policies: objects exposing action_logdist.
    Returns (loss, action_dist_kl, mask_cost).
    """
    self.B = frame.size()[0]

    # Predict mask
    pre_mask = self.predict_mask_nosigmoid(frame)
    mask = torch.sigmoid(pre_mask)
    masked_frame = frame * mask

    kls = []
    for i in range(len(policies)):
        policy = policies[i]
        log_dist_mask = policy.action_logdist(masked_frame)
        log_dist_true = policy.action_logdist(frame)
        # KL(true || masked) per sample, then averaged over the batch
        action_dist_kl = torch.sum((log_dist_true - log_dist_mask)*torch.exp(log_dist_true), dim=1)  # [B]
        action_dist_kl = torch.mean(action_dist_kl)  # * 1000
        kls.append(action_dist_kl)

    kls = torch.stack(kls)  # [policies]
    # BUG FIX: average over all policies (the stacked kls); the original
    # re-averaged only the last loop iteration's value despite the
    # "over batch and over policies" intent.
    action_dist_kl = torch.mean(kls)  # [1]  # over batch and over policies

    pre_mask = pre_mask.view(self.B, -1)
    # +20 shifts the logits so the cost is minimised at strongly-off masks
    mask_cost = torch.abs(pre_mask + 20)
    mask_cost = torch.mean(mask_cost) * .01

    loss = action_dist_kl + mask_cost
    return loss, action_dist_kl, mask_cost
def forward(self, x, k=1):
    """IWAE-style bound for a Bayesian-decoder VAE.

    Returns (elbo, logpx, logpz, logqz, logpW, logqW); the decoder-weight
    prior/posterior terms are heavily down-weighted. Also caches the
    sigmoid of the reconstruction in self.x_hat_sigmoid.
    """
    self.k = k
    self.B = x.size()[0]
    mu, logvar = self.encode(x)
    z, logpz, logqz = self.sample(mu, logvar, k=k)
    x_hat, logpW, logqW = self.decode(z)
    logpx = log_bernoulli(x_hat, x)  # [P,B]

    elbo = logpx + logpz - logqz + (logpW - logqW)*.00000001  # [P,B]

    if k > 1:
        # numerically stable log-mean-exp over the k importance samples
        max_ = torch.max(elbo, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]

    # for printing
    logpx = torch.mean(logpx)
    logpz = torch.mean(logpz)
    logqz = torch.mean(logqz)

    self.x_hat_sigmoid = torch.sigmoid(x_hat)

    return elbo, logpx, logpz, logqz, logpW, logqW
def forward(self, input_features, adj):
    """Graph-VAE forward pass: encode the flattened adjacency, decode an
    edge-probability vector, graph-match it against the target adjacency
    and return reconstruction + KL loss."""
    graph_h = input_features.view(-1, self.max_num_nodes * self.max_num_nodes)
    # vae
    h_decode, z_mu, z_lsgms = self.vae(graph_h)
    out = torch.sigmoid(h_decode)
    out_tensor = out.cpu().data
    recon_adj_lower = self.recover_adj_lower(out_tensor)
    recon_adj_tensor = self.recover_full_adj_from_lower(recon_adj_lower)

    # set matching features be degree
    out_features = torch.sum(recon_adj_tensor, 1)

    adj_data = adj.cpu().data[0]
    adj_features = torch.sum(adj_data, 1)

    S = self.edge_similarity_matrix(adj_data, recon_adj_tensor, adj_features,
                                    out_features, self.deg_feature_similarity)

    # initialization strategies: uniform assignment
    init_corr = 1 / self.max_num_nodes
    init_assignment = torch.ones(self.max_num_nodes, self.max_num_nodes) * init_corr
    assignment = self.mpm(init_assignment, S)

    # matching: negate the score since the algorithm finds min cost flow
    row_ind, col_ind = scipy.optimize.linear_sum_assignment(-assignment.numpy())
    print('row: ', row_ind)
    print('col: ', col_ind)

    # NOTE(review): permuting the adjacency by (row_ind, col_ind) is
    # currently disabled; the target is used in its original order.
    adj_permuted = adj_data
    adj_vectorized = adj_permuted[torch.triu(torch.ones(self.max_num_nodes, self.max_num_nodes)) == 1].squeeze_()
    adj_vectorized_var = Variable(adj_vectorized).cuda()

    adj_recon_loss = self.adj_recon_loss(adj_vectorized_var, out[0])
    print('recon: ', adj_recon_loss)
    print(adj_vectorized_var)
    print(out[0])

    # KL(q(z|x) || N(0, I)), normalised by the number of adjacency entries
    loss_kl = -0.5 * torch.sum(1 + z_lsgms - z_mu.pow(2) - z_lsgms.exp())
    loss_kl /= self.max_num_nodes * self.max_num_nodes  # normalize
    print('kl: ', loss_kl)

    loss = adj_recon_loss + loss_kl

    return loss
def get_probs_and_logits(ps=None, logits=None, is_multidimensional=True):
    """
    Convert probability values to logits, or vice-versa. Either ``ps`` or
    ``logits`` should be specified, but not both.

    :param ps: tensor of probabilities. Should be in the interval *[0, 1]*.
        If, ``is_multidimensional = True``, then must be normalized along
        axis -1.
    :param logits: tensor of logit values.  For the multidimensional case,
        the values, when exponentiated along the last dimension, must sum
        to 1.
    :param is_multidimensional: determines the computation of ps from logits,
        and vice-versa. For the multi-dimensional case, logit values are
        assumed to be log probabilities, whereas for the uni-dimensional case,
        it specifically refers to log odds.
    :return: tuple containing raw probabilities and logits as tensors.
    """
    assert (ps is None) != (logits is None)
    if ps is not None:
        # clamp away from {0, 1} so the logs below stay finite
        eps = _get_clamping_buffer(ps)
        ps_clamped = ps.clamp(min=eps, max=1 - eps)
    if is_multidimensional:
        if ps is None:
            ps = softmax(logits, -1)
        else:
            logits = torch.log(ps_clamped)
    else:
        if ps is None:
            ps = torch.sigmoid(logits)
        else:
            # log-odds: log(p) - log(1 - p)
            logits = torch.log(ps_clamped) - torch.log1p(-ps_clamped)
    return ps, logits
def forward(self, input):
    """Graph-net layer: build a soft adjacency Y from learned pairwise
    scores, aggregate features with gmul, then apply (gated) linear heads
    and a U-masked batch norm. Returns the updated (W, x, Y) triple."""
    W, x, Y = input
    N = Y.size(-1)
    bs = Y.size(0)
    # mask1: valid (i, j) pairs; mask2: valid nodes
    mask1 = Variable((W.data[:, :, :, -1] > 0).float())
    mask2 = Variable(W.data[:, :, :, 0].float().sum(2))
    U = Variable(W.data[:, :, :, -1])
    Ns = Variable(W.data[:, :, :, 0].float().sum(2).sum(1).clamp(min=1))
    xB = self.beta(x) * mask2.unsqueeze(2).expand_as(x)  # has size (bs,N,R)
    # large negative bias removes masked pairs from the softmax
    Y = torch.bmm(xB, x.permute(0, 2, 1)) - (1-mask1)*10000
    # dim=0 on the permuted tensor reproduces the old implicit softmax
    # axis for 3D inputs, i.e. softmax over dim 1 of the original Y.
    Y = F.softmax(Y.permute(1, 0, 2), dim=0).permute(1, 0, 2)
    x = gmul((W, x, Y))  # out has size (bs, N, num_inputs)
    x_size = x.size()
    x = x.contiguous()
    x = x.view(-1, self.num_inputs)
    if self.last:
        x1 = self.fc1(x)
    else:
        x1 = torch.sigmoid(self.fc1(x))  # has size (bs*N, num_outputs // 2)
    x2 = self.fc2(x)
    x = torch.cat((x1, x2), 1)
    x = x.view(*x_size[:-1], self.num_outputs)
    x = bnorm(x, U)
    x = x * self.gamma.unsqueeze(0).unsqueeze(1).expand_as(x)
    return W, x, Y
def test_bernoulli_overflow_gradient(init_tensor_type):
    """A huge logit saturates the sigmoid at 1: the log-pdf of observing 1
    and the gradient w.r.t. the logit should both be exactly zero."""
    logit = Variable(init_tensor_type([1e32]), requires_grad=True)
    dist = Bernoulli(sigmoid(logit))
    observation = Variable(init_tensor_type([1]))
    logp = dist.batch_log_pdf(observation)
    logp.sum().backward()
    assert_equal(logp.data[0], 0)
    assert_equal(logit.grad.data[0], 0)
def forward(self, x, k=1):
    """IWAE bound with k importance samples.

    Returns (elbo, logpx, logpz, logqz) and caches the sigmoid of the
    reconstruction in self.x_hat_sigmoid for inspection.
    """
    self.B = x.size()[0]
    mu, logvar = self.encode(x)
    z, logpz, logqz = self.sample(mu, logvar, k=k)  # [P,B,Z]
    x_hat = self.decode(z)  # [PB,X]
    x_hat = x_hat.view(k, self.B, -1)
    logpx = log_bernoulli(x_hat, x)  # [P,B]

    elbo = logpx + logpz - logqz  # [P,B]

    if k > 1:
        # numerically stable log-mean-exp over the importance samples
        max_ = torch.max(elbo, 0)[0]  # [B]
        elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo)  # [1]

    # for printing
    logpx = torch.mean(logpx)
    logpz = torch.mean(logpz)
    logqz = torch.mean(logqz)

    self.x_hat_sigmoid = torch.sigmoid(x_hat)

    return elbo, logpx, logpz, logqz
def forward(self, x):
    """Gated transform: a sigmoid gate scales the transformed input."""
    gate = self.hw_gate(x).sigmoid()
    res = gate * self.hw_tran(x)
    return res
def forward(self, x):
    """Highway layer: gate blends the tanh transform with a projection."""
    gate = self.hw_gate(x).sigmoid()
    tran = self.hw_tran(x).tanh()
    proj = self.proj(x)
    # gate * H(x) + (1 - gate) * P(x)
    res = gate * tran + (1 - gate) * proj
    return res
def main(argv):
    """Load the trained model, run it over the mp3 named by argv[1] and
    save instrument / pitch / piano-roll predictions as .npy files."""
    name = argv[1]

    # load model dictionary
    save_dic = torch.load('data/model_100')

    # load model, keeping only the weights present in the current Net
    model = Net().cuda()
    model.apply(model_init)
    model_dict = model.state_dict()
    pretrained_dict1 = {
        k: v
        for k, v in save_dic['state_dict'].items() if k in model_dict
    }
    model_dict.update(pretrained_dict1)
    model.load_state_dict(model_dict)
    print('finishing loading model')

    # load test dataset, normalised with the training statistics
    Xavg, Xstd = save_dic['avg'], save_dic['std']
    Xte = load_te_mp3(name, Xavg.data.cpu().numpy(), Xstd.data.cpu().numpy())
    print('finishing loading dataset')

    # predict configure
    v_kwargs = {'batch_size': 8, 'num_workers': 10, 'pin_memory': True}
    loader = torch.utils.data.DataLoader(Data2Torch([Xte]), **v_kwargs)

    s = Xte.shape
    pred_inst = np.zeros((s[0], 10, s[2]))
    pred_pitch = np.zeros((s[0], 88, s[2]))
    pred_roll = np.zeros((s[0], 10, 88, s[2]))

    # start predict
    print('start predicting...')
    model.eval()
    ds = 0
    for idx, _input in enumerate(loader):
        data = Variable(_input.cuda())
        pred = model(data, Xavg, Xstd)
        # repeat along the time axis by 2 to restore the original resolution
        pred_inst[ds:ds + len(data)] = np.repeat(torch.sigmoid(
            pred[0]).data.cpu().numpy(), 2, axis=2)
        pred_pitch[ds:ds + len(data)] = np.repeat(torch.sigmoid(
            pred[1]).data.cpu().numpy(), 2, axis=2)
        pred_roll[ds:ds + len(data)] = np.repeat(torch.sigmoid(
            pred[2]).data.cpu().numpy(), 2, axis=3)
        ds += len(data)

    threshold = 0.85
    pred_inst = all_pred = np.transpose(pred_inst, (1, 0, 2)).reshape((10, -1))
    pred_pitch = np.transpose(pred_pitch, (1, 0, 2)).reshape((88, -1))
    pred_roll = np.transpose(pred_roll, (1, 2, 0, 3)).reshape((10, 88, -1))
    # drop class index 3 — presumably an unused class; confirm upstream
    pred_inst = np.delete(pred_inst, [3], axis=0)
    pred_roll = np.delete(pred_roll, [3], axis=0)

    # binarise pitch and roll predictions at the fixed threshold
    pred_pitch[pred_pitch > threshold] = 1
    pred_pitch[pred_pitch <= threshold] = 0
    pred_roll[pred_roll > threshold] = 1
    pred_roll[pred_roll <= threshold] = 0

    np.save('output_data/inst/' + name[:-4] + '.npy', pred_inst)
    np.save('output_data/pitch/' + name[:-4] + '.npy', pred_pitch)
    np.save('output_data/roll/' + name[:-4] + '.npy', pred_roll)
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt

# fake data: 200 evenly spaced points in [-5, 5]
x = torch.linspace(-5, 5, 200)  # x data (tensor), shape=(200,)
x = Variable(x)
x_np = x.data.numpy()  # matplotlib works on numpy arrays

# activation curves (torch.sigmoid/tanh replace the deprecated F.* calls)
y_relu = F.relu(x).data.numpy()
y_sigmoid = torch.sigmoid(x).data.numpy()
y_tanh = torch.tanh(x).data.numpy()
y_softplus = F.softplus(x).data.numpy()

plt.figure(1, figsize=(8, 6))
plt.subplot(221)
plt.plot(x_np, y_relu, c='red', label='relu')
plt.ylim(-1, 5)
plt.legend(loc='best')

plt.subplot(222)
plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
plt.ylim(-0.2, 1.2)
plt.legend(loc='best')

plt.subplot(223)
plt.plot(x_np, y_tanh, c='red', label='tanh')
plt.ylim(-1.2, 1.2)
plt.legend(loc='best')
def forward(self, x: Tensor):
    """Sigmoid-based GELU approximation: x * sigmoid(1.702 * x)."""
    return x * (1.702 * x).sigmoid()
def forward(self, x):
    """GLU-style gating: the first half of the channels is scaled by the
    sigmoid of the second half."""
    nc = x.size(1)
    assert nc % 2 == 0, 'channels dont divide 2!'
    nc = nc // 2
    return x[:, :nc] * x[:, nc:].sigmoid()
def forward(self, x):
    """Discriminator stack: LeakyReLU(0.2) hidden layers, sigmoid
    probability reshaped to one column per sample."""
    x = F.leaky_relu(self.layer1(x), 0.2, inplace=True)
    x = F.leaky_relu(self.layer3(self.layer2(x)), 0.2, inplace=True)
    x = F.leaky_relu(self.layer5(self.layer4(x)), 0.2, inplace=True)
    x = self.layer6(x).sigmoid()
    return x.view(-1, 1)
def forward(self, x):
    """Swish/SiLU activation: x * sigmoid(x)."""
    return x * x.sigmoid()
def forward(self, x):
    """Generator MLP: two ELU hidden layers, sigmoid output in (0, 1)."""
    x = F.elu(self.map1(x))
    x = F.elu(self.map2(x))
    return self.map3(x).sigmoid()
def forward(self, x):
    """Channel attention: blended avg/max spatial pooling -> 2-layer MLP
    -> sigmoid scaling of the input."""
    # 50/50 blend of average- and max-pooling over the spatial dims (2, 3)
    x_pool = 0.5 * x.mean((2, 3), keepdim=True) + 0.5 * x.amax((2, 3), keepdim=True)
    x_attn = self.fc2(self.act(self.fc1(x_pool)))
    return x * x_attn.sigmoid()
def forward(self, x):
    """Flatten the network's single-logit output into a per-sample
    probability vector of shape (batch_size,)."""
    batch_size = x.size(0)
    return self.net(x).view(batch_size).sigmoid()
def test():
    """Evaluate the VAE-classifier on every test pose: accumulate
    BCE + beta*KLD losses and F1 at several thresholds, log via the
    module-level logger, and return (mean loss, mean crit, mean reg, f1s)."""
    model.eval()
    f1s, acc_loss, acc_crit, acc_reg = [], [], [], []
    for i, dl_test_pose in enumerate(dl_test):
        targets, preds = [], []
        print(
            '-----------------------------------Evaluating POSE {} ------------------------- '
            .format(poses[i]))
        for iter, (data, target, _) in enumerate(dl_test_pose):
            # restrict to the selected AUs and clamp labels into [0, 1]
            target = torch.clamp(target[:, aus], 0, 1)
            data, target = data.cuda(), target.cuda()
            data, target = Variable(data).float(), Variable(target).float()
            with torch.no_grad():
                pred, mu, logvar = model(data)
                pred = torch.sigmoid(pred)
                crit_val = bce_loss(pred, target)
                reg_val = kld(mu, logvar) / len(data)
                loss = crit_val + beta * reg_val
                acc_crit.append(crit_val.data)
                acc_reg.append(reg_val.data)
                acc_loss.append(loss.data)
                preds.append(pred)
                targets.append(target.data.cpu().numpy())
        preds = np.asarray(np.concatenate(preds))
        print('preds min:{}, max:{}, mean:{}'.format(preds.min(), preds.max(),
                                                     np.mean(preds)))
        targets = np.clip(np.rint(np.concatenate(targets)), 0,
                          1).astype(np.uint8)
        ''' Evaluate model per pose'''
        f1_pose = []
        for t in eval_thresholds:
            # binarise predictions at threshold t
            preds_f = np.copy(preds)
            preds_f[np.where(preds_f < t)] = 0
            preds_f[np.where(preds_f >= t)] = 1
            preds_f = np.reshape(preds_f, (-1, n_classes))
            if t == 0.5:
                print('--------EVAL PRED------ t = {}'.format(t))
                _, _, f1, _, _ = evaluate_model(targets, preds_f, verbose=True)
            else:
                _, _, f1, _, _ = evaluate_model(targets, preds_f, verbose=False)
            f1_pose.append(f1)
        f1s.append(f1_pose)
    ''' Log validation loss '''
    info = {
        'loss_test': np.mean(acc_loss),
        'crit_test': np.mean(acc_crit),
        'reg_test': np.mean(acc_reg)
    }
    for tag, value in info.items():
        logger.scalar_summary(tag, value, epoch)
    ''' Log F1 per threshold'''
    f1s = np.mean(f1s, axis=0)
    for i, t in enumerate(eval_thresholds):
        info = {'f1_val_t_' + str(t): f1s[i]}
        for tag, value in info.items():
            logger.scalar_summary(tag, value, epoch)
    return np.mean(acc_loss), np.mean(acc_crit), np.mean(acc_reg), f1s
def forward(self, x):
    """Logistic-regression forward pass: linear layer, then sigmoid."""
    y_pred = self.liner(x).sigmoid()
    return y_pred
def forward(self, x):
    """Swish-style activation with a configurable power: x * sigmoid(x)**b."""
    gate = x.sigmoid() ** self.b
    return x * gate
def train(self):
    """Adversarial training loop for the selector / predictor / discriminator /
    baseline quadruple.

    For each batch, the selector produces a per-pixel selection probability,
    10 binary masks are sampled from it, and the four networks are updated
    with carefully separated gradients (each backward pass is followed by
    zeroing the other three optimizers before stepping the intended one).
    Saves checkpoints whenever validation IoU improves.
    """
    since = time.time()
    # NOTE(review): best_sel_loss is initialised but never updated below;
    # the final print always reports 0 — looks like leftover code, confirm.
    best_sel_loss = 0
    best_iou = 0
    for epoch in range(self.num_epochs):
        print('Epoch {}/{}'.format(epoch, self.num_epochs - 1), flush=True)
        print('-' * 10, flush=True)
        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                #Set the models to training mode
                self.predictor.train()
                self.discriminator.train()
                self.selector.train()
                self.baseline.train()
            else:
                #Set the models to evaluation mode
                self.predictor.eval()
                self.discriminator.eval()
                self.selector.eval()
                self.baseline.eval()
            #Keep a track of all the three loss
            running_sel_loss = 0.0
            running_pred_loss = 0.0
            running_dis_loss = 0.0
            running_base_loss = 0.0
            running_spa = 0.0
            #Metrics : accuracy
            running_pred_acc = 0
            running_dis_acc = 0
            #running_dis_met = 0
            running_base_acc = 0
            running_iou = 0
            #tqdm bar
            pbar = tqdm(total=self.dataset_sizes[phase])
            # Iterate over data.
            for sampled_batch in self.dataloaders[phase]:
                inputs = sampled_batch['image']
                labels = sampled_batch['category']
                mask = sampled_batch['mask']
                #Input needs to be float and labels long
                inputs = inputs.float().to(self.device)
                labels = labels.long().to(self.device)
                mask = mask.to(self.device)
                # zero the parameter gradients
                self.optimizer_sel.zero_grad()
                self.optimizer_pred.zero_grad()
                self.optimizer_dis.zero_grad()
                self.optimizer_base.zero_grad()
                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    #import pdb;pdb.set_trace()
                    #Generate predictor output probabilities
                    base_out, _ = self.baseline(inputs)
                    # NOTE(review): F.softmax without an explicit dim is
                    # deprecated; presumably dim=1 is intended — confirm.
                    base_prob = F.softmax(base_out)
                    _, base_preds = torch.max(base_out, 1)
                    #Baseline Cross entropy
                    base_ce_loss = F.cross_entropy(base_out, labels)
                    #Generate selection probabilites using selector function. This will be the mask
                    #sel_prob = F.sigmoid(self.selector(inputs))
                    sel_prob = self.selector(inputs)
                    sel_prob = F.sigmoid(sel_prob)
                    #sel_prob = sel_prob - sel_prob.min()
                    #sel_prob = sel_prob/sel_prob.max()
                    pred_ce_loss = 0
                    dis_loss = 0
                    sel_loss = 0
                    sparsity = 0
                    #dis_metric = 0
                    iou = 0
                    # Monte-Carlo estimate over 10 sampled binary masks.
                    for sampling_ind in range(10):
                        bin_samples = sampler(sel_prob.data.cpu().numpy())
                        bin_samples = torch.Tensor(bin_samples).to(
                            self.device)
                        bin_mask = self.prob_mask(bin_samples).to(
                            self.device)
                        #print(bin_samples)
                        sparsity += torch.mean(bin_samples)
                        iou += get_IoU(bin_mask, mask)
                        #Compute the Complementary selection probability
                        comp_bin_mask = 1 - bin_mask
                        #Generate X_S the selection probability masked image
                        x_s = inputs * bin_mask
                        #Generate X_S_bar the complementary selection probability masked image
                        x_s_bar = inputs * comp_bin_mask
                        #Generate predictor output probabilities
                        pred_out, _ = self.predictor(x_s)
                        pred_prob = F.softmax(pred_out)
                        _, pred_preds = torch.max(pred_out, 1)
                        #Generate discriminator probabilities)
                        dis_out, _ = self.discriminator(x_s_bar)
                        dis_prob = F.softmax(dis_out)
                        _, dis_preds = torch.max(dis_out, 1)
                        #dis_metric += torch.mean(torch.abs(dis_prob-0.5))
                        #print(torch.mean(torch.abs(dis_prob-0.5)))
                        #Predictor Cross entropy
                        pred_ce_loss += F.cross_entropy(pred_out, labels)
                        #Discriminator Negative Cross entropy
                        #dis_loss += -torch.log(dis_prob[0][0]*dis_prob[0][1])#-F.cross_entropy(dis_out,labels)
                        # NOTE(review): indexing [0] assumes batch size 1 here
                        # — confirm against the dataloader configuration.
                        dis_loss += dis_prob[0][labels[0]]
                        #print(pred_out,dis_out,labels)
                        with torch.no_grad():
                            dis_ce_loss = F.cross_entropy(dis_out, labels)
                        #first KL divergence term
                        kl_1 = -base_ce_loss + pred_ce_loss
                        #second KL divergence term
                        kl_2 = -base_ce_loss + dis_ce_loss
                        #the difference in the two KL divergence terms
                        kl_diff = kl_1 - self.alpha * kl_2
                        #Selector function loss
                        l1_loss = torch.mean(sel_prob)
                        # REINFORCE-style log-likelihood of the sampled mask
                        # under the selector's Bernoulli distribution.
                        distribution_loss = torch.mean(
                            bin_samples * torch.log(sel_prob + 1e-8) +
                            (1 - bin_samples) * torch.log(1 - sel_prob + 1e-8))
                        sel_loss += distribution_loss * kl_diff + self.beta * l1_loss
                    # Average the Monte-Carlo accumulators over the 10 samples.
                    pred_ce_loss /= 10
                    dis_loss /= 10
                    sel_loss /= 10
                    sparsity /= 10
                    #dis_metric /= 10
                    iou /= 10
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        #The gradients of pred_ce_loss should not update the params of disc or sel
                        base_ce_loss.backward(retain_graph=True)
                        self.optimizer_sel.zero_grad()
                        self.optimizer_dis.zero_grad()
                        self.optimizer_pred.zero_grad()
                        self.optimizer_base.step()
                        #Update predictor using pred_ce_loss
                        #The gradients of pred_ce_loss should not update the params of disc or sel
                        pred_ce_loss.backward(retain_graph=True)
                        self.optimizer_sel.zero_grad()
                        self.optimizer_dis.zero_grad()
                        self.optimizer_base.zero_grad()
                        self.optimizer_pred.step()
                        #The gradients of dis_ce_loss should not update the params of pred or sel
                        dis_loss.backward(retain_graph=True)
                        self.optimizer_sel.zero_grad()
                        self.optimizer_pred.zero_grad()
                        self.optimizer_base.zero_grad()
                        self.optimizer_dis.step()
                        #Update sel
                        sel_loss.backward()
                        self.optimizer_pred.zero_grad()
                        self.optimizer_dis.zero_grad()
                        self.optimizer_base.zero_grad()
                        self.optimizer_sel.step()
                # statistics
                running_sel_loss += sel_loss.item() * inputs.size(0)
                running_pred_loss += pred_ce_loss.item() * inputs.size(0)
                running_dis_loss += dis_loss.item() * inputs.size(0)
                running_base_loss += base_ce_loss.item() * inputs.size(0)
                running_spa += sparsity * inputs.size(0)
                # NOTE(review): accuracy uses only the last sampling round's
                # predictions (pred_preds/dis_preds from iteration 9) — confirm
                # this is intentional.
                running_pred_acc += torch.sum(pred_preds == labels.data)
                running_dis_acc += torch.sum(dis_preds == (1 - labels.data))
                #running_dis_met += dis_metric
                running_base_acc += torch.sum(base_preds == labels.data)
                running_iou += iou * inputs.size(0)
                #print(running_base_acc)
                pbar.update(inputs.shape[0])
            pbar.close()
            # Per-epoch averages over the dataset.
            epoch_base_loss = running_base_loss / self.dataset_sizes[phase]
            epoch_sel_loss = running_sel_loss / self.dataset_sizes[phase]
            epoch_pred_loss = running_pred_loss / self.dataset_sizes[phase]
            epoch_dis_loss = running_dis_loss / self.dataset_sizes[phase]
            epoch_spa = running_spa / self.dataset_sizes[phase]
            epoch_base_acc = running_base_acc.double(
            ) / self.dataset_sizes[phase]
            epoch_pred_acc = running_pred_acc.double(
            ) / self.dataset_sizes[phase]
            epoch_dis_acc = running_dis_acc.double(
            ) / self.dataset_sizes[phase]
            #epoch_dis_met = running_dis_met / self.dataset_sizes[phase]
            epoch_iou = running_iou / self.dataset_sizes[phase]
            print(
                '{} Base_Loss: {:.4f} Sel_Loss: {:.4f} Pred_Loss: {:.4f} Dis_Loss: {:.4f} Spa: {:.4f} BAC: {:.4f} PAC: {:.4f} DAC: {:.4f} IoU: {:.4f}'
                .format(phase, epoch_base_loss, epoch_sel_loss,
                        epoch_pred_loss, epoch_dis_loss, epoch_spa,
                        epoch_base_acc, epoch_pred_acc, epoch_dis_acc,
                        epoch_iou))
            # deep copy the model
            # Checkpoint all four networks when validation IoU improves.
            if phase == 'valid' and epoch_iou > best_iou:
                best_iou = epoch_iou
                torch.save(self.selector.state_dict(),
                           self.exp_name + '_sel.pt')
                torch.save(self.baseline.state_dict(),
                           self.exp_name + '_base.pt')
                torch.save(self.predictor.state_dict(),
                           self.exp_name + '_pred.pt')
                torch.save(self.discriminator.state_dict(),
                           self.exp_name + '_dis.pt')
                #import pdb;pdb.set_trace()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best Sel Loss: {:4f}'.format(best_sel_loss))
    # Always save the final weights, independent of the best-IoU checkpoints.
    torch.save(self.baseline.state_dict(), self.exp_name + '_base_final.pt')
    torch.save(self.selector.state_dict(), self.exp_name + '_sel_final.pt')
    torch.save(self.predictor.state_dict(), self.exp_name + '_pred_final.pt')
    torch.save(self.discriminator.state_dict(),
               self.exp_name + '_dis_final.pt')
    print('Training completed finally !!!!!')
def forward(self, x):
    '''
    Only support batch = 1

    Relation-mask head: regroups per-RoI mask features by class, optionally
    refines them with a relation module ('CAM' or 'CIAM' mode from config),
    and scatters the refined masks back into the sorted mask tensor.

    :param x: tuple (features_RoI, features_mask, proposals, targets)
    :return: (sorted_feature_mask, sorted_proposals, targets, None)
    '''
    features_RoI, features_mask, proposals, targets = x
    # binarized
    select_feature_mask, \
    sorted_feature_roi, \
    sorted_feature_mask, \
    distances,\
    sorted_proposals, \
    bboxes, \
    label_index,\
    center,\
    distance_before_refine = self.prepare_msk_relation(features_RoI,
                                                       features_mask,
                                                       proposals,)
    if self.cfg.MODEL.RELATION_MASK.TYPE == 'CAM':
        # Refine each class's mask features independently and write them
        # back at that class's channel (label_index) before returning.
        for i in range(self.fg_class):
            feature = select_feature_mask[i]
            select_feature_mask[i] = self.relation_module(
                feature[None, :, :, :])[0, :, :, :]
        select_feature_mask = torch.cat(select_feature_mask)
        index = torch.arange(select_feature_mask.shape[0],
                             device=select_feature_mask.device)
        sorted_feature_mask[index, label_index] = select_feature_mask[:, :, :]
        return sorted_feature_mask, sorted_proposals, targets, None
    # distance_before_refine = torch.cat(distance_before_refine)
    if self.cfg.MODEL.RELATION_MASK.TYPE == 'CIAM':
        # Remember per-class counts so the flat feature batch can be
        # split back into per-class groups after extraction.
        cls_length = [
            select_feature_mask[i].shape[0] for i in range(self.fg_class)
        ]
        select_feature_mask = torch.cat(select_feature_mask, dim=0)
        sorted_feature_roi = torch.cat(sorted_feature_roi, dim=0)
        select_feature_mask = F.sigmoid(select_feature_mask)
        sorted_feature = self.appearance_feature_extractor(
            (sorted_feature_roi, select_feature_mask[:, None, :, :]))
        if self.cfg.MODEL.RELATION_MASK.FEATURE_EXTRACTOR == 'SameFeatureMask':
            # This extractor also returns an updated feature mask.
            sorted_feature, select_feature_mask = sorted_feature
            sorted_feature = torch.split(sorted_feature, cls_length)
        else:
            sorted_feature = torch.split(sorted_feature, cls_length)
        # Run the relation module per class, skipping empty classes.
        relation_feature = []
        for i in range(self.fg_class):
            feature = sorted_feature[i]
            if feature.shape[0] != 0:
                relation_feature.append(self.relation_module(feature))
        relation_feature = torch.cat(relation_feature)
        if self.cfg.MODEL.RELATION_MASK.SAME_PREDICTOR \
            and self.cfg.MODEL.RELATION_MASK.FEATURE_EXTRACTOR in\
                [ "SameSizeRoiAlignMaskFeatureExtractor", "SameFeatureMask",
                  'DeepFeatureExtractor']:
            relation_feature = self.predictor(relation_feature)
        else:
            # Fallback prediction head: deconv -> ReLU -> classifier.
            relation_feature = self.deconv_1(relation_feature)
            relation_feature = F.relu(relation_feature)
            relation_feature = self.classifier(relation_feature)
        index = torch.arange(relation_feature.shape[0],
                             device=select_feature_mask.device)
        sorted_feature_mask[index] = relation_feature
        return sorted_feature_mask, sorted_proposals, targets, None
def inference(self, x, actual_frame_length, sampling_sec, min_prop_num,
              max_prop_num, min_prop_num_before_nms, pos_thresh,
              stride_factor, gated_mask=False):
    """Dense-captioning inference: generate temporal proposals from multi-scale
    1-D conv anchors, greedily NMS them, optionally build learned/gated
    continuous masks, and caption each surviving proposal.

    Returns a list (one entry per batch item) of tuples of
    (window start, window end, proposal score, predicted sentence).
    Assumes batch size 1 for the kernel-size check below.
    """
    B, T, _ = x.size()
    dtype = x.data.type()
    # Split the 4096-dim input into RGB and flow streams and re-embed.
    x_rgb, x_flow = torch.split(x, 2048, 2)
    x_rgb = self.rgb_emb(x_rgb.contiguous())
    x_flow = self.flow_emb(x_flow.contiguous())
    x = torch.cat((x_rgb, x_flow), 2)
    x = self.emb_out(x)
    vis_feat, all_emb = self.vis_emb(x)
    # vis_feat = self.vis_dropout(vis_feat)
    # B x T x H -> B x H x T
    # for 1d conv
    vis_feat = vis_feat.transpose(1, 2).contiguous()
    prop_lst = []
    for i, kernel in enumerate(self.prop_out):
        kernel_size = self.kernel_list[i]
        if kernel_size <= actual_frame_length[
                0]:  # no need to use larger kernel size in this case, batch size is only 1
            pred_o = kernel(vis_feat)
            # Anchor centers for this kernel size, spaced by the stride.
            anchor_c = Variable(
                torch.FloatTensor(
                    np.arange(
                        float(kernel_size) / 2.0,
                        float(T + 1 - kernel_size / 2.0),
                        math.ceil(kernel_size / stride_factor))).type(dtype))
            if anchor_c.size(0) != pred_o.size(-1):
                raise Exception("size mismatch!")
            anchor_c = anchor_c.expand(B, 1, anchor_c.size(0))
            anchor_l = Variable(
                torch.FloatTensor(
                    anchor_c.size()).fill_(kernel_size).type(dtype))
            pred_final = torch.cat((pred_o, anchor_l, anchor_c), 1)
            prop_lst.append(pred_final)
        else:
            print('skipping kernel sizes greater than {}'.format(
                self.kernel_list[i]))
            break
    prop_all = torch.cat(prop_lst, 2)
    # assume 1st and 2nd are action prediction and overlap, respectively
    prop_all[:, :2, :] = F.sigmoid(prop_all[:, :2, :])
    # Decode length/center offsets relative to the anchors (rows 4, 5).
    pred_len = prop_all[:, 4, :] * torch.exp(prop_all[:, 2, :])
    pred_cen = prop_all[:, 5, :] + prop_all[:, 4, :] * prop_all[:, 3, :]
    nms_thresh_set = np.arange(0.9, 0.95, 0.05).tolist()
    all_proposal_results = []
    # store positional encodings, size of B x 4,
    # the first B values are predicted starts,
    # second B values are predicted ends,
    # third B values are anchor starts,
    # last B values are anchor ends
    pred_start_lst = []  #torch.zeros(B * 4).type(dtype)
    pred_end_lst = []
    anchor_start_lst = []
    anchor_end_lst = []
    anchor_window_mask = []  #Variable(torch.zeros(B, T).type(dtype))
    gate_scores = []  #Variable(torch.zeros(B, 1).type(dtype))
    for b in range(B):
        crt_pred = prop_all.data[b]
        crt_pred_cen = pred_cen.data[b]
        crt_pred_len = pred_len.data[b]
        pred_masks = []
        batch_result = []
        crt_nproposal = 0
        # Candidate pool: everything above pos_thresh, clamped to
        # [min_prop_num_before_nms, total anchors].
        nproposal = torch.sum(torch.gt(prop_all.data[b, 0, :], pos_thresh))
        nproposal = min(max(nproposal, min_prop_num_before_nms),
                        prop_all.size(-1))
        pred_results = np.empty((nproposal, 3))
        _, sel_idx = torch.topk(crt_pred[0], nproposal)
        start_t = time.time()
        # Greedy NMS, progressively relaxing the IoU threshold until
        # enough proposals survive.
        for nms_thresh in nms_thresh_set:
            for prop_idx in range(nproposal):
                original_frame_len = actual_frame_length[b].item(
                ) + sampling_sec * 2  # might be truncated at the end, hence + frame_to_second*2
                pred_start_w = crt_pred_cen[sel_idx[
                    prop_idx]] - crt_pred_len[sel_idx[prop_idx]] / 2.0
                pred_end_w = crt_pred_cen[sel_idx[
                    prop_idx]] + crt_pred_len[sel_idx[prop_idx]] / 2.0
                pred_start = pred_start_w
                pred_end = pred_end_w
                if pred_start >= pred_end:
                    continue
                if pred_end >= original_frame_len or pred_start < 0:
                    continue
                hasoverlap = False
                if crt_nproposal > 0:
                    if np.max(
                            segment_iou(np.array([pred_start, pred_end]),
                                        pred_results[:crt_nproposal])
                    ) > nms_thresh:
                        hasoverlap = True
                if not hasoverlap:
                    # Binary mask over the clipped, integer window.
                    pred_bin_window_mask = torch.zeros(1, T, 1).type(dtype)
                    win_start = math.floor(
                        max(
                            min(pred_start,
                                min(original_frame_len, T) - 1), 0))
                    win_end = math.ceil(
                        max(min(pred_end, min(original_frame_len, T)), 1))
                    # if win_start >= win_end:
                    #     print('length: {}, mask window start: {} >= window end: {}, skipping'.format(
                    #         original_frame_len, win_start, win_end,
                    #     ))
                    #     continue
                    pred_bin_window_mask[:, win_start:win_end] = 1
                    pred_masks.append(pred_bin_window_mask)
                    if self.learn_mask:
                        # 4, 5 are the indices for anchor length and center
                        anc_len = crt_pred[4, sel_idx[prop_idx]]
                        anc_cen = crt_pred[5, sel_idx[prop_idx]]
                        # only use the pos sample to train, could potentially use more sample for training mask, but this is easier to do
                        amask = torch.zeros(1, T).type(dtype)
                        amask[0,
                              max(0, math.floor(anc_cen - anc_len / 2.)
                                  ):min(T, math.ceil(anc_cen +
                                                     anc_len / 2.))] = 1.
                        anchor_window_mask.append(amask)
                        pred_start_lst.append(
                            torch.Tensor([pred_start_w]).type(dtype))
                        pred_end_lst.append(
                            torch.Tensor([pred_end_w]).type(dtype))
                        anchor_start_lst.append(
                            torch.Tensor([
                                max(0, math.floor(anc_cen - anc_len / 2.))
                            ]).type(dtype))
                        anchor_end_lst.append(
                            torch.Tensor([
                                min(T, math.ceil(anc_cen + anc_len / 2.))
                            ]).type(dtype))
                        gate_scores.append(
                            torch.Tensor([crt_pred[0, sel_idx[prop_idx]]
                                          ]).type(dtype))
                    pred_results[crt_nproposal] = np.array([
                        win_start, win_end, crt_pred[0, sel_idx[prop_idx]]
                    ])
                    crt_nproposal += 1
                if crt_nproposal >= max_prop_num:
                    break
            if crt_nproposal >= min_prop_num:
                break
        mid1_t = time.time()
        if len(pred_masks
               ) == 0:  # append all-one window if no window is proposed
            pred_masks.append(torch.ones(1, T, 1).type(dtype))
            pred_results[0] = np.array(
                [0, min(original_frame_len, T), pos_thresh])
            crt_nproposal = 1
        pred_masks = Variable(torch.cat(pred_masks, 0))
        # Replicate this item's features, once per surviving proposal.
        batch_x = x[b].unsqueeze(0).expand(pred_masks.size(0), x.size(1),
                                           x.size(2))
        if self.learn_mask:
            # Build the mask-model input: 4 positional encodings (pred
            # start/end, anchor start/end) plus the binary anchor mask.
            pe_pred_start = torch.cat(pred_start_lst, 0)
            pe_pred_end = torch.cat(pred_end_lst, 0)
            pe_anchor_start = torch.cat(anchor_start_lst, 0)
            pe_anchor_end = torch.cat(anchor_end_lst, 0)
            pe_locs = torch.cat((pe_pred_start, pe_pred_end, pe_anchor_start,
                                 pe_anchor_end), 0)
            pos_encs = positional_encodings(pe_locs, self.d_model // 4)
            npos = pos_encs.size(0)
            anchor_window_mask = Variable(torch.cat(anchor_window_mask, 0))
            in_pred_mask = torch.cat(
                (pos_encs[:npos // 4], pos_encs[npos // 4:npos // 4 * 2],
                 pos_encs[npos // 4 * 2:npos // 4 * 3],
                 pos_encs[npos // 4 * 3:npos // 4 * 4], anchor_window_mask),
                1)
            pred_cont_masks = self.mask_model(in_pred_mask).unsqueeze(2)
            if gated_mask:
                # Blend binary and continuous masks by proposal confidence.
                gate_scores = Variable(
                    torch.cat(gate_scores, 0).view(-1, 1, 1))
                window_mask = (gate_scores * pred_masks +
                               (1 - gate_scores) * pred_cont_masks)
            else:
                window_mask = pred_cont_masks
        else:
            window_mask = pred_masks
        mid2_t = time.time()
        pred_sentence = []
        # use cap_batch as caption batch size
        cap_batch = math.ceil(480 * 256 / T)
        for sent_i in range(math.ceil(window_mask.size(0) / cap_batch)):
            batch_start = sent_i * cap_batch
            batch_end = min((sent_i + 1) * cap_batch, window_mask.size(0))
            pred_sentence += self.cap_model.greedy(
                batch_x[batch_start:batch_end],
                window_mask[batch_start:batch_end], 20)
        pred_results = pred_results[:crt_nproposal]
        assert len(pred_sentence) == crt_nproposal, (
            "number of predicted sentence and proposal does not match")
        for idx in range(len(pred_results)):
            batch_result.append(
                (pred_results[idx][0], pred_results[idx][1],
                 pred_results[idx][2], pred_sentence[idx]))
        all_proposal_results.append(tuple(batch_result))
        end_t = time.time()
        print(
            'Processing time for tIoU: {:.2f}, mask: {:.2f}, caption: {:.2f}'
            .format(mid1_t - start_t, mid2_t - mid1_t, end_t - mid2_t))
    return all_proposal_results
def forward(self, x, hidden):
    """Run the RNN over x, decode its outputs, and squash them to (0, 1).

    Returns (decoded, hidden): the sigmoid-activated decoder output and the
    RNN's new hidden state. torch.sigmoid replaces deprecated F.sigmoid.
    """
    output, hidden = self.rnn(x, hidden)
    decoded = torch.sigmoid(self.decoder(output))
    return decoded, hidden
def predict(img_path, mask_path, net):
    """Used for Kaggle submission: predicts and encode all test images.

    Runs `net` over every image in `img_path`, compares the thresholded
    prediction against the ground-truth mask in `mask_path`, and writes
    per-image pixel counts, intersection, union and IoU to
    "calculate_fold1_PAD_e145.csv".

    Changes vs. original: torch.sigmoid replaces deprecated F.sigmoid, and
    the unused `transforms.Compose` pipeline was removed (its only uses were
    commented out).
    """
    id_ = []
    pred_num = []
    mask_num = []
    inter = []
    union = []
    threshold = []
    height, width = 101, 101
    # Pad 101x101 inputs up to a multiple of 32 (the network's stride);
    # the padding offsets are used below to crop the 128x128 output back.
    if height % 32 == 0:
        y_min_pad = 0
        y_max_pad = 0
    else:
        y_pad = 32 - height % 32
        y_min_pad = int(y_pad / 2)
        y_max_pad = y_pad - y_min_pad
    if width % 32 == 0:
        x_min_pad = 0
        x_max_pad = 0
    else:
        x_pad = 32 - width % 32
        x_min_pad = int(x_pad / 2)
        x_max_pad = x_pad - x_min_pad
    N = len(list(os.listdir(img_path)))
    for index, name in enumerate(os.listdir(img_path)):
        if index % 500 == 0:
            print('{}/{}'.format(index, N))
        # Strip the 4-character extension to get the image id.
        id_.append(str(name)[:-4])
        # img = cv2.imread(img_path + name, cv2.IMREAD_GRAYSCALE).astype(np.float32) / 255
        img = load_image(img_path + name)
        # img = cv2.resize(img, (128, 128), interpolation=cv2.INTER_CUBIC)
        mask_true = cv2.imread(mask_path + name, cv2.IMREAD_GRAYSCALE).astype(
            np.float32) // 255
        # image = hwc_to_chw(img)
        image = img.unsqueeze(0)
        if torch.cuda.is_available():
            image = Variable(image.cuda())
        else:
            image = Variable(image)
        with torch.no_grad():
            mask_pred = net(image)
            mask_prob = torch.sigmoid(mask_pred).squeeze(0)
        # Crop the padded 128x128 prediction back to the original 101x101.
        mask = mask_prob.cpu().numpy()[:, y_min_pad:128 - y_max_pad,
                                       x_min_pad:128 - x_max_pad]
        # mask = tf(mask_prob.cpu())
        # mask = tf(mask_pred.cpu().squeeze(0))
        mask_pred_np = mask.squeeze(
        ) > 0.4332206261113065  # 0.663294217 #0.5465437
        mask_true_np = np.array(mask_true)
        pred_num.append(mask_pred_np.sum())
        mask_num.append(mask_true_np.sum())
        inter_ = ((mask_pred_np == 1) & (mask_true_np == 1)).sum()
        union_ = ((mask_pred_np == 1) | (mask_true_np == 1)).sum()
        inter.append(inter_)
        union.append(union_)
        # NOTE(review): this column is actually the IoU, not a threshold;
        # it becomes NaN when both prediction and mask are empty.
        threshold.append(inter_ / union_)
    data = pd.DataFrame(
        data={
            "id": id_,
            "pred": pred_num,
            "mask": mask_num,
            "inter": inter,
            "union": union,
            "threshold": threshold
        })
    data.to_csv("calculate_fold1_PAD_e145.csv", index=False)
def update(self, net, x_crops, target_pos, target_sz, window, scale_z, p):
    """One tracking step: score the search crop with `net`, decode the dense
    box predictions, apply size/ratio/window penalties (optionally fused with
    an online score map), pick the peak, and smoothly update the target state.

    Returns (target_pos, target_sz, peak classification score).
    torch.sigmoid replaces the deprecated F.sigmoid calls.
    """
    if self.align:
        cls_score, bbox_pred, cls_align = net.track(x_crops)
        cls_score = torch.sigmoid(cls_score).squeeze().cpu().data.numpy()
        cls_align = torch.sigmoid(cls_align).squeeze().cpu().data.numpy()
        # Fuse the two score maps with the configured ratio.
        cls_score = p.ratio * cls_score + (1 - p.ratio) * cls_align
    else:
        cls_score, bbox_pred = net.track(x_crops)
        cls_score = torch.sigmoid(cls_score).squeeze().cpu().data.numpy()

    # bbox to real predict
    bbox_pred = bbox_pred.squeeze().cpu().data.numpy()

    # Convert per-location offsets into corner coordinates on the search grid.
    pred_x1 = self.grid_to_search_x - bbox_pred[0, ...]
    pred_y1 = self.grid_to_search_y - bbox_pred[1, ...]
    pred_x2 = self.grid_to_search_x + bbox_pred[2, ...]
    pred_y2 = self.grid_to_search_y + bbox_pred[3, ...]

    # size penalty
    s_c = self.change(
        self.sz(pred_x2 - pred_x1, pred_y2 - pred_y1) /
        (self.sz_wh(target_sz)))  # scale penalty
    r_c = self.change((target_sz[0] / target_sz[1]) /
                      ((pred_x2 - pred_x1) /
                       (pred_y2 - pred_y1)))  # ratio penalty

    penalty = np.exp(-(r_c * s_c - 1) * p.penalty_k)
    pscore = penalty * cls_score

    # window penalty
    pscore = pscore * (
        1 - p.window_influence) + window * p.window_influence

    # Optionally blend in an externally-updated online score map.
    if self.online_score is not None:
        s_size = pscore.shape[0]
        o_score = cv2.resize(self.online_score, (s_size, s_size),
                             interpolation=cv2.INTER_CUBIC)
        pscore = p.online_ratio * o_score + (1 - p.online_ratio) * pscore
    else:
        pass

    # get max
    r_max, c_max = np.unravel_index(pscore.argmax(), pscore.shape)

    # to real size
    pred_x1 = pred_x1[r_max, c_max]
    pred_y1 = pred_y1[r_max, c_max]
    pred_x2 = pred_x2[r_max, c_max]
    pred_y2 = pred_y2[r_max, c_max]

    pred_xs = (pred_x1 + pred_x2) / 2
    pred_ys = (pred_y1 + pred_y2) / 2
    pred_w = pred_x2 - pred_x1
    pred_h = pred_y2 - pred_y1

    diff_xs = pred_xs - p.instance_size // 2
    diff_ys = pred_ys - p.instance_size // 2

    # Undo the search-region scaling to get image-coordinate deltas.
    diff_xs, diff_ys, pred_w, pred_h = diff_xs / scale_z, diff_ys / scale_z, pred_w / scale_z, pred_h / scale_z

    target_sz = target_sz / scale_z

    # size learning rate
    lr = penalty[r_max, c_max] * cls_score[r_max, c_max] * p.lr

    # size rate
    res_xs = target_pos[0] + diff_xs
    res_ys = target_pos[1] + diff_ys
    res_w = pred_w * lr + (1 - lr) * target_sz[0]
    res_h = pred_h * lr + (1 - lr) * target_sz[1]

    target_pos = np.array([res_xs, res_ys])
    # Exponential smoothing of the target size with the confidence-scaled lr.
    target_sz = target_sz * (1 - lr) + lr * np.array([res_w, res_h])

    return target_pos, target_sz, cls_score[r_max, c_max]
def discretized_mix_logistic_loss(x, l):
    """ log-likelihood for mixture of discretized logistics, assumes
    the data has been rescaled to [-1,1] interval

    x: target images, NCHW (permuted to NHWC internally).
    l: network output, NCHW; channel dim packs, per mixture component,
       the mixture logits plus (mean, log-scale, RGB coupling coeff).
    Returns the summed negative log-likelihood (scalar tensor).

    torch.tanh / torch.sigmoid replace the deprecated F.tanh / F.sigmoid.
    Requires CUDA (allocates a helper tensor with .cuda()).
    """
    # Pytorch ordering
    x = x.permute(0, 2, 3, 1)
    l = l.permute(0, 2, 3, 1)
    xs = [int(y) for y in x.size()]
    ls = [int(y) for y in l.size()]

    # here and below: unpacking the params of the mixture of logistics
    nr_mix = int(ls[-1] / 10)
    logit_probs = l[:, :, :, :nr_mix]
    l = l[:, :, :, nr_mix:].contiguous().view(
        xs + [nr_mix * 3])  # 3 for mean, scale, coef
    means = l[:, :, :, :, :nr_mix]
    # log_scales = torch.max(l[:, :, :, :, nr_mix:2 * nr_mix], -7.)
    # Clamp log-scales so inv_stdv = exp(-log_scale) stays finite.
    log_scales = torch.clamp(l[:, :, :, :, nr_mix:2 * nr_mix], min=-7.)
    coeffs = torch.tanh(l[:, :, :, :, 2 * nr_mix:3 * nr_mix])
    # here and below: getting the means and adjusting them based on preceding
    # sub-pixels
    x = x.contiguous()
    x = x.unsqueeze(-1) + Variable(torch.zeros(xs + [nr_mix]).cuda(),
                                   requires_grad=False)
    # Autoregressive coupling across RGB: G depends on R, B depends on R and G.
    m2 = (means[:, :, :, 1, :] + coeffs[:, :, :, 0, :] * x[:, :, :, 0, :]).view(
        xs[0], xs[1], xs[2], 1, nr_mix)

    m3 = (means[:, :, :, 2, :] + coeffs[:, :, :, 1, :] * x[:, :, :, 0, :] +
          coeffs[:, :, :, 2, :] * x[:, :, :, 1, :]).view(
              xs[0], xs[1], xs[2], 1, nr_mix)

    means = torch.cat((means[:, :, :, 0, :].unsqueeze(3), m2, m3), dim=3)
    centered_x = x - means
    inv_stdv = torch.exp(-log_scales)
    # CDF at the upper/lower edge of the discretization bin (bin width 2/255).
    plus_in = inv_stdv * (centered_x + 1. / 255.)
    cdf_plus = torch.sigmoid(plus_in)
    min_in = inv_stdv * (centered_x - 1. / 255.)
    cdf_min = torch.sigmoid(min_in)
    # log probability for edge case of 0 (before scaling)
    log_cdf_plus = plus_in - F.softplus(plus_in)
    # log probability for edge case of 255 (before scaling)
    log_one_minus_cdf_min = -F.softplus(min_in)
    cdf_delta = cdf_plus - cdf_min  # probability for all other cases
    mid_in = inv_stdv * centered_x
    # log probability in the center of the bin, to be used in extreme cases
    # (not actually used in our code)
    log_pdf_mid = mid_in - log_scales - 2. * F.softplus(mid_in)

    # now select the right output: left edge case, right edge case, normal
    # case, extremely low prob case (doesn't actually happen for us)

    # this is what we are really doing, but using the robust version below for extreme cases in other applications and to avoid NaN issue with tf.select()
    # log_probs = tf.select(x < -0.999, log_cdf_plus, tf.select(x > 0.999, log_one_minus_cdf_min, tf.log(cdf_delta)))

    # robust version, that still works if probabilities are below 1e-5 (which never happens in our code)
    # tensorflow backpropagates through tf.select() by multiplying with zero instead of selecting: this requires use to use some ugly tricks to avoid potential NaNs
    # the 1e-12 in tf.maximum(cdf_delta, 1e-12) is never actually used as output, it's purely there to get around the tf.select() gradient issue
    # if the probability on a sub-pixel is below 1e-5, we use an approximation
    # based on the assumption that the log-density is constant in the bin of
    # the observed sub-pixel value
    inner_inner_cond = (cdf_delta > 1e-5).float()
    inner_inner_out = inner_inner_cond * torch.log(
        torch.clamp(cdf_delta, min=1e-12)) + (1. - inner_inner_cond) * (
            log_pdf_mid - np.log(127.5))
    inner_cond = (x > 0.999).float()
    inner_out = inner_cond * log_one_minus_cdf_min + (
        1. - inner_cond) * inner_inner_out
    cond = (x < -0.999).float()
    log_probs = cond * log_cdf_plus + (1. - cond) * inner_out
    log_probs = torch.sum(log_probs, dim=3) + log_prob_from_logits(logit_probs)

    return -torch.sum(log_sum_exp(log_probs))
def forward(self, x, s_pos, s_neg, sentence, sample_prob=0, stride_factor=10,
            scst=False, gated_mask=False):
    """Training forward pass of the proposal+captioning model.

    Generates multi-scale anchor predictions, scores the provided positive
    (`s_pos`) and negative (`s_neg`) anchor samples, builds a (binary,
    learned, or gated) temporal attention mask around one randomly chosen
    positive segment per batch item, and captions it.

    Returns (pred_score, gt_score, pred_offsets, gt_offsets, pred_sentence,
    gt_cent, scst_loss, mask_loss); scst_loss/mask_loss may be None.
    """
    B, T, _ = x.size()
    dtype = x.data.type()
    # Split the 4096-dim input into RGB and flow streams and re-embed.
    x_rgb, x_flow = torch.split(x, 2048, 2)
    x_rgb = self.rgb_emb(x_rgb.contiguous())
    x_flow = self.flow_emb(x_flow.contiguous())
    x = torch.cat((x_rgb, x_flow), 2)
    x = self.emb_out(x)
    vis_feat, all_emb = self.vis_emb(x)
    # vis_feat = self.vis_dropout(vis_feat)
    # B x T x H -> B x H x T
    # for 1d conv
    vis_feat = vis_feat.transpose(1, 2).contiguous()
    prop_lst = []
    for i, kernel in enumerate(self.prop_out):
        kernel_size = self.kernel_list[i]
        if kernel_size <= vis_feat.size(-1):
            pred_o = kernel(vis_feat)
            # Anchor centers for this kernel size, spaced by the stride.
            anchor_c = Variable(
                torch.FloatTensor(
                    np.arange(
                        float(kernel_size) / 2.0,
                        float(T + 1 - kernel_size / 2.0),
                        math.ceil(kernel_size / stride_factor))).type(dtype))
            if anchor_c.size(0) != pred_o.size(-1):
                raise Exception("size mismatch!")
            anchor_c = anchor_c.expand(B, 1, anchor_c.size(0))
            anchor_l = Variable(
                torch.FloatTensor(
                    anchor_c.size()).fill_(kernel_size).type(dtype))
            pred_final = torch.cat((pred_o, anchor_l, anchor_c), 1)
            prop_lst.append(pred_final)
        else:
            print('skipping kernel sizes greater than {}'.format(
                self.kernel_list[i]))
            break
    # Important! In prop_all, for the first dimension, the four values are
    # proposal score, overlapping score (DEPRECATED!), length offset, and
    # center offset, respectively
    prop_all = torch.cat(prop_lst, 2)
    if B != s_pos.size(0) or B != s_neg.size(0):
        raise Exception('feature and ground-truth segment do not match!')
    sample_each = self.nsamples // 2
    # Column 0 holds positive-sample scores, column 1 negative-sample scores.
    pred_score = Variable(
        torch.FloatTensor(np.zeros((sample_each * B, 2))).type(dtype))
    gt_score = Variable(
        torch.FloatTensor(np.zeros((sample_each * B, 2))).type(dtype))
    pred_offsets = Variable(
        torch.FloatTensor(np.zeros((sample_each * B, 2))).type(dtype))
    gt_offsets = Variable(
        torch.FloatTensor(np.zeros((sample_each * B, 2))).type(dtype))
    # B x T x H
    batch_mask = Variable(
        torch.FloatTensor(np.zeros((B, T, 1))).type(dtype))
    # store positional encodings, size of B x 4,
    # the first B values are predicted starts,
    # second B values are predicted ends,
    # third B values are anchor starts,
    # last B values are anchor ends
    pe_locs = Variable(torch.zeros(B * 4).type(dtype))
    anchor_window_mask = Variable(torch.zeros(B, T).type(dtype),
                                  requires_grad=False)
    pred_bin_window_mask = Variable(torch.zeros(B, T).type(dtype),
                                    requires_grad=False)
    gate_scores = Variable(torch.zeros(B, 1, 1).type(dtype))
    mask_loss = None
    # Decode length/center offsets relative to the anchors (rows 4, 5).
    pred_len = prop_all[:, 4, :] * torch.exp(prop_all[:, 2, :])
    pred_cen = prop_all[:, 5, :] + prop_all[:, 4, :] * prop_all[:, 3, :]
    for b in range(B):
        pos_anchor = s_pos[b]
        neg_anchor = s_neg[b]
        if pos_anchor.size(0) != sample_each or neg_anchor.size(
                0) != sample_each:
            raise Exception(
                "# of positive or negative samples does not match")
        # randomly choose one of the positive samples to caption
        pred_index = np.random.randint(sample_each)
        # random sample anchors from different length
        for i in range(sample_each):
            # sample pos anchors
            pos_sam = pos_anchor[i].data
            pos_sam_ind = int(pos_sam[0])
            pred_score[b * sample_each + i, 0] = prop_all[b, 0, pos_sam_ind]
            gt_score[b * sample_each + i, 0] = 1
            pred_offsets[b * sample_each + i] = prop_all[b, 2:4, pos_sam_ind]
            gt_offsets[b * sample_each + i] = pos_sam[2:]
            # sample neg anchors
            neg_sam = neg_anchor[i].data
            neg_sam_ind = int(neg_sam[0])
            pred_score[b * sample_each + i, 1] = prop_all[b, 0, neg_sam_ind]
            gt_score[b * sample_each + i, 1] = 0
            # caption the segment
            if i == pred_index:
                # only need once, since one sample corresponds to one sentence only
                # TODO Is that true? Why cannot caption all?
                # anchor length, 4, 5 are the indices for anchor length and center
                anc_len = prop_all[b, 4, pos_sam_ind].data.item()
                anc_cen = prop_all[b, 5, pos_sam_ind].data.item()
                # grount truth length, 2 and 3 are indices for ground truth length and center
                # see line 260 and 268 in anet_dataset.py
                # dont need to use index since now i is 0 and everything is matching
                # length is after taking log
                gt_len = np.exp(pos_sam[2].item()) * anc_len
                gt_cen = pos_sam[3].item() * anc_len + anc_cen
                gt_window_mask = torch.zeros(T, 1).type(dtype)
                gt_window_mask[
                    max(0, math.floor(gt_cen - gt_len / 2.)
                        ):min(T, math.ceil(gt_cen + gt_len / 2.)), :] = 1.
                gt_window_mask = Variable(gt_window_mask,
                                          requires_grad=False)
                batch_mask[b] = gt_window_mask
                # batch_mask[b] = anchor_window_mask
                crt_pred_cen = pred_cen[b, pos_sam_ind]
                crt_pred_len = pred_len[b, pos_sam_ind]
                pred_start_w = crt_pred_cen - crt_pred_len / 2.0
                pred_end_w = crt_pred_cen + crt_pred_len / 2.0
                # Binary window from the (clamped) predicted start/end.
                pred_bin_window_mask[
                    b,
                    math.floor(max(0, min(T - 1,
                                          pred_start_w.data.item()))):
                    math.ceil(max(1, min(T, pred_end_w.data.item())))] = 1.
                if self.learn_mask:
                    anchor_window_mask[
                        b,
                        max(0, math.floor(anc_cen - anc_len / 2.)
                            ):min(T, math.ceil(anc_cen + anc_len / 2.))] = 1.
                    pe_locs[b] = pred_start_w
                    pe_locs[B + b] = pred_end_w
                    pe_locs[B * 2 + b] = Variable(
                        torch.Tensor([
                            max(0, math.floor(anc_cen - anc_len / 2.))
                        ]).type(dtype))
                    pe_locs[B * 3 + b] = Variable(
                        torch.Tensor([
                            min(T, math.ceil(anc_cen + anc_len / 2.))
                        ]).type(dtype))
                    # gate_scores[b] = pred_score[b*sample_each+i, 0].detach()
                    gate_scores[b] = pred_score[b * sample_each + i, 0]
    if self.learn_mask:
        # Mask-model input: 4 positional encodings (pred start/end, anchor
        # start/end) concatenated with the binary anchor mask.
        pos_encs = positional_encodings(pe_locs, self.d_model // 4)
        in_pred_mask = torch.cat(
            (pos_encs[:B], pos_encs[B:B * 2], pos_encs[B * 2:B * 3],
             pos_encs[B * 3:B * 4], anchor_window_mask), 1)
        pred_mask = self.mask_model(in_pred_mask).view(B, T, 1)
        if gated_mask:
            # Blend binary and learned masks by (sigmoided) proposal score.
            gate_scores = F.sigmoid(gate_scores)
            window_mask = (
                gate_scores * pred_bin_window_mask.view(B, T, 1)
                # window_mask = (gate_scores * batch_mask
                + (1 - gate_scores) * pred_mask)
        else:
            window_mask = pred_mask
        mask_loss = F.binary_cross_entropy_with_logits(
            pred_mask, pred_bin_window_mask.view(B, T, 1))
        # mask_loss = F.binary_cross_entropy_with_logits(window_mask, batch_mask)
    else:
        window_mask = pred_bin_window_mask.view(B, T, 1)
        # window_mask = batch_mask
    pred_sentence, gt_cent = self.cap_model(x,
                                            sentence,
                                            window_mask,
                                            sample_prob=sample_prob)
    scst_loss = None
    if scst:
        scst_loss = self.cap_model.scst(x, batch_mask, sentence)
    return (pred_score, gt_score, pred_offsets, gt_offsets, pred_sentence,
            gt_cent, scst_loss, mask_loss)
def forward(self, x):
    """Final convolution followed by a sigmoid, yielding values in (0, 1).

    torch.sigmoid replaces deprecated F.sigmoid.
    """
    x = self.conv(x)
    return torch.sigmoid(x)
def forward(self, encoder_output):
    """Predict start/end position scores from the encoder output.

    NOTE(review): despite the `_logits` names, the returned values are
    sigmoid probabilities, not raw logits. torch.sigmoid replaces
    deprecated F.sigmoid.
    """
    s_logits = torch.sigmoid(self.start_position_fc(encoder_output))
    e_logits = torch.sigmoid(self.end_position_fc(encoder_output))
    return s_logits, e_logits
def train_tom(opt, train_loader, model, board):
    """Train the try-on module (TOM): render a person image wearing cloth `c`.

    The model outputs a rendered image and a composition mask; the try-on
    result blends the warped cloth and the rendering through that mask.
    Losses: L1 + VGG perceptual on the try-on image, L1 on the mask.
    Logs to `board` and checkpoints every `opt.save_count` steps.

    torch.tanh / torch.sigmoid replace the deprecated F.tanh / F.sigmoid.
    """
    model.cuda()
    model.train()

    # criterion
    criterionL1 = nn.L1Loss()
    criterionVGG = VGGLoss()
    criterionMask = nn.L1Loss()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt.lr,
                                 betas=(0.5, 0.999))
    # Constant lr for keep_step steps, then linear decay over decay_step.
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda step: 1.0 - max(0, step - opt.keep_step) / float(
            opt.decay_step + 1))

    for step in range(opt.keep_step + opt.decay_step):
        iter_start_time = time.time()
        inputs = train_loader.next_batch()

        im = inputs['image'].cuda()
        im_pose = inputs['pose_image']
        im_h = inputs['head']
        shape = inputs['shape']

        agnostic = inputs['agnostic'].cuda()
        c = inputs['cloth'].cuda()
        cm = inputs['cloth_mask'].cuda()

        outputs = model(torch.cat([agnostic, c], 1))
        # First 3 channels: rendered person; last: composition mask.
        p_rendered, m_composite = torch.split(outputs, 3, 1)
        p_rendered = torch.tanh(p_rendered)
        m_composite = torch.sigmoid(m_composite)
        # Blend cloth and rendering through the predicted mask.
        p_tryon = c * m_composite + p_rendered * (1 - m_composite)

        visuals = [[im_h, shape, im_pose],
                   [c, cm * 2 - 1, m_composite * 2 - 1],
                   [p_rendered, p_tryon, im]]

        loss_l1 = criterionL1(p_tryon, im)
        loss_vgg = criterionVGG(p_tryon, im)
        loss_mask = criterionMask(m_composite, cm)
        loss = loss_l1 + loss_vgg + loss_mask
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (step + 1) % opt.display_count == 0:
            board_add_images(board, 'combine', visuals, step + 1)
            board.add_scalar('metric', loss.item(), step + 1)
            board.add_scalar('L1', loss_l1.item(), step + 1)
            board.add_scalar('VGG', loss_vgg.item(), step + 1)
            board.add_scalar('MaskL1', loss_mask.item(), step + 1)
            t = time.time() - iter_start_time
            print(
                'step: %8d, time: %.3f, loss: %.4f, l1: %.4f, vgg: %.4f, mask: %.4f'
                % (step + 1, t, loss.item(), loss_l1.item(), loss_vgg.item(),
                   loss_mask.item()),
                flush=True)

        if (step + 1) % opt.save_count == 0:
            save_checkpoint(
                model,
                os.path.join(opt.checkpoint_dir, opt.name,
                             'step_%06d.pth' % (step + 1)))
def forward(self, x): x = nn.MaxPool2d(kernel_size=(x.size(2),x.size(3)))(x) x = F.relu(self.conv1(x), inplace=True) x = F.sigmoid(self.conv2(x)) return x
def _gumbel_sigmoid_sample(self, logits, tau=0.8): gumbel_noise = Variable(self._sample_gumbel(logits.size(), out=logits.data.new())) y = (logits + gumbel_noise) / tau return F.sigmoid(y)
def main():
    """Adversarial domain-adaptation training loop (source -> target).

    Trains a segmentation model with (a) source-domain cross-entropy,
    (b) an adversarial loss that pushes target predictions to fool a
    discriminator, and (c) — after `args.warm_up` iterations — a
    conditional-entropy loss on target pixels, masked by discriminator
    confidence. The discriminator is then updated on detached scores.
    """
    # torch.manual_seed(1234)
    # torch.cuda.manual_seed(1234)
    opt = TrainOptions()
    args = opt.initialize()
    _t = {'iter time': Timer()}
    model_name = args.source + '_to_' + args.target
    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)
        os.makedirs(os.path.join(args.snapshot_dir, 'logs'))
    opt.print_options(args)

    sourceloader, targetloader = CreateSrcDataLoader(args), CreateTrgDataLoader(args)
    targetloader_iter, sourceloader_iter = iter(targetloader), iter(sourceloader)

    model, optimizer = CreateModel(args)
    model_D, optimizer_D = CreateDiscriminator(args)

    start_iter = 0
    if args.restore_from is not None:
        # Checkpoints are named '<prefix>_<iter>.pth'; recover the iteration.
        start_iter = int(args.restore_from.rsplit('/', 1)[1].rsplit('_')[1])

    train_writer = tensorboardX.SummaryWriter(os.path.join(args.snapshot_dir, "logs", model_name))

    bce_loss = torch.nn.BCEWithLogitsLoss()
    cent_loss = ConditionalEntropyLoss2()
    cudnn.enabled = True
    cudnn.benchmark = True

    model.train()
    model.cuda()
    model_D.train()
    model_D.cuda()

    # Names of the scalars logged each iteration (looked up via eval below;
    # the list is fixed and internal, so eval is on trusted input only).
    loss = ['loss_seg_src', 'loss_seg_trg', 'loss_D_trg_fake',
            'loss_D_src_real', 'loss_D_trg_real']
    _t['iter time'].tic()
    pbar = tqdm(range(start_iter, args.num_steps_stop))
    for i in pbar:
        model.adjust_learning_rate(args, optimizer, i)
        model_D.adjust_learning_rate(args, optimizer_D, i)
        optimizer.zero_grad()
        optimizer_D.zero_grad()

        # ---- Phase 1: update the segmentation model; freeze D. ----
        for param in model_D.parameters():
            param.requires_grad = False

        # Python 3: iterators have no .next() method; use next(iterator).
        src_img, src_lbl, _, _ = next(sourceloader_iter)
        src_img, src_lbl = src_img.cuda(), src_lbl.long().cuda()
        src_seg_score = model(src_img)
        loss_seg_src = CrossEntropy2d(src_seg_score, src_lbl)
        loss_seg_src.backward()

        if args.data_label_folder_target is not None:
            # Target pseudo-labels available: supervised loss from the model.
            trg_img, trg_lbl, _, _ = next(targetloader_iter)
            trg_img, trg_lbl = trg_img.cuda(), trg_lbl.long().cuda()
            trg_seg_score = model(trg_img)
            loss_seg_trg = model.loss
        else:
            trg_img, _, name = next(targetloader_iter)
            trg_img = trg_img.cuda()
            trg_seg_score = model(trg_img)
            loss_seg_trg = 0

        # Adversarial term: make target predictions look like source
        # (discriminator label 0 = source domain here).
        outD_trg = model_D(F.softmax(trg_seg_score, dim=1))
        loss_D_trg_fake = bce_loss(outD_trg, torch.zeros_like(outD_trg))

        src_seg_score1, trg_seg_score1 = src_seg_score.detach(), trg_seg_score.detach()

        if i > args.warm_up:
            # After warm-up, add a conditional-entropy loss on target pixels,
            # zeroing pixels the discriminator scores above args.mask_T.
            _, _, h, w = trg_seg_score.size()
            # F.interpolate replaces the deprecated nn.functional.upsample.
            outD_up = F.interpolate(outD_trg, (h, w), mode='bilinear', align_corners=True)
            # torch.sigmoid replaces the deprecated F.sigmoid.
            D_out_sigmoid = torch.sigmoid(outD_up).data.cpu().numpy().squeeze(axis=1)
            ignore_mask = (D_out_sigmoid > args.mask_T)
            loss_seg_trg = cent_loss(trg_seg_score)
            # (A leftover ipdb.set_trace() debugger breakpoint was removed here.)
            loss_seg_trg[ignore_mask] = 0
            loss_seg_trg = -torch.mean(loss_seg_trg)

        loss_trg = args.lambda_adv_target * loss_D_trg_fake + args.tar_vat * loss_seg_trg
        loss_trg.backward()
        if loss_seg_trg == 0:
            # Keep a tensor so the .data access in the logging below works.
            loss_seg_trg = torch.zeros(1)

        # ---- Phase 2: update the discriminator on detached scores. ----
        for param in model_D.parameters():
            param.requires_grad = True
        outD_src = model_D(F.softmax(src_seg_score1, dim=1))
        loss_D_src_real = bce_loss(outD_src, torch.zeros_like(outD_src)) / 2
        loss_D_src_real.backward()
        outD_trg = model_D(F.softmax(trg_seg_score1, dim=1))
        loss_D_trg_real = bce_loss(outD_trg, torch.ones_like(outD_trg)) / 2
        loss_D_trg_real.backward()
        d_loss = loss_D_src_real.data + loss_D_trg_real.data

        optimizer.step()
        optimizer_D.step()

        for m in loss:
            train_writer.add_scalar(m, eval(m), i + 1)

        if (i + 1) % args.save_pred_every == 0:
            print('taking snapshot ...')
            torch.save(model.state_dict(),
                       os.path.join(args.snapshot_dir, '%s_' % (args.source) + str(i + 1) + '.pth'))
            torch.save(model_D.state_dict(),
                       os.path.join(args.snapshot_dir, '%s_' % (args.source) + str(i + 1) + '_D.pth'))

        if (i + 1) % args.print_freq == 0:
            _t['iter time'].toc(average=False)
            print('[it %d][src seg loss %.4f][trg seg loss %.4f][adv loss %.4f][d loss %.4f][lr %.4f][%.2fs]' %
                  (i + 1, loss_seg_src.data, loss_seg_trg.data, loss_D_trg_fake.data, d_loss,
                   optimizer.param_groups[0]['lr'] * 10000, _t['iter time'].diff))
            # NOTE(review): with range(..., args.num_steps_stop) the condition
            # below can never be true (i + 1 <= num_steps_stop); kept as-is.
            if i + 1 > args.num_steps_stop:
                print('finish training')
                break
            _t['iter time'].tic()
def forward(self, x): x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = F.sigmoid(self.fc3(x)) return x
def forward(self, x): # define how the data are used through the net x = x.view(x.shape[0], -1) # flattering on one single vector x = F.sigmoid(self.fc1(x)) x = F.sigmoid(self.fc2(x)) x = self.fc3(x) return x