def hard_sigmoid(x):
    """
    Piecewise-linear stand-in for the sigmoid, applied element-wise.

    Evaluates ``0.2 * x + 0.5`` and limits the result to the range [0, 1].
    See e.g. https://github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py#L279
    """
    scaled = (0.2 * x) + 0.5
    # Upper bound: thresholding the negated tensor at -1 replaces values >= 1 by 1.
    capped = -F.threshold(-scaled, -1, -1)
    # Lower bound: values <= 0 are replaced by 0.
    return F.threshold(capped, 0, 0)
def find_tensor_peak_batch(heatmap, radius, downsample, threshold=0.000001):
    """Locate one sub-pixel peak per heatmap channel.

    The arg-max of every ``H x W`` map is refined by a weighted average over
    the ``(2 * radius + 1)`` square window centred on it, then mapped back to
    input-image coordinates using ``downsample``.

    Args:
        heatmap: 3-D tensor of shape ``(num_pts, H, W)`` with ``H > 1`` and ``W > 1``.
        radius: positive integer half-size of the refinement window.
        downsample: scale factor between heatmap and image coordinates.
        threshold: window responses less than or equal to this value are
            replaced by machine epsilon before averaging.

    Returns:
        ``(locations, score)`` where ``locations`` has shape ``(num_pts, 2)``
        holding ``(x, y)`` and ``score`` is the maximum response of each map.

    Raises:
        AssertionError: if the heatmap is not 3-D, is too small, or the radius
            is not a positive number.
    """
    assert heatmap.dim() == 3, 'The dimension of the heatmap is wrong : {}'.format(heatmap.size())
    # Check the type first so a non-numeric radius reports this message instead
    # of failing inside the comparison.
    assert isinstance(radius, numbers.Number) and radius > 0, 'The radius is not ok : {}'.format(radius)
    num_pts, H, W = heatmap.size(0), heatmap.size(1), heatmap.size(2)
    assert W > 1 and H > 1, 'To avoid the normalization function divide zero'

    # find the approximate location:
    score, index = torch.max(heatmap.view(num_pts, -1), 1)
    index_w = (index % W).float()
    # Floor division: "/" on integer tensors is true division on current
    # PyTorch and would yield a fractional row index.
    index_h = (index // W).float()

    def normalize(x, L):
        # Map pixel index 0 -> -1 and L-1 -> +1 (the align_corners=True convention).
        return -1. + 2. * x.data / (L - 1)

    boxes = [index_w - radius, index_h - radius, index_w + radius, index_h + radius]
    boxes[0] = normalize(boxes[0], W)
    boxes[1] = normalize(boxes[1], H)
    boxes[2] = normalize(boxes[2], W)
    boxes[3] = normalize(boxes[3], H)

    # Affine transform that maps the unit grid onto each window; created
    # directly with the heatmap's dtype and device.
    theta = heatmap.new_zeros((num_pts, 2, 3))
    theta[:, 0, 0] = (boxes[2] - boxes[0]) / 2
    theta[:, 0, 2] = (boxes[2] + boxes[0]) / 2
    theta[:, 1, 1] = (boxes[3] - boxes[1]) / 2
    theta[:, 1, 2] = (boxes[3] + boxes[1]) / 2

    # extract the sub-region heatmap
    side = radius * 2 + 1
    grid_size = torch.Size([num_pts, 1, side, side])
    # align_corners=True matches normalize() above; the library default is False.
    grid = F.affine_grid(theta, grid_size, align_corners=True)
    sub_feature = F.grid_sample(heatmap.unsqueeze(1), grid, align_corners=True).squeeze(1)
    sub_feature = F.threshold(sub_feature, threshold, np.finfo(float).eps)

    # Offsets of every window cell relative to the window centre.
    X = torch.arange(-radius, radius + 1).to(heatmap).view(1, 1, side)
    Y = torch.arange(-radius, radius + 1).to(heatmap).view(1, side, 1)

    sum_region = torch.sum(sub_feature.view(num_pts, -1), 1)
    x = torch.sum((sub_feature * X).view(num_pts, -1), 1) / sum_region + index_w
    y = torch.sum((sub_feature * Y).view(num_pts, -1), 1) / sum_region + index_h

    # Convert heatmap coordinates to image coordinates.
    x = x * downsample + downsample / 2.0 - 0.5
    y = y * downsample + downsample / 2.0 - 0.5
    return torch.stack([x, y], 1), score
high=1.0).to(device) # In[9]: with torch.no_grad(): dataset = [] for i in range(dataset_size): batch_x = torch.Tensor(size=(minibatch_size, m_count)).to(device) batch_x.uniform_(sub_min, sub_max) batch_y = reactions(batch_x) dataset.append((batch_x, batch_y)) if i % 1 == 0: r = (dataset[-1][1] - dataset[-1][0]).abs().sum(dim=1).neg() print("{}:{}".format( i, F.threshold(r, threshold=-1e-20, value=1).sum())) # In[10]: model = Process(reactions_count, metabolites, gan_generator, scount=m_count, pcount=m_count, step=step, iterations=iterations).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=1e-4) ##optimizer = torch.optim.Adadelta(model.parameters(), lr=1.0, rho=0.9, eps=1e-06, weight_decay=1.0) ##optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0) #optimizer = torch.optim.Adamax(model.parameters(), lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
def forward(ctx, input, threshold, value, inplace=False):
    """Apply ``F.threshold`` and stash a positivity mask of the input on ``ctx``.

    The saved mask holds 1 where ``input > 0`` and 0 elsewhere; it is computed
    before the (optionally in-place) thresholding.
    """
    # TODO: Generalise!
    is_positive = input > 0
    positive_mask = torch.zeros_like(input)
    positive_mask[is_positive] += 1.
    ctx.save_for_backward(positive_mask)
    return F.threshold(input, threshold, value, inplace)
def proxy_grad_descent(self, t, lr):
    """Rescale the weights and biases of Linear/Conv2d children in place.

    Walks the children of ``self.actor_critic.base`` in lockstep with those of
    ``self.actor_critic_old.base``. For every Linear or Conv2d child a
    per-output-unit coefficient is derived from the unit's weight/bias norm
    (using ``self.mu``) and applied to units where ``self.mask[name]`` is 0.
    When ``t > 0``, units where the mask is 1 are set to a blend of their
    current and old parameters, controlled by ``self.omega[name]`` and
    ``self.lamb``. All updates run under ``torch.no_grad()``.

    Args:
        t: step/task index; the blend with the old parameters only runs for ``t > 0``.
        lr: learning rate scaling both thresholds.
    """
    def per_unit(coefficients, like):
        # Append singleton axes so a per-output-unit vector broadcasts over `like`.
        return coefficients[(slice(None),) + (None,) * (like.dim() - 1)]

    with torch.no_grad():
        paired_children = zip(self.actor_critic.base.named_children(),
                              self.actor_critic_old.base.named_children())
        for (key, layer), (_, previous_layer) in paired_children:
            if not isinstance(layer, (torch.nn.Linear, torch.nn.Conv2d)):
                continue

            mu = self.mu
            weight, bias = layer.weight, layer.bias
            weight_old, bias_old = previous_layer.weight, previous_layer.bias
            # Every axis except the output-unit axis.
            unit_axes = tuple(range(1, weight.dim()))

            # Shrink each unit according to its joint weight/bias norm.
            unit_norm = (weight.norm(2, dim=unit_axes) ** 2 + bias ** 2).pow(1 / 2)
            shrunk = F.threshold(unit_norm - mu * lr, 0, 0, False)
            keep = shrunk / (shrunk + mu * lr)
            free_coeff = keep * (1 - self.mask[key])
            sparse_weight = weight.data * per_unit(free_coeff, weight)
            sparse_bias = bias.data * free_coeff

            penalty_weight = 0
            penalty_bias = 0
            if t > 0:
                # Blend masked units between current and old parameters.
                drift = ((weight - weight_old).norm(2, dim=unit_axes) ** 2
                         + (bias - bias_old) ** 2).pow(1 / 2)
                shrunk = F.threshold(drift - self.omega[key] * self.lamb * lr, 0, 0, False)
                denominator = lr * self.lamb * self.omega[key] + shrunk
                blend = shrunk / denominator
                blend[blend != blend] = 1  # NaN (0 / 0) entries become 1
                current_coeff = blend * self.mask[key]
                old_coeff = (1 - blend) * self.mask[key]
                penalty_weight = (per_unit(current_coeff, weight) * weight.data
                                  + per_unit(old_coeff, weight) * weight_old.data)
                penalty_bias = current_coeff * bias.data + old_coeff * bias_old.data

            weight.data = sparse_weight + penalty_weight
            bias.data = sparse_bias + penalty_bias
def val_context_inpainting(iter_, epoch, net, coach=None, use_coach_masks=False):
    """Run one validation pass of the inpainting network with priors.

    For every batch of ``val_loader`` the masked input and masked prior are
    concatenated and passed to ``net.forward_inpainting``. A clipped squared
    error (capped at 2) is averaged over the hidden region (reconstruction
    loss) and over the visible region (consistency loss), for both the image
    and the prior outputs. The running mean is appended to the module-level
    ``val_loss`` list, and a checkpoint is written whenever the epoch mean
    improves on the module-level ``best_loss``.

    Args:
        iter_: value embedded in the checkpoint file names.
        epoch: unused by this function.
        net: network exposing ``forward_inpainting`` and ``state_dict``.
        coach: optional mask-producing network; when ``None`` the masks from
            the loader are used and no coach checkpoint is written.
        use_coach_masks: forwarded to ``coach.forward`` as ``use_coach``.

    Returns:
        The mean of the per-batch total losses.
    """
    global best_loss

    def clipped_squared_error(prediction, reference):
        # Squared error with every element capped at 2.
        squared = (prediction - reference) ** 2
        return -1 * F.threshold(-1 * squared, -2, -2)

    progbar = tqdm(total=len(val_loader), desc='Val')
    net.eval()
    graph_test_loss = []
    if coach is not None:
        coach.eval()
    val_loss.append(0)

    # Nothing is back-propagated here, so skip building autograd graphs.
    with torch.no_grad():
        for batch_idx, (inputs_, masks, targets, prior) in enumerate(val_loader):
            inputs_ = inputs_.to(device)
            masks = masks.to(device).float()
            targets = targets.to(device)
            prior = prior.to(device)

            if coach is not None:
                masks, _, _ = coach.forward(inputs_, alpha=100, use_coach=use_coach_masks)

            masked_inputs = inputs_ * masks
            masked_prior = prior * masks
            outputs = net.forward_inpainting(
                torch.cat((masked_inputs, masked_prior), dim=1))
            g_outputs, g_priors = torch.split(outputs, 3, dim=1)
            mse_loss = clipped_squared_error(g_outputs, targets)
            prior_mse_loss = clipped_squared_error(g_priors, prior)

            # Reconstruction terms: error over the hidden (1 - mask) region.
            loss_rec = torch.sum(mse_loss * (1 - masks)) / torch.sum(1 - masks)
            prior_loss_rec = torch.sum(prior_mse_loss * (1 - masks)) / torch.sum(1 - masks)

            # calculate con loss
            if coach is None:
                # Without a coach, the consistency terms come from a second
                # pass on the complementary masking of the input.
                outputs = net.forward_inpainting(inputs_ * (1 - masks))
                g_outputs, g_priors = torch.split(outputs, 3, dim=1)
                mse_loss = clipped_squared_error(g_outputs, targets)
                prior_mse_loss = clipped_squared_error(g_priors, prior)
            loss_con = torch.sum(mse_loss * masks) / torch.sum(masks)
            prior_loss_con = torch.sum(prior_mse_loss * masks) / torch.sum(masks)

            loss_rec = loss_rec + prior_loss_rec
            loss_con = loss_con + prior_loss_con
            total_loss = rec_weight * loss_rec + (1 - rec_weight) * loss_con

            val_loss[-1] += total_loss.item()
            progbar.set_description('Val (loss=%.4f)' %
                                    (val_loss[-1] / (batch_idx + 1)))
            progbar.update(1)
            graph_test_loss.append(total_loss.item())
    progbar.close()

    val_loss[-1] = val_loss[-1] / len(val_loader)
    if best_loss > val_loss[-1]:
        best_loss = val_loss[-1]
        print('Saving..')
        torch.save(
            net.state_dict(),
            os.path.join(save_model_location,
                         experiment + str(iter_) + '.net.best.ckpt.t7'))
        # coach defaults to None; only checkpoint it when one was supplied.
        if coach is not None:
            torch.save(
                coach.state_dict(),
                os.path.join(save_model_location,
                             experiment + str(iter_) + '.coach.best.ckpt.t7'))

    average_graph_test_loss = sum(graph_test_loss) / len(graph_test_loss)
    return average_graph_test_loss
def forward(self, x):
    """Apply ``self.slope * x + self.offset`` and limit the result to [0, 1]."""
    affine = (self.slope * x) + self.offset
    # Values >= 1 become 1 (threshold applied to the negated tensor).
    negated_capped = F.threshold(-affine, -1, -1)
    # Values <= 0 become 0.
    return F.threshold(-negated_capped, 0, 0)
def hard_sigmoid(x):
    """Return ``0.2 * x + 0.5`` limited element-wise to the range [0, 1]."""
    line = (0.2 * x) + 0.5
    # First pass caps at 1 (working on the negated values), second floors at 0.
    below_one = F.threshold(-line, -1., -1.)
    return F.threshold(-below_one, 0., 0.)
def H(z):
    """Heaviside step function: 0 where ``z < 0``, 1 where ``z >= 0``.

    Implemented as a comparison rather than ``relu(z) / z``; the quotient
    form evaluates to NaN at ``z == 0`` (0 / 0) and at ``+inf``. The result
    has the dtype of ``z`` and is a constant with respect to ``z``.
    """
    return (z >= 0).to(z.dtype)
def train_context_inpainting(epoch, net, net_optimizer, coach=None, use_coach_masks=False):
    """Train the inpainting network for one pass over ``train_loader``.

    ``net`` returns an iterable of outputs. A squared error capped at 2 is
    computed for each output against the targets; the reconstruction loss
    accumulates it over the hidden (1 - mask) region, and the consistency loss
    uses the visible (mask) region. The per-batch total is back-propagated and
    the epoch mean is stored as the last entry of the module-level
    ``train_loss`` list.

    Args:
        epoch: unused by this function.
        net: network being trained.
        net_optimizer: optimizer stepping ``net``'s parameters.
        coach: optional mask-producing network, kept in eval mode.
        use_coach_masks: forwarded to ``coach.forward`` as ``use_coach``.
    """
    def clipped_squared_error(prediction, reference):
        # Squared error with every element capped at 2.
        squared = (prediction - reference) ** 2
        return -1 * F.threshold(-1 * squared, -2, -2)

    progbar = tqdm(total=len(train_loader), desc='Train')
    net.train()
    if coach is not None:
        coach.eval()
    train_loss.append(0)

    for batch_idx, (inputs_, masks, targets) in enumerate(train_loader):
        net_optimizer.zero_grad()
        inputs_ = Variable(inputs_.to(device))
        masks = Variable(masks.to(device).float())
        targets = Variable(targets.to(device))
        if coach is not None:
            masks, _, _ = coach.forward(inputs_, alpha=100, use_coach=use_coach_masks)

        outputs_1 = net(inputs_ * masks)
        loss_rec = None
        loss_con = None
        for output_1 in outputs_1:
            mse_loss = clipped_squared_error(output_1, targets)
            # calculate reconstruction loss
            rec_term = torch.sum(mse_loss * (1 - masks)) / torch.sum(1 - masks)
            if loss_rec is None:
                loss_rec = rec_term
            else:
                loss_rec += rec_term

        # calculate con loss
        # NOTE(review): the flattened source does not show whether this section
        # sits inside the loop above; it is placed after it here - confirm.
        if coach is not None:
            # Uses the error of the last output produced by the loop above.
            loss_con = torch.sum(mse_loss * masks) / torch.sum(masks)
        else:
            outputs_2 = net(inputs_ * (1 - masks))
            for output_2 in outputs_2:
                mse_loss = clipped_squared_error(output_2, targets)
                con_term = torch.sum(mse_loss * masks) / torch.sum(masks)
                if loss_con is None:
                    loss_con = con_term
                else:
                    loss_con += con_term

        total_loss = rec_weight * loss_rec + (1 - rec_weight) * loss_con
        total_loss.backward()
        net_optimizer.step()

        train_loss[-1] += total_loss.data
        running_mean = train_loss[-1] / (batch_idx + 1)
        progbar.set_description('Train (loss=%.4f)' % running_mean)
        progbar.update(1)

    train_loss[-1] = train_loss[-1] / len(train_loader)
def val_context_inpainting(iter_, epoch, net, coach=None, use_coach_masks=False):
    """Evaluate the inpainting network on ``val_loader`` and checkpoint on improvement.

    Computes the same capped squared-error reconstruction and consistency
    losses as training, accumulates their weighted sum into the last entry of
    the module-level ``val_loss`` list, and saves ``net`` and ``coach`` in one
    file whenever the epoch mean is lower than the module-level ``best_loss``.

    Args:
        iter_: value embedded in the checkpoint file name.
        epoch: unused by this function.
        net: network being evaluated; returns an iterable of outputs.
        coach: optional mask-producing network.
        use_coach_masks: forwarded to ``coach.forward`` as ``use_coach``.
    """
    global best_loss

    def clipped_squared_error(prediction, reference):
        # Squared error with every element capped at 2.
        squared = (prediction - reference) ** 2
        return -1 * F.threshold(-1 * squared, -2, -2)

    progbar = tqdm(total=len(val_loader), desc='Val')
    net.eval()
    if coach is not None:
        coach.eval()
    val_loss.append(0)

    for batch_idx, (inputs_, masks, targets) in enumerate(val_loader):
        inputs_ = Variable(inputs_.to(device))
        masks = Variable(masks.to(device).float())
        targets = Variable(targets.to(device))
        if coach is not None:
            masks, _, _ = coach.forward(inputs_, alpha=100, use_coach=use_coach_masks)

        loss_rec = None
        loss_con = None
        outputs_1 = net(inputs_ * masks)
        for output_1 in outputs_1:
            mse_loss = clipped_squared_error(output_1, targets)
            # calculate reconstruction loss
            rec_term = torch.sum(mse_loss * (1 - masks)) / torch.sum(1 - masks)
            if loss_rec is None:
                loss_rec = rec_term
            else:
                loss_rec += rec_term

        # calculate con loss
        # NOTE(review): the flattened source does not show whether this section
        # sits inside the loop above; it is placed after it here - confirm.
        if coach is not None:
            # Uses the error of the last output produced by the loop above.
            loss_con = torch.sum(mse_loss * masks) / torch.sum(masks)
        else:
            outputs_2 = net(inputs_ * (1 - masks))
            for output_2 in outputs_2:
                mse_loss = clipped_squared_error(output_2, targets)
                con_term = torch.sum(mse_loss * masks) / torch.sum(masks)
                if loss_con is None:
                    loss_con = con_term
                else:
                    loss_con += con_term

        total_loss = rec_weight * loss_rec + (1 - rec_weight) * loss_con
        val_loss[-1] += total_loss.data
        running_mean = val_loss[-1] / (batch_idx + 1)
        progbar.set_description('Val (loss=%.4f)' % running_mean)
        progbar.update(1)

    val_loss[-1] = val_loss[-1] / len(val_loader)
    if val_loss[-1] < best_loss:
        best_loss = val_loss[-1]
        print('Saving..')
        # Both networks are stored as whole objects in a single file.
        torch.save({'context_inpainting_net': net, 'coach': coach},
                   model_root + experiment + str(iter_) + '.ckpt.t7')
def forward(self, input):
    """Mirror image of ``F.threshold``: acts on values at or above the cut-off.

    Elements smaller than ``self.threshold`` pass through; all others are
    replaced by ``self.value``. Implemented by thresholding the negated
    tensor and negating the result.
    """
    flipped = -1 * input
    flipped_cut = -1 * self.threshold
    flipped_fill = -1 * self.value
    return -1 * F.threshold(flipped, flipped_cut, flipped_fill, self.inplace)
def test_threshold(self):
    """Run ``F.threshold`` (out-of-place) on a random CUDA tensor of ``self.dtype``."""
    sample = torch.randn(1, 8, 32, 32, device='cuda', dtype=self.dtype)
    # Only exercises the call; the result is not inspected.
    result = F.threshold(sample, 6, 6, inplace=False)
def jit_relu_dropout(x, prob, is_training):
    # type: (Tensor, float, bool) -> Tensor
    """Zero out non-positive entries of ``x``, then apply dropout.

    ``prob`` is the drop probability and ``is_training`` is passed to
    ``F.dropout`` as its ``training`` flag.
    """
    rectified = F.threshold(x, 0., 0.)
    return F.dropout(rectified, p=prob, training=is_training)
def test_nested_inplace(self):
    """Trace an in-place ``F.threshold`` call and check it against the expected trace."""
    # Random 2x2 input used as the example argument for tracing.
    x = Variable(torch.randn(2, 2))
    # The traced callable thresholds its argument in place at 0.
    trace, _ = torch.jit.trace(lambda x: F.threshold(x, 0, 0, inplace=True), (x,), nderivs=0)
    self.assertExpectedTrace(trace)
def forward(self, x):
    """Return ``x`` with entries <= ``self.threshold`` replaced by ``self.value``."""
    # Imported locally, as in the original implementation.
    from torch.nn import functional as F

    cutoff, fill = self.threshold, self.value
    return F.threshold(x, threshold=cutoff, value=fill)
def discretized_mix_logistic_loss_c1(x, l, sum_all=True):
    """Negative log-likelihood of ``x`` under a discretized logistic mixture.

    The last axis of ``l`` is split into thirds: mixture logits, means and
    log-scales (log-scales are floored at -7). ``x`` is a 4-D tensor whose
    values are compared against +/-0.999 for the edge bins, with a bin
    half-width of 1/255.

    Args:
        x: 4-D target tensor; its four sizes are reused to reshape the parameters.
        l: 4-D parameter tensor whose last axis holds ``3 * nr_mix`` values.
        sum_all: when true return the scalar total; otherwise sum only over
            axes 1 and 2 and squeeze the result.

    Returns:
        The negated log-sum-exp of the per-component log-probabilities,
        reduced as selected by ``sum_all``.
    """
    xs = x.size()
    ls = l.size()

    # here and below: unpacking the params of the mixture of logistics
    nr_mix = int(ls[-1] / 3)
    logit_probs = l[:, :, :, :nr_mix]
    params = l[:, :, :, nr_mix:].contiguous().view(
        xs[0], xs[1], xs[2], xs[3], nr_mix * 2)
    means = params[:, :, :, :, :nr_mix]
    log_scales = F.threshold(params[:, :, :, :, nr_mix:2 * nr_mix], -7., -7.)

    # Repeat the target along a new trailing axis, one copy per mixture component.
    x = x.unsqueeze(4).expand(xs[0], xs[1], xs[2], xs[3], nr_mix)

    centered_x = x - means
    inv_stdv = torch.exp(-log_scales)
    plus_in = inv_stdv * (centered_x + 1. / 255.)
    cdf_plus = F.sigmoid(plus_in)
    min_in = inv_stdv * (centered_x - 1. / 255.)
    cdf_min = F.sigmoid(min_in)

    # log probability for edge case of 0 (before scaling)
    log_cdf_plus = plus_in - F.softplus(plus_in)
    # log probability for edge case of 255 (before scaling)
    log_one_minus_cdf_min = -F.softplus(min_in)
    # probability for all other cases
    cdf_delta = cdf_plus - cdf_min

    # log probability in the center of the bin, used when cdf_delta is tiny
    mid_in = inv_stdv * centered_x
    log_pdf_mid = mid_in - log_scales - 2. * F.softplus(mid_in)

    def select(mask, when_set, otherwise):
        # Arithmetic selection with a detached 0/1 mask.
        return mask * when_set + (1. - mask) * otherwise

    # now select the right output: left edge case, right edge case, normal
    # case, extremely low prob case
    mask1 = (cdf_delta > 1e-5).float().detach()
    term1 = select(mask1,
                   torch.log(F.threshold(cdf_delta, 1e-12, 1e-12)),
                   log_pdf_mid - np.log(127.5))
    mask2 = (x > 0.999).float().detach()
    term2 = select(mask2, log_one_minus_cdf_min, term1)
    mask3 = (x < -0.999).float().detach()
    term3 = select(mask3, log_cdf_plus, term2)

    log_probs = term3.sum(3) + log_prob_from_logits(logit_probs)
    mixture_ll = log_sum_exp(log_probs)
    if sum_all:
        return -mixture_ll.sum()
    return -mixture_ll.sum(1, keepdim=True).sum(2, keepdim=True).squeeze()
def func_b(module, grad_in, grad_out):
    """Backward hook: record the first input gradient and rectify it for ReLU.

    A CPU copy of ``grad_in[0]`` is stored in ``self.all_grads`` under
    ``id(module)``. For ``nn.ReLU`` modules a 1-tuple with the non-positive
    entries of that gradient zeroed is returned; for any other module the
    hook returns ``None``.
    """
    first_grad = grad_in[0]
    self.all_grads[id(module)] = first_grad.cpu()
    if not isinstance(module, nn.ReLU):
        return None
    # Cut off negative gradients
    return (F.threshold(first_grad, threshold=0.0, value=0.0),)
def train_estimation(self):
    """Train ``self.moduleDisparity`` for ``training_params['n_epochs']`` epochs.

    Each batch: semantic features are computed without gradients, the
    disparity network is run and its output clipped at zero, then an ordinal
    loss, a gradient loss and (when ``mask_loss == 'same'``) a masked gradient
    loss are combined with iteration-dependent weights, back-propagated with
    gradient-norm clipping, and logged to ``self.writer``.
    """
    for epoch in range(self.training_params['n_epochs']):
        for idx, (tensorImage, GTdisparities, sparseMask, imageNetTensor,
                  dataset_ids) in enumerate(
                      tqdm(self.data_loader,
                           desc='Epoch %d/%d' %
                           (epoch + 1, self.training_params['n_epochs']))):
            # Every 500 batches: checkpoint the disparity model and validate.
            if ((idx + 1) % 500) == 0:
                save_model(
                    {
                        'disparity': {
                            'model': self.moduleDisparity,
                            'opt': self.optimizer_disparity,
                            'schedule': self.scheduler_disparity,
                            'save_name': self.training_params['save_name']
                        }
                    }, self.iter_nb)
                self.validation()

            tensorImage = tensorImage.to(device, non_blocking=True)
            GTdisparities = GTdisparities.to(device, non_blocking=True)
            sparseMask = sparseMask.to(device, non_blocking=True)
            imageNetTensor = imageNetTensor.to(device, non_blocking=True)

            # The semantics module is only used as a feature extractor here.
            with torch.no_grad():
                semantic_tensor = self.moduleSemantics(tensorImage)

            # forward pass
            tensorDisparity = self.moduleDisparity(
                tensorImage, semantic_tensor)  # depth estimation
            # Replace non-positive disparities by 0.
            tensorDisparity = F.threshold(tensorDisparity,
                                          threshold=0.0,
                                          value=0.0)

            # reconstruction loss computation
            estimation_loss_ord = compute_loss_ord(tensorDisparity,
                                                   GTdisparities,
                                                   sparseMask,
                                                   mode='logrmse')
            estimation_loss_grad = compute_loss_grad(
                tensorDisparity, GTdisparities, sparseMask)

            # loss weights computation
            # The ordinal weight decays from 0.09 towards 0.03 while the
            # gradient and mask weights grow from 0 as self.iter_nb increases.
            beta = 0.015
            gamma_ord = 0.03 * (1 + 2 * np.exp(-beta * self.iter_nb)
                                )  # for scale-invariant Loss
            # gamma_ord = 0.001 * (1+ 200 * np.exp( - beta * self.iter_nb)) # for L1 loss
            gamma_grad = 1 - np.exp(-beta * self.iter_nb)
            gamma_mask = 0.0001 * (1 - np.exp(-beta * self.iter_nb))

            if self.training_params['mask_loss'] == 'same':
                # when mask_loss is 'same' masks are computed on the same images
                with torch.no_grad():
                    objectPredictions = self.moduleMaskrcnn(tensorImage)

                masks_tensor_list = list(
                    map(
                        lambda object_pred: resize_image(
                            object_pred['masks'], max_size=256),
                        objectPredictions))
                # Sum the masked loss over the samples that have masks.
                estimation_masked_loss = 0
                for i, masks_tensor in enumerate(masks_tensor_list):
                    if masks_tensor is not None:
                        estimation_masked_loss += compute_masked_grad_loss(
                            tensorDisparity[i].view(
                                1, *tensorDisparity[i].shape), masks_tensor,
                            [1], 0.5)

                loss_depth = gamma_ord * estimation_loss_ord + gamma_grad * estimation_loss_grad + gamma_mask * estimation_masked_loss
            else:  # No mask loss in this case
                loss_depth = gamma_ord * estimation_loss_ord + gamma_grad * estimation_loss_grad

            # compute gradients and update net
            self.optimizer_disparity.zero_grad()
            loss_depth.backward()
            # Clip the total gradient norm to 1 before stepping.
            torch.nn.utils.clip_grad_norm_(
                self.moduleDisparity.parameters(), 1)
            self.optimizer_disparity.step()
            self.scheduler_disparity.step()

            # keep track of loss values
            self.writer.add_scalar('Estimation/Loss ord',
                                   estimation_loss_ord, self.iter_nb)
            self.writer.add_scalar('Estimation/Loss grad',
                                   estimation_loss_grad, self.iter_nb)
            self.writer.add_scalar('Estimation/Loss depth', loss_depth,
                                   self.iter_nb)
            if self.training_params['mask_loss'] == 'same':
                self.writer.add_scalar('Estimation/Loss mask',
                                       estimation_masked_loss, self.iter_nb)
            elif self.training_params['mask_loss'] == 'other':
                self.step_imagenet(
                    imageNetTensor
                )  # when mask loss is computed on another dataset
            else:
                self.writer.add_scalar('Estimation/Loss mask', 0,
                                       self.iter_nb)

            # keep track of gradient magnitudes
            # for i, m in enumerate(self.moduleDisparity.modules()):
            #     if m.__class__.__name__ == 'Conv2d':
            #         g = m.weight.grad
            #         # print(g)
            #         if g is not None:
            #             self.writer.add_scalar('Estimation gradients/Conv {}'.format(i), torch.norm(g/g.size(0), p=1).item(), self.iter_nb)
            self.iter_nb += 1

    # NOTE(review): the flattened source does not show the nesting level of
    # this final validation; it is placed after all epochs here - confirm
    # whether it should instead run once per epoch.
    self.validation()
def threshold(input, *args, **kwargs):
    """Apply ``F.threshold`` to ``input.F`` and re-wrap the result.

    Extra positional and keyword arguments are forwarded unchanged to
    ``F.threshold``; the thresholded values are handed to ``_wrap_tensor``
    together with the original ``input``.
    """
    thresholded = F.threshold(input.F, *args, **kwargs)
    return _wrap_tensor(input, thresholded)
def relu_derivative(x: torch.Tensor) -> torch.Tensor:
    """Return the element-wise derivative of ReLU evaluated at ``x``.

    The result is 1 where ``x > 0`` and 0 elsewhere (including at ``x == 0``),
    with the same dtype as ``x``. A threshold call with value 1 is not
    equivalent: it would pass positive inputs through unchanged and map
    non-positive inputs to 1.
    """
    return (x > 0).to(x.dtype)
def forward(self, input):
    """Compute ``max(0, x) + min(0, alpha * (exp(x) - 1))`` element-wise.

    ``self.alpha`` is expanded to the shape of ``input``. When
    ``self.inplace`` is true the positive part is written into ``input``.
    """
    # Evaluate the exponential branch first: with inplace=True the
    # thresholding below overwrites ``input``, and exp() must see the
    # original values, otherwise the negative branch collapses to zero.
    negative_part = F.threshold(
        -self.alpha.expand_as(input) * (torch.exp(input) - 1),
        0.0, 0.0, self.inplace)
    positive_part = F.threshold(input, 0.0, 0.0, self.inplace)
    # max(0,x) + min(0, alpha * (exp(x) - 1))
    return positive_part - negative_part
def train_context_inpainting(epoch, net, net_optimizer, coach=None, use_coach_masks=False):
    """Train the prior-aware inpainting network for one pass over ``train_loader``.

    The masked input and masked prior are concatenated along the channel axis
    and passed to ``net.forward_inpainting``; the output is split into image
    and prior predictions (3 channels per chunk). Squared errors capped at 2
    give a reconstruction loss on the hidden region and a consistency loss on
    the visible region for both predictions. The weighted total is
    back-propagated and its running sum is kept in the module-level
    ``train_loss`` list.

    Args:
        epoch: unused by this function.
        net: network exposing ``forward_inpainting``.
        net_optimizer: optimizer stepping ``net``'s parameters.
        coach: optional mask-producing network, kept in eval mode.
        use_coach_masks: forwarded to ``coach.forward`` as ``use_coach``.

    Returns:
        The mean of the per-batch total losses as a Python float.
    """
    def clipped_squared_error(prediction, reference):
        # Squared error with every element capped at 2.
        squared = (prediction - reference) ** 2
        return -1 * F.threshold(-1 * squared, -2, -2)

    progbar = tqdm(total=len(train_loader), desc='Train')
    net.train()
    graph_train_loss = []
    if coach is not None:
        coach.eval()
    train_loss.append(0)

    for batch_idx, (inputs_, masks, targets, prior) in enumerate(train_loader):
        net_optimizer.zero_grad()
        inputs_ = Variable(inputs_.to(device))
        masks = Variable(masks.to(device).float())
        targets = Variable(targets.to(device))
        prior = Variable(prior.to(device))
        if coach is not None:
            masks, _, _ = coach.forward(inputs_, alpha=100, use_coach=use_coach_masks)

        masked_inputs_ = inputs_ * masks
        masked_prior = prior * masks
        outputs = net.forward_inpainting(
            torch.cat((masked_inputs_, masked_prior), dim=1))
        g_outputs, g_priors = torch.split(outputs, 3, dim=1)
        mse_loss = clipped_squared_error(g_outputs, targets)
        prior_mse_loss = clipped_squared_error(g_priors, prior)

        # Reconstruction terms: error over the hidden (1 - mask) region.
        loss_rec = torch.sum(mse_loss * (1 - masks)) / torch.sum(1 - masks)
        prior_loss_rec = torch.sum(prior_mse_loss * (1 - masks)) / torch.sum(1 - masks)

        # calculate con loss
        if coach is None:
            # Without a coach the consistency terms come from a second pass
            # on the complementary masking of the input.
            outputs = net.forward_inpainting(inputs_ * (1 - masks))
            g_outputs, g_priors = torch.split(outputs, 3, dim=1)
            mse_loss = clipped_squared_error(g_outputs, targets)
            prior_mse_loss = clipped_squared_error(g_priors, prior)
        loss_con = torch.sum(mse_loss * masks) / torch.sum(masks)
        prior_loss_con = torch.sum(prior_mse_loss * masks) / torch.sum(masks)

        loss_rec = loss_rec + prior_loss_rec
        loss_con = loss_con + prior_loss_con
        total_loss = rec_weight * loss_rec + (1 - rec_weight) * loss_con
        total_loss.backward()
        net_optimizer.step()

        train_loss[-1] += total_loss.data
        graph_train_loss.append(total_loss.item())
        running_mean = train_loss[-1] / (batch_idx + 1)
        progbar.set_description('Train (loss=%.4f)' % running_mean)
        progbar.update(1)

    train_loss[-1] = train_loss[-1] / len(train_loader)
    return sum(graph_train_loss) / len(graph_train_loss)
def _tanh_loss(self, x, y):
    """Compare ``x`` and ``y`` with ``self.loss_func_loss`` after log/tanh squashing.

    Each input is mapped through ``tanh(max(log(v), -100) / 4)`` element-wise
    before being passed to the loss.
    """
    def squash(values):
        # Floor the log at -100 so log(0) = -inf stays finite before tanh.
        floored_log = F.threshold(torch.log(values), -100.0, -100.0)
        return torch.tanh(floored_log / 4.0)

    return self.loss_func_loss(squash(x), squash(y))
def test_nested_inplace(self):
    """Trace an in-place ``F.threshold`` call and check it against the expected trace."""
    # Random 2x2 input used as the example argument for tracing.
    x = Variable(torch.randn(2, 2))
    # The traced callable thresholds its argument in place at 0.
    trace, _ = torch.jit.trace(
        lambda x: F.threshold(x, 0, 0, inplace=True), (x, ), nderivs=0)
    self.assertExpectedTrace(trace)
def H(x):
    """Heaviside function, 0 if x < 0 else 1.

    Implemented as a comparison rather than ``relu(x) / x``; the quotient
    form evaluates to NaN at ``x == 0`` (0 / 0) instead of the documented 1,
    and also at ``+inf``. The result has the dtype of ``x`` and is a constant
    with respect to ``x``.
    """
    return (x >= 0).to(x.dtype)
def relu(input):
    """Zero the non-positive entries of ``input`` in place and return it."""
    return F.threshold(input, threshold=0, value=0, inplace=True)
def train_epoch(epoch, data_loader, model, criterion, optimizer, opt, logger):
    """Train ``model`` for one epoch and log timing and loss statistics.

    Inputs are unpacked according to ``opt.model_type``. For ``'mmt'`` and
    ``'chmmt'`` the second input element is resized into masks that supervise
    two extra model outputs through ``dice_loss``. After epoch 100 the
    back-propagated loss is restricted to samples whose mean loss exceeds 0.8,
    falling back to those above 0.2, and finally to the plain mean.

    Args:
        epoch: current epoch number (printed, logged and used for the
            hard-example switch).
        data_loader: yields ``(inputs, targets, _)`` triples.
        model: network being trained.
        criterion: loss callable applied to ``(outputs, targets)``.
        optimizer: optimizer, also handed to ``amp.scale_loss``.
        opt: options object; reads ``model_type``, ``no_partialbn``,
            ``n_channels``, ``clip_gradient`` and ``print_freq``.
        logger: logger receiving the periodic progress line.
    """
    print('train at epoch {}'.format(epoch))

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # for tsn
    if opt.model_type == 'tsn':
        model.module.partialBN(not opt.no_partialbn)

    end_time = time.time()
    model.train()
    for iteration, (inputs, targets, _) in enumerate(data_loader):
        data_time.update(time.time() - end_time)

        if torch.cuda.is_available():
            targets = targets.cuda()

        # NOTE(review): the flattened source does not show whether the input
        # unpacking below is nested under the CUDA check above; it is kept at
        # loop level here - confirm.
        if opt.model_type == 'mmt':
            # use masks as labels: all, internal, external
            mask_source = inputs[1]
            side = mask_source.size()[-1] // 32
            masks = F.interpolate(
                mask_source.view((-1, opt.n_channels) +
                                 mask_source.size()[-2:]).cuda(),
                size=[side, side])
            inputs = inputs[0].cuda()
        elif opt.model_type == 'chmmt':
            # use masks as labels: all, internal, external
            side = inputs[1].size()[-1] // 32
            masks = inputs[1].mean(axis=1).cuda()
            resized = F.interpolate(masks, size=[side, side])
            masks = (resized > 1 / 6).to(torch.float).cuda()
            inputs = inputs[0].cuda()
        elif opt.model_type == 'mtsn':
            inputs = inputs[0].cuda()
        else:
            inputs = inputs.cuda()

        if opt.model_type in ('mmt', 'chmmt'):
            outputs, branch_out = model(inputs)
            loss = criterion(outputs, targets).mean() + dice_loss(
                branch_out[0], masks[:, 1]) + dice_loss(
                    branch_out[1], masks[:, 2])
        else:
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # update with hard examples when learning rate is small
        back_loss = None
        if epoch > 100:
            large_loss = F.threshold(loss.mean(axis=1), 0.8, 0., inplace=True)
            small_loss = F.threshold(loss.mean(axis=1), 0.2, 0., inplace=True)
            if (large_loss > 0).any():
                back_loss = large_loss.sum() / (large_loss > 0).sum()
            elif (small_loss > 0).any():
                back_loss = small_loss.sum() / (small_loss > 0).sum()
        if back_loss is None:
            back_loss = loss.mean()

        losses.update(back_loss.item(), inputs.size(0))
        with amp.scale_loss(back_loss, optimizer) as scaled_loss:
            scaled_loss.backward()

        # from TSN: clip gradients
        if opt.clip_gradient is not None:
            total_norm = clip_grad_norm(model.parameters(), opt.clip_gradient)
            if total_norm > opt.clip_gradient:
                print("clipping gradient: {} with coef {}".format(
                    total_norm, opt.clip_gradient / total_norm))

        optimizer.step()
        optimizer.zero_grad()

        batch_time.update(time.time() - end_time)
        end_time = time.time()

        if iteration % opt.print_freq == 0:
            logger.info(('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                             epoch,
                             iteration + 1,
                             len(data_loader),
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             lr=optimizer.param_groups[-1]['lr'])))
def forward(self, input):
    """Threshold ``input`` and keep the result on ``self.activations``.

    Uses ``self.threshold``, ``self.value`` and ``self.inplace`` as the
    arguments of ``F.threshold``; the stored tensor is also returned.
    """
    self.activations = F.threshold(
        input, self.threshold, self.value, self.inplace)
    return self.activations
def relu(input):
    """In-place rectifier: entries of ``input`` that are <= 0 become 0.

    The same tensor that was passed in is modified and returned.
    """
    rectified = F.threshold(input, 0, 0, inplace=True)
    return rectified
def forward(self,
            tensorMasks,
            tensorImage=None,
            tensorDisparity=None,
            tensorData=None,
            tensorContext=None):
    """Run the four-row grid of sub-modules and predict an image and a disparity.

    The input is either a ready-made ``tensorData`` or is assembled from
    ``tensorImage``, ``tensorDisparity`` and a context tensor (computed by
    ``self.moduleContext`` when not supplied). Sub-modules are looked up in
    ``self._modules`` by keys of the form ``'<row>x<col> - <row>x<col>'``.

    Returns:
        A dict with ``'tensorExisting'`` (the masks passed in),
        ``'tensorImage'`` (clamped to [0, 1] only outside training) and
        ``'tensorDisparity'`` (non-positive values replaced by 0).
    """
    # Normalise the raw image/disparity only when no context was supplied.
    if tensorImage is not None and tensorContext is None:
        tensorImage, tensorDisparity = self.normalize_images_disp(
            tensorImage, tensorDisparity, not_normed=True)

    # Assemble the data tensor, computing the context first if it is missing.
    if tensorData is None and tensorContext is not None:
        tensorData = torch.cat(
            [tensorImage, tensorDisparity, tensorContext], 1)
    elif tensorData is None:
        tensorContext = self.moduleContext(
            torch.cat([tensorImage, tensorDisparity], 1))
        tensorData = torch.cat(
            [tensorImage, tensorDisparity, tensorContext], 1)

    # Column 0: feed the input to row 0, then chain down through rows 1 to 3.
    tensorColumn = [None, None, None, None]

    tensorColumn[0] = self.moduleInput(
        torch.cat([tensorData, tensorMasks], 1))
    tensorColumn[1] = self._modules['0x0 - 1x0'](tensorColumn[0])
    tensorColumn[2] = self._modules['1x0 - 2x0'](tensorColumn[1])
    tensorColumn[3] = self._modules['2x0 - 3x0'](tensorColumn[2])

    # Column 1: rows are visited top to bottom; every row except the first
    # also receives the contribution of the row above it.
    intColumn = 1
    for intRow in range(len(tensorColumn)):
        tensorColumn[intRow] = self._modules[str(intRow) + 'x' +
                                             str(intColumn - 1) + ' - ' +
                                             str(intRow) + 'x' +
                                             str(intColumn)](
                                                 tensorColumn[intRow])
        if intRow != 0:
            tensorColumn[intRow] += self._modules[str(intRow - 1) + 'x' +
                                                  str(intColumn) + ' - ' +
                                                  str(intRow) + 'x' +
                                                  str(intColumn)](
                                                      tensorColumn[intRow - 1])
        # end
    # end

    # Column 2: rows are visited bottom to top; every row except the last
    # also receives the contribution of the row below it.
    intColumn = 2
    for intRow in range(len(tensorColumn) - 1, -1, -1):
        tensorColumn[intRow] = self._modules[str(intRow) + 'x' +
                                             str(intColumn - 1) + ' - ' +
                                             str(intRow) + 'x' +
                                             str(intColumn)](
                                                 tensorColumn[intRow])
        if intRow != len(tensorColumn) - 1:
            tensorUp = self._modules[str(intRow + 1) + 'x' +
                                     str(intColumn) + ' - ' + str(intRow) +
                                     'x' + str(intColumn)](
                                         tensorColumn[intRow + 1])

            # A negative pad trims one trailing row/column so that the
            # sizes match before the in-place addition.
            if tensorUp.size(2) != tensorColumn[intRow].size(2):
                tensorUp = F.pad(input=tensorUp,
                                 pad=[0, 0, 0, -1],
                                 mode='constant',
                                 value=0.0)
            if tensorUp.size(3) != tensorColumn[intRow].size(3):
                tensorUp = F.pad(input=tensorUp,
                                 pad=[0, -1, 0, 0],
                                 mode='constant',
                                 value=0.0)

            tensorColumn[intRow] += tensorUp
        # end
    # end

    # Column 3: same bottom-to-top pass as column 2.
    intColumn = 3
    for intRow in range(len(tensorColumn) - 1, -1, -1):
        tensorColumn[intRow] = self._modules[str(intRow) + 'x' +
                                             str(intColumn - 1) + ' - ' +
                                             str(intRow) + 'x' +
                                             str(intColumn)](
                                                 tensorColumn[intRow])
        if intRow != len(tensorColumn) - 1:
            tensorUp = self._modules[str(intRow + 1) + 'x' +
                                     str(intColumn) + ' - ' + str(intRow) +
                                     'x' + str(intColumn)](
                                         tensorColumn[intRow + 1])

            if tensorUp.size(2) != tensorColumn[intRow].size(2):
                tensorUp = F.pad(input=tensorUp,
                                 pad=[0, 0, 0, -1],
                                 mode='constant',
                                 value=0.0)
            if tensorUp.size(3) != tensorColumn[intRow].size(3):
                tensorUp = F.pad(input=tensorUp,
                                 pad=[0, -1, 0, 0],
                                 mode='constant',
                                 value=0.0)

            tensorColumn[intRow] += tensorUp
        # end
    # end

    # Both output heads read the top row of the last column.
    tensorImage = self.moduleImage(tensorColumn[0])
    tensorDisparity = self.moduleDisparity(tensorColumn[0])

    tensorImage, tensorDisparity = self.normalize_images_disp(
        tensorImage, tensorDisparity, not_normed=False)

    return {
        'tensorExisting':
        tensorMasks,
        'tensorImage':
        tensorImage.clamp(0.0, 1.0)
        if self.training == False else tensorImage,
        'tensorDisparity':
        F.threshold(input=tensorDisparity, threshold=0.0, value=0.0)
    }
def test_threshold(x, y): c = F.threshold(torch.add(x, y), 0.5, 10) return c
def forward(self, x, y, z, w):
    """Threshold four tensors with two alternating (threshold, value) settings.

    ``x`` and ``z`` use threshold 0.1 with replacement value 20; ``y`` and
    ``w`` use threshold 0.3 with replacement value 0.4. Returns the four
    results as a tuple in argument order.
    """
    return (
        F.threshold(x, 0.1, 20),
        F.threshold(y, 0.3, 0.4),
        F.threshold(z, 0.1, 20),
        F.threshold(w, 0.3, 0.4),
    )
def forward(self, x: Tensor, encoder_out: Tensor,
            encoder_padding_mask: Optional[Tensor],
            incremental_state: Optional[Dict[str, Dict[str, Tensor]]]):
    """Run self-attention, optional encoder attention and the feed-forward block.

    Each sub-layer adds its input back (residual connection) and is followed
    by its layer norm. Dropout is applied only while ``self.training`` is
    true, through ``torch.dropoutV2`` on the ``"npu"`` platform or the
    module's dropout layers on ``"gpu"``.

    Returns:
        ``(x, attn)`` where ``attn`` is the second value returned by the
        encoder attention, or ``None`` when ``self.encoder_attn`` is ``None``.
    """
    # --- self-attention sub-layer ---
    shortcut = x
    x, _ = self.self_attn(query=x,
                          key=x,
                          value=x,
                          mask_future_timesteps=True,
                          key_padding_mask=None,
                          incremental_state=incremental_state,
                          need_weights=False,
                          static_kv=False)
    if self.training and self.platform == "npu":
        x, _, _ = torch.dropoutV2(x, self.seed, p=self.prob)
    elif self.training and self.platform == "gpu":
        x = self.dropout(x)
    x = self.self_attn_layer_norm(shortcut + x)

    # --- encoder-decoder attention sub-layer (optional) ---
    attn = None
    if self.encoder_attn is not None:
        shortcut = x
        x, attn = self.encoder_attn(
            query=x,
            key=encoder_out,
            value=encoder_out,
            key_padding_mask=encoder_padding_mask,
            incremental_state=incremental_state,
            static_kv=True,
            mask_future_timesteps=False,
            need_weights=(not self.training and self.need_attn),
        )
        if self.training and self.platform == "npu":
            x, _, _ = torch.dropoutV2(x, self.seed, p=self.prob)
        elif self.training and self.platform == "gpu":
            x = self.dropout(x)
        x = self.encoder_attn_layer_norm(shortcut + x)

    # --- position-wise feed-forward sub-layer ---
    shortcut = x
    # Non-positive entries of fc1's output are replaced by 0.
    x = F.threshold(self.fc1(x), 0.0, 0.0)
    if self.training and self.platform == "npu":
        x, _, _ = torch.dropoutV2(x, self.seed, p=self.relu_prob)
    elif self.training and self.platform == "gpu":
        x = self.relu_dropout(x)
    x = self.fc2(x)
    if self.training and self.platform == "npu":
        x, _, _ = torch.dropoutV2(x, self.seed, p=self.prob)
    elif self.training and self.platform == "gpu":
        x = self.dropout(x)
    x = self.layer_norm(shortcut + x)
    return x, attn