import torch

def fun(x, y):
    # separate the ground-truth score (x_2) from all other scores (x_1)
    x_1, x_2 = split(x, y, labels)
    # average of the k largest non-ground-truth scores, each shifted by alpha
    max_1, _ = (x_1 + alpha).topk(k, dim=1)
    max_1 = max_1.mean(1)
    # best average of k scores that includes the ground-truth score
    max_2, _ = x_1.topk(k - 1, dim=1)
    max_2 = (max_2.sum(1) + x_2) / k
    # hinge: keep only the positive part of the difference
    loss = torch.clamp(max_1 - max_2, min=0)
    return loss
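Both versions of the loss depend on a `split` helper that separates the ground-truth score from the remaining class scores. That helper is not shown in this section; the sketch below is a hypothetical stand-in (written against a recent PyTorch API) that illustrates the shapes involved: `x_1` holds the `n_classes - 1` non-ground-truth scores per row and `x_2` the ground-truth score.

import torch

def split_sketch(x, y, labels):
    # Hypothetical helper, not the source's implementation.
    # x: (batch, n_classes) scores; y: (batch,) target indices;
    # labels: (n_classes,) class indices, i.e. torch.arange(n_classes).
    mask = labels.unsqueeze(0) != y.unsqueeze(1)   # True off the ground truth
    x_1 = x[mask].view(x.size(0), -1)              # (batch, n_classes - 1)
    x_2 = x.gather(1, y.unsqueeze(1)).squeeze(1)   # (batch,) ground-truth scores
    return x_1, x_2

# usage: x_1 drops the target column, x_2 keeps it
scores = torch.randn(4, 10)
targets = torch.randint(0, 10, (4,))
x_1, x_2 = split_sketch(scores, targets, torch.arange(10))
print(x_1.shape, x_2.shape)  # torch.Size([4, 9]) torch.Size([4])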
import torch.autograd as ag

def fun(x, y):
    x_1, x_2 = split(x, y, labels)
    # all scores are divided by (k * tau)
    x_1.div_(k * tau)
    x_2.div_(k * tau)
    # term 1: all terms that will *not* include the ground truth score
    # term 2: all terms that will include the ground truth score
    res = lsp(x_1)
    term_1, term_2 = res[1], res[0]
    term_1, term_2 = LogTensor(term_1), LogTensor(term_2)
    X_2 = LogTensor(x_2)
    # constant alpha / tau as a log-space tensor (no gradient needed)
    cst = x_2.data.new(1).fill_(float(alpha) / tau)
    One_by_tau = LogTensor(ag.Variable(cst, requires_grad=False))
    Loss_ = term_2 * X_2
    # combine the terms in log-space, then convert back to torch tensors
    loss_pos = (term_1 * One_by_tau + Loss_).torch()
    loss_neg = Loss_.torch()
    loss = tau * (loss_pos - loss_neg)
    return loss
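The smooth loss runs entirely in log-space: `LogTensor` presumably stores logarithms, so that `*` corresponds to multiplying the underlying positive quantities (adding logs) and `+` to summing them (a stable log-sum-exp). This keeps products of exponentials of large scores from overflowing. A minimal illustrative wrapper with those semantics (not the actual class, which is assumed to handle more, e.g. broadcasting):

import torch

class LogTensorSketch(object):
    """Illustrative log-space wrapper, not the library's LogTensor:
    values are stored as logs, so * multiplies and + adds the
    underlying (potentially huge) positive numbers without overflow."""

    def __init__(self, log_x):
        self.log_x = log_x

    def __mul__(self, other):
        # product of positives == sum of logs
        return LogTensorSketch(self.log_x + other.log_x)

    def __add__(self, other):
        # sum of positives == stable log-sum-exp of logs
        m = torch.max(self.log_x, other.log_x)
        s = torch.exp(self.log_x - m) + torch.exp(other.log_x - m)
        return LogTensorSketch(m + torch.log(s))

    def torch(self):
        # return the log-value as a plain tensor, as with .torch() above
        return self.log_x

Under these semantics, `term_2 * X_2` multiplies the ground-truth factor into the loss term, and `term_1 * One_by_tau + Loss_` accumulates the two groups of terms before `tau * (loss_pos - loss_neg)` maps the result back out of log-space.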
import numpy as np
import torch
from torch.autograd import Variable

print('Cuda: \t\t{}'.format(CUDA))
print('n_trials: \t\t{}'.format(n_trials))
print('-' * 70)

torch.manual_seed(1234)

# random scores, targets and class labels for the benchmark
scores = Variable(torch.randn(batch_size, n_classes))
target = torch.from_numpy(np.random.randint(n_classes, size=batch_size))
labels = torch.from_numpy(np.arange(n_classes))

if CUDA:
    target = target.cuda()
    labels = labels.cuda()
    scores = scores.cuda()

x_1, x_2 = split(scores, Variable(target), labels)
x_1.div_(k * tau)
x_2.div_(k * tau)


def timing_fun(fun, x, k, verbosity, double=False, n_trials=50,
               forward=1, use_buffer=False):
    times = []
    for _ in range(n_trials):
        if double:
            x = x.double()
        # fresh copy of the input; gradients are only tracked when timing backward
        x = Variable(x.data.clone(), requires_grad=not forward)
        if use_buffer:
            buffer = x.data.new(x.size(0), x.size(1), k + 1)
        if CUDA:
            # flush pending kernels so asynchronous CUDA work is not mistimed
            torch.cuda.synchronize()
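The `torch.cuda.synchronize()` call matters because CUDA kernels launch asynchronously: without a barrier before starting and after stopping the clock, the host would time only the kernel launch, not its execution. A self-contained sketch of the same synchronize-time-synchronize pattern (names here are illustrative, not from the benchmark above):

import time
import torch

def time_op_sketch(fn, x, n_trials=50):
    # average wall-clock time of fn(x) over n_trials runs
    times = []
    for _ in range(n_trials):
        if x.is_cuda:
            torch.cuda.synchronize()  # drain queued kernels before starting
        start = time.time()
        fn(x)
        if x.is_cuda:
            torch.cuda.synchronize()  # wait for fn's kernels to finish
        times.append(time.time() - start)
    return sum(times) / len(times)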