def forward(self, input, target):
    y_true = target.int().unsqueeze(-1)
    same_id = torch.eq(y_true, y_true.t()).type_as(input)

    pos_mask = same_id
    neg_mask = 1 - same_id

    def _mask_max(input_tensor, mask, axis=None, keepdims=False):
        input_tensor = input_tensor - 1e6 * (1 - mask)
        _max, _idx = torch.max(input_tensor, dim=axis, keepdim=keepdims)
        return _max, _idx

    def _mask_min(input_tensor, mask, axis=None, keepdims=False):
        input_tensor = input_tensor + 1e6 * (1 - mask)
        _min, _idx = torch.min(input_tensor, dim=axis, keepdim=keepdims)
        return _min, _idx

    # output[i, j] = || feature[i, :] - feature[j, :] ||_2
    dist_squared = torch.sum(input ** 2, dim=1, keepdim=True) + \
                   torch.sum(input.t() ** 2, dim=0, keepdim=True) - \
                   2.0 * torch.matmul(input, input.t())
    dist = dist_squared.clamp(min=1e-16).sqrt()

    pos_max, pos_idx = _mask_max(dist, pos_mask, axis=-1)
    neg_min, neg_idx = _mask_min(dist, neg_mask, axis=-1)

    # loss(x, y) = max(0, -y * (x1 - x2) + margin)
    y = torch.ones(same_id.size()[0]).to(DEVICE)
    return F.margin_ranking_loss(neg_min.float(),
                                 pos_max.float(),
                                 y,
                                 self.margin,
                                 self.size_average)
def forward(
    self,
    pos_scores: FloatTensorType,
    neg_scores: FloatTensorType,
    weight: Optional[FloatTensorType],
) -> FloatTensorType:
    num_pos = match_shape(pos_scores, -1)
    num_neg = match_shape(neg_scores, num_pos, -1)

    # FIXME Workaround for https://github.com/pytorch/pytorch/issues/15223.
    if num_pos == 0 or num_neg == 0:
        return torch.zeros((), device=pos_scores.device, requires_grad=True)

    if weight is not None:
        match_shape(weight, num_pos)
        loss_per_sample = F.margin_ranking_loss(
            neg_scores,
            pos_scores.unsqueeze(1),
            target=pos_scores.new_full((1, 1), -1, dtype=torch.float),
            margin=self.margin,
            reduction="none",
        )
        loss = (loss_per_sample * weight.unsqueeze(-1)).sum()
    else:  # more memory-efficient way if no weights
        loss = F.margin_ranking_loss(
            neg_scores,
            pos_scores.unsqueeze(1),
            target=pos_scores.new_full((1, 1), -1, dtype=torch.float),
            margin=self.margin,
            reduction="sum",
        )

    return loss
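# A minimal usage sketch of the broadcasting pattern in the forward above
# (negatives compared against pos_scores.unsqueeze(1) with a -1 target). The
# wrapping module with a `margin` attribute and the `match_shape` helper are
# not reproduced here; the tensor shapes below are assumptions.
import torch
import torch.nn.functional as F

pos_scores = torch.randn(8)        # one score per positive edge
neg_scores = torch.randn(8, 50)    # 50 sampled negatives per positive
margin = 0.1

# target -1 means the first argument (negatives) should be *lower* than the
# second (positives) by at least `margin`.
loss = F.margin_ranking_loss(
    neg_scores,
    pos_scores.unsqueeze(1),
    target=pos_scores.new_full((1, 1), -1),
    margin=margin,
    reduction="sum",
)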
def forward(self, anchor, positive, negative):
    positive_similarity = F.cosine_similarity(anchor, positive)
    negative_similarity = F.cosine_similarity(anchor, negative)

    labels = torch.ones(positive_similarity.size())

    if self.margin is None:
        diff = 1 - (torch.mean(positive_similarity) - torch.mean(negative_similarity))
        margin = diff.item()
    else:
        margin = self.margin

    loss = F.margin_ranking_loss(positive_similarity,
                                 negative_similarity,
                                 labels.to(anchor.device),
                                 margin=margin)
    return loss
def forward(self, inputs, targets):
    softmax_out = inputs[0]
    trip_out = inputs[1]
    sf_num = len(softmax_out)
    total_cls_loss = 0
    for i in range(0, sf_num):
        # total_cls_loss += self.xentropy_loss(softmax_out[i], targets)
        total_cls_loss += F.cross_entropy(softmax_out[i], targets)

    trip_num = len(trip_out)
    total_trip_loss = 0
    if self.has_trip:
        for i in range(0, trip_num):
            input_fea = trip_out[i]
            n = input_fea.size(0)
            num_person = n // self.num_instances
            # Compute pairwise distance, replace by the official when merged
            dist = torch.pow(input_fea, 2).sum(1).expand(n, n)
            dist = dist + dist.t()
            dist.addmm_(input_fea, input_fea.t(), beta=1, alpha=-2)
            dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
            # For each anchor, find the hardest positive and negative
            mask = targets.expand(n, n).eq(targets.expand(n, n).t())
            dist_ap, dist_an = [], []
            for j in range(n):  # separate index so the outer loop variable is not shadowed
                hard_positive = dist[j][mask[j]].max()
                dist_ap.append(hard_positive)
                hard_negative = dist[j][mask[j] == 0].min(0)
                dist_an.append(hard_negative[0])
            # dist_ap = torch.cat(dist_ap)
            # dist_an = torch.cat(dist_an)
            # Compute ranking hinge loss
            dist_ap = torch.stack(dist_ap)
            dist_an = torch.stack(dist_an)
            y = dist_an.data.new()
            y.resize_as_(dist_an.data)
            y.fill_(1)
            y = Variable(y)
            temp_trip_loss = F.margin_ranking_loss(dist_an, dist_ap, y, self.margin1)
            total_trip_loss += temp_trip_loss

    loss = self.gamma * total_cls_loss + self.alpha * total_trip_loss
    accuracy_val, = accuracy(softmax_out[0].data, targets.data)
    prec = accuracy_val[0]
    return loss, prec
def forward(self, inputs, targets):
    random = torch.randperm(inputs.size(0))
    # shuffle predictions and targets with the same permutation so that each
    # (i, j) pair compares the same two samples
    pred_loss = inputs[random]
    pred_lossi = pred_loss[:inputs.size(0) // 2]
    pred_lossj = pred_loss[inputs.size(0) // 2:]
    target_loss = targets.reshape(inputs.size(0), 1)
    target_loss = target_loss[random]
    target_lossi = target_loss[:inputs.size(0) // 2]
    target_lossj = target_loss[inputs.size(0) // 2:]
    final_target = torch.sign(target_lossi - target_lossj)
    return F.margin_ranking_loss(pred_lossi,
                                 pred_lossj,
                                 final_target,
                                 margin=self.margin,
                                 reduction='mean')
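# A minimal sketch of the pairwise loss-prediction ranking used above (the
# batch values and the margin are made up): the batch is shuffled, split in
# half, and each predicted-loss pair is ranked by the sign of the true-loss
# difference.
import torch
import torch.nn.functional as F

pred_losses = torch.randn(16, requires_grad=True)  # scalar loss predictions
true_losses = torch.rand(16)                        # actual per-sample losses

perm = torch.randperm(16)
pred = pred_losses[perm].reshape(16, 1)
true = true_losses[perm].reshape(16, 1)
target = torch.sign(true[:8] - true[8:])            # +1 / -1 per pair
loss = F.margin_ranking_loss(pred[:8], pred[8:], target, margin=1.0)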
class TripletSemihardLoss(nn.Module):  # TriHardLoss
    """
    Shape:
        - Input: :math:`(N, C)` where `C = number of channels`
        - Target: :math:`(N)`
        - Output: scalar.
    """

    def __init__(self, device, margin=0, size_average=True):
        super(TripletSemihardLoss, self).__init__()
        self.margin = margin
        self.size_average = size_average
        self.device = device

    def forward(self, input, target):
        y_true = target.int().unsqueeze(-1)  # expand labels to a column vector
        same_id = torch.eq(y_true, y_true.t()).type_as(input)  # pairwise label equality

        pos_mask = same_id
        neg_mask = 1 - same_id

        def _mask_max(input_tensor, mask, axis=None, keepdims=False):
            # hardest (farthest) positive sample
            input_tensor = input_tensor - 1e6 * (1 - mask)
            _max, _idx = torch.max(input_tensor, dim=axis, keepdim=keepdims)
            return _max, _idx

        def _mask_min(input_tensor, mask, axis=None, keepdims=False):
            # hardest (closest) negative sample
            input_tensor = input_tensor + 1e6 * (1 - mask)
            _min, _idx = torch.min(input_tensor, dim=axis, keepdim=keepdims)
            return _min, _idx

        # output[i, j] = || feature[i, :] - feature[j, :] ||_2
        # pairwise distance between feature vectors
        dist_squared = torch.sum(input ** 2, dim=1, keepdim=True) + \
                       torch.sum(input.t() ** 2, dim=0, keepdim=True) - \
                       2.0 * torch.matmul(input, input.t())
        dist = dist_squared.clamp(min=1e-16).sqrt()

        pos_max, pos_idx = _mask_max(dist, pos_mask, axis=-1)
        neg_min, neg_idx = _mask_min(dist, neg_mask, axis=-1)

        # loss(x, y) = max(0, -y * (x1 - x2) + margin)
        y = torch.ones(same_id.size()[0]).to(self.device)
        return F.margin_ranking_loss(neg_min.float(),
                                     pos_max.float(),
                                     y,
                                     self.margin,
                                     self.size_average)
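# A minimal usage sketch for TripletSemihardLoss, assuming the class above is
# in scope; the embedding size, batch layout, and margin are made up. Each
# anchor is pushed to keep its hardest positive closer than its hardest
# negative by `margin`.
import torch

device = torch.device("cpu")
criterion = TripletSemihardLoss(device, margin=0.3)

features = torch.randn(8, 128, requires_grad=True)   # (N, C) embeddings
labels = torch.tensor([0, 0, 1, 1, 2, 2, 3, 3])       # (N,) identity labels
loss = criterion(features, labels)
loss.backward()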
def rank_loss_fn(self, predict, label, margin=0.8, reduction='mean'):
    predict = predict.reshape(-1)
    label = label.reshape(-1)
    pos_mask = label > 0
    pos = predict[pos_mask]
    neg = predict[~pos_mask]
    neg_mask = torch.randint(0, neg.shape[0], (pos.shape[0], ), device=label.device)
    neg = neg[neg_mask]
    rank_loss = F.margin_ranking_loss(pos,
                                      neg,
                                      target=torch.ones_like(pos),
                                      margin=margin,
                                      reduction=reduction)
    return {"loss_rank": rank_loss, "loss": rank_loss}
def unsupervised_train_step(self, data):
    """One training step

    Arguments:
        data {dict of data} -- required keys and values:
            'X' {LongTensor [batch_size, history_len, max_x_sent_len]} -- token ids of context sentences
            'X_floor' {LongTensor [batch_size, history_len]} -- floors of context sentences
            'Y' {LongTensor [batch_size, max_y_sent_len]} -- token ids of negative-sampled response sentence
            'Y_ref' {LongTensor [batch_size, max_y_sent_len]} -- token ids of reference response sentence
            'Y_floor' {LongTensor [batch_size]} -- floor of response sentence

    Returns:
        dict of data -- returned keys and values
            'loss' {FloatTensor []} -- loss to backward
        dict of statistics -- returned keys and values
            'loss' {float} -- batch loss
    """
    X, Y_neg, Y_ref = data["X"], data["Y"], data["Y_ref"]
    X_floor, Y_floor = data["X_floor"], data["Y_floor"]

    # Forward
    neg_encodings = self._encode_response(Y_neg)
    ref_encodings = self._encode_response(Y_ref)
    ctx_encodings = self._encode_context(X, X_floor, Y_floor)
    neg_unref_metric = self._compute_unref_metric(neg_encodings, ctx_encodings)
    ref_unref_metric = self._compute_unref_metric(ref_encodings, ctx_encodings)

    # Compute loss
    batch_size = X.size(0)
    loss = F.margin_ranking_loss(
        ref_unref_metric,
        neg_unref_metric,
        torch.ones(batch_size).long().to(DEVICE),
        margin=self.margin,
        reduction="mean"
    )

    # return dicts
    ret_data = {
        "loss": loss
    }
    ret_stat = {
        "loss": loss.item()
    }

    return ret_data, ret_stat
def train(epoch):
    print("Epoch", epoch)
    t = time.time()
    model.train(True)
    torch.set_grad_enabled(True)
    eloss = 0
    for batch_idx, instance in enumerate(train_generator):
        pos, neg, pht_bef, ptt_bef, nht_bef, ntt_bef = instance
        pos = pos.to(device)
        neg = neg.to(device)
        # text information
        pht = list(map(lambda x: x.to(device), pht_bef[0:3]))
        ptt = list(map(lambda x: x.to(device), ptt_bef[0:3]))
        nht = list(map(lambda x: x.to(device), nht_bef[0:3]))
        ntt = list(map(lambda x: x.to(device), ntt_bef[0:3]))
        batch_nodes, batch_adj = get_subgraph(pos, train_triple_dict, graph)
        # get relative location according to the batch_nodes
        shifted_pos, shifted_neg = convert_index([pos, neg], batch_nodes)
        batch_nodes = torch.LongTensor(batch_nodes.tolist()).to(device)
        batch_adj = torch.from_numpy(batch_adj).to(device)
        shifted_pos = torch.LongTensor(shifted_pos).to(device)
        shifted_neg = torch.LongTensor(shifted_neg).to(device)
        score_pos = model(batch_nodes, batch_adj, pos, shifted_pos,
                          pht[0], pht[1], pht[2], ptt[0], ptt[1], ptt[2])
        score_neg = model(batch_nodes, batch_adj, neg, shifted_neg,
                          nht[0], nht[1], nht[2], ntt[0], ntt[1], ntt[2])
        # `y` is the ranking target (a tensor of ones, so score_pos should
        # exceed score_neg); it is assumed to be defined at module level.
        loss_train = F.margin_ranking_loss(score_pos, score_neg, y, margin=args.margin)
        sys.stdout.write('%d batches processed. current train batch loss: %f\r'
                         % (batch_idx, loss_train.item()))
        eloss += loss_train.item()
        loss_train.backward()
        del batch_nodes, batch_adj, shifted_pos, shifted_neg, pos, neg, pht_bef, ptt_bef, nht_bef, ntt_bef
        optimizer.step()
        if batch_idx % 500 == 0:
            gc.collect()
    print('\n')
    print('Epoch: {:04d}'.format(epoch + 1),
          'loss_train: {:.4f}'.format(eloss / (batch_idx + 1)),
          'time: {:.4f}s'.format(time.time() - t))
    return eloss
def ranking_loss(self, y_pos, y_neg, margin=1, C=1, average=True):
    """
    Compute max-margin ranking loss.

    Params:
    -------
    y_pos: vector of size Mx1
        Contains scores for positive samples.

    y_neg: vector of size (M*C)x1
        Contains scores for negative samples.

    margin: float, default: 1
        Margin used for the loss.

    C: int, default: 1
        Number of negative samples per positive sample.

    average: bool, default: True
        Whether to average the loss or just sum it.

    Returns:
    --------
    loss: float
    """
    M = y_pos.size(0)

    y_pos = y_pos.view(-1).repeat(C)  # repeat to match y_neg
    y_neg = y_neg.view(-1)

    # target = [-1, -1, ..., -1], i.e. y_neg should be higher than y_pos
    target = -np.ones(M * C, dtype=np.float32)

    if self.gpu:
        target = Variable(torch.from_numpy(target).cuda())
    else:
        target = Variable(torch.from_numpy(target))

    loss = F.margin_ranking_loss(y_pos, y_neg, target,
                                 margin=margin, size_average=average)

    return loss
def ranking_loss(self, y_pos, y_neg, margin=1, C=1, energy_based=False, average=True):
    """
    Compute max-margin ranking loss.

    Params:
    -------
    y_pos: vector of size Mx1
        Contains scores for positive samples.

    y_neg: vector of size (M*C)x1
        Contains scores for negative samples.

    margin: float, default: 1
        Margin used for the loss.

    C: int, default: 1
        Number of negative samples per positive sample.

    energy_based: bool, default: False
        Whether to treat score as energy => minimizing score.

    average: bool, default: True
        Whether to average the loss or just sum it.

    Returns:
    --------
    loss: float
    """
    M = y_pos.size(0)

    y_pos = y_pos.view(-1).repeat(C)  # repeat to match y_neg
    y_neg = y_neg.view(-1)

    target = Variable(torch.ones(M * C))
    target = target.cuda() if self.gpu else target
    if energy_based:
        # target = [-1, ..., -1], i.e. y_neg should be higher than y_pos
        target = -target

    loss = F.margin_ranking_loss(
        y_pos, y_neg, target, margin=margin, size_average=average
    )

    return loss
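# A minimal sketch of calling the ranking_loss above with C negatives per
# positive, assuming the function and its imports (torch, Variable) are in
# scope. The wrapping object with a `gpu` attribute is stood in by a plain
# namespace; the shapes below are made up.
import types
import torch

self_stub = types.SimpleNamespace(gpu=False)
y_pos = torch.randn(4, 1)      # M = 4 positive triple scores
y_neg = torch.randn(12, 1)     # M * C = 12 negative triple scores (C = 3)
# each positive score is repeated C times so it is paired with each of its negatives
loss = ranking_loss(self_stub, y_pos, y_neg, margin=1, C=3)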
def rank_loss_fn(self, pos_index, neg_index, score, margin=0.7, reduction='mean'):
    assert len(score.shape) == 2 and len(pos_index.shape) == 2
    one_idx = pos_index
    zero_idx = neg_index
    zero_mask = torch.randint(0, zero_idx.shape[1], (one_idx.shape[1], ))
    zero_idx = zero_idx[:, zero_mask]
    pos = score[one_idx[0], one_idx[1]]
    neg = score[zero_idx[0], zero_idx[1]]
    rank_loss = F.margin_ranking_loss(pos,
                                      neg,
                                      target=torch.ones_like(pos),
                                      margin=margin,
                                      reduction=reduction)
    return rank_loss
def _triplet_loss(self, feat_q, targets):
    dist_mat = euclidean_dist(feat_q, self.queue.t())
    N, M = dist_mat.size()
    is_pos = targets.view(N, 1).expand(N, M).eq(self.queue_label.expand(N, M)).float()

    sorted_mat_distance, positive_indices = torch.sort(
        dist_mat + (-9999999.) * (1 - is_pos), dim=1, descending=True)
    dist_ap = sorted_mat_distance[:, 0]
    sorted_mat_distance, negative_indices = torch.sort(
        dist_mat + 9999999. * is_pos, dim=1, descending=False)
    dist_an = sorted_mat_distance[:, 0]

    y = dist_an.new().resize_as_(dist_an).fill_(1)
    loss = F.soft_margin_loss(dist_an - dist_ap, y)
    if loss == float('Inf'):
        loss = F.margin_ranking_loss(dist_an, dist_ap, y, margin=0.3)
    return loss
def test_step(self, batch, batch_idx):
    doc_a, doc_b, doc_a_mask, doc_b_mask, label_ids = batch
    # sentence scores for 'sentence a' and 'sentence b' are 'phi_a' and 'phi_b' respectively
    phi_a, phi_b = self(doc_a, doc_b, attention_mask_a=doc_a_mask, attention_mask_b=doc_b_mask)
    loss = F.margin_ranking_loss(phi_a, phi_b, label_ids, margin=self.config_args.margin)
    pred_res = -1 * torch.ones_like(label_ids)
    pred_res[(phi_a > phi_b).view(-1)] = 1
    # reuse the validation-style metric for testing (train and test runs are separate)
    acc = self.test_accuracy(pred_res.long(), label_ids.long())
    pbar = {'test_acc': acc}
    return {'test_loss': loss, 'progress_bar': pbar}
def validation_step(self, batch, batch_idx):
    doc_a, doc_b, doc_a_mask, doc_b_mask, label_ids = batch
    # sentence scores for 'sentence a' and 'sentence b' are 'phi_a' and 'phi_b' respectively
    phi_a, phi_b = self(doc_a, doc_b, attention_mask_a=doc_a_mask, attention_mask_b=doc_b_mask)
    loss = F.margin_ranking_loss(phi_a, phi_b, label_ids, margin=self.config_args.margin)
    pred_res = -1 * torch.ones_like(label_ids)
    pred_res[(phi_a > phi_b).view(-1)] = 1
    # get validation batch accuracy
    acc = self.val_accuracy(pred_res.long(), label_ids.long())
    pbar = {'val_acc': acc}
    return {'val_loss': loss, 'progress_bar': pbar}
def forward(self, pos_scores: FloatTensorType, neg_scores: FloatTensorType) -> FloatTensorType:
    num_pos = match_shape(pos_scores, -1)
    num_neg = match_shape(neg_scores, num_pos, -1)

    # FIXME Workaround for https://github.com/pytorch/pytorch/issues/15223.
    if num_pos == 0 or num_neg == 0:
        return torch.zeros((), device=pos_scores.device, requires_grad=True)

    loss = F.margin_ranking_loss(
        neg_scores,
        pos_scores.unsqueeze(1),
        target=pos_scores.new_full((1, 1), -1, dtype=torch.float),
        margin=self.margin,
        reduction="sum",
    )

    return loss
def margin_ranking_loss(inputs, data, margin=0.1):
    hist = inputs["hist"]
    device = hist.device
    margin_label = data["margin_label"].to(device)
    indices = data['angle_range_label']
    margin_loss = []
    for _hist, _margin_label, idx in zip(hist, margin_label, indices):
        if idx.item() == 255:
            margin_loss.append(0.0)
        else:
            den = _hist[:-1] + _hist[1:]
            x1 = (_hist[:-1] / den)
            x2 = (_hist[1:] / den)
            margin_loss.append(F.margin_ranking_loss(x1, x2, _margin_label,
                                                     margin=margin, reduction="sum"))
    return sum(margin_loss) / len(margin_loss)
def training_step(self, batch, batch_idx):
    doc_a, doc_b, doc_a_mask, doc_b_mask, label_ids = batch
    # sentence scores for 'sentence a' and 'sentence b' are 'phi_a' and 'phi_b' respectively
    phi_a, phi_b = self(doc_a, doc_b, attention_mask_a=doc_a_mask, attention_mask_b=doc_b_mask)
    loss = F.margin_ranking_loss(phi_a, phi_b, label_ids, margin=self.config_args.margin)
    pred_res = -1 * torch.ones_like(label_ids)
    pred_res[phi_a.detach().requires_grad_(False).view(-1) >
             phi_b.detach().requires_grad_(False).view(-1)] = 1
    # get training batch accuracy
    acc = self.train_accuracy(pred_res.long(), label_ids.long())
    pbar = {'batch_train_acc': acc}
    return {'loss': loss, 'progress_bar': pbar}
def test_margin_ranking_loss(self):
    inp1 = torch.randn(32, 128, device='cuda', dtype=self.dtype, requires_grad=True)
    inp2 = torch.randn(32, 128, device='cuda', dtype=self.dtype, requires_grad=True)
    target = (torch.randint(0, 1, (128, ), device='cuda') - 1).type_as(inp1)
    output = F.margin_ranking_loss(inp1, inp2, target,
                                   margin=0, size_average=None,
                                   reduce=None, reduction='mean')
def val(model, batch_val_pos, aliasTable, device):
    model.eval()
    with torch.no_grad():
        batch_neg_items = aliasTable.draw(NEG_SIZE_RANKING * batch_val_pos.shape[0])
        # batch_neg_items = randint(low=item_keys.min(), high=item_keys.max() + 1,
        #                           size=NEG_SIZE_RANKING * batch_val_pos.shape[0])
        val_pos_logits, val_neg_logits = model(batch_val_pos, batch_neg_items)
        target = torch.ones(NEG_SIZE_RANKING * batch_val_pos.shape[0], 1).to(device)
        pos = torch.repeat_interleave(val_pos_logits, NEG_SIZE_RANKING, dim=0)
        loss = F.margin_ranking_loss(pos, val_neg_logits, target,
                                     margin=LOSS_MARGIN, reduction='mean')
        HR1, HR3, HR20, HR50, MRR10, MRR20, MRR50, NDCG10, NDCG20, NDCG50 = metrics(
            val_pos_logits, val_neg_logits, training=False)
    return (loss.to('cpu').detach().numpy(),
            HR1, HR3, HR20, HR50, MRR10, MRR20, MRR50, NDCG10, NDCG20, NDCG50)
def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor):
    """
    Calculate rank hinge loss.

    :param y_pred: Predicted result.
    :param y_true: Label.
    :return: Hinge loss computed by user-defined margin.
    """
    y_pos = y_pred[::(self.num_neg + 1), :]
    y_negs = []
    for neg_idx in range(self.num_neg):
        neg = y_pred[(neg_idx + 1)::(self.num_neg + 1), :]
        y_negs.append(neg)
    losses = [
        F.margin_ranking_loss(y_pos, neg, torch.ones_like(y_pos), margin=self.margin)
        for neg in y_negs
    ]
    return torch.mean(torch.stack(losses))
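# A minimal sketch of the interleaved batch layout the rank hinge loss above
# expects (the group count and score values are made up): predictions are
# ordered as [pos, neg_1, ..., neg_k, pos, neg_1, ...], so slicing with a
# stride of num_neg + 1 recovers the positive column and each negative column.
import torch

num_neg = 2
# 3 query groups, each of (1 positive + 2 negatives) -> 9 scores
y_pred = torch.randn(3 * (num_neg + 1), 1)
y_pos = y_pred[::(num_neg + 1), :]       # rows 0, 3, 6
y_neg_1 = y_pred[1::(num_neg + 1), :]    # rows 1, 4, 7
y_neg_2 = y_pred[2::(num_neg + 1), :]    # rows 2, 5, 8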
def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor):
    """
    Calculate rank hinge loss.

    :param y_pred: Predicted result.
    :param y_true: Label.
    :return: Hinge loss computed by user-defined margin.
    """
    y_pos = y_pred[::(self.num_neg + 1), :]
    y_neg = []
    for neg_idx in range(self.num_neg):
        neg = y_pred[(neg_idx + 1)::(self.num_neg + 1), :]
        y_neg.append(neg)
    y_neg = torch.cat(y_neg, dim=-1)
    y_neg = torch.mean(y_neg, dim=-1, keepdim=True)
    y_true = torch.ones_like(y_pos)
    return F.margin_ranking_loss(y_pos, y_neg, y_true,
                                 margin=self.margin, reduction=self.reduction)
def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor = None):
    """
    Calculate rank hinge loss.

    :param y_pred: Predicted result; 2-D with a single value per row (one
        neuron in the last dimension), interpreted as a probability.
    :param y_true: Ground-truth label.
    :return: Hinge loss computed by user-defined margin.
    """
    y_pos = y_pred[::(self.num_neg + 1), :]
    y_neg = []
    for neg_idx in range(self.num_neg):
        neg = y_pred[(neg_idx + 1)::(self.num_neg + 1), :]
        y_neg.append(neg)
    y_neg = torch.cat(y_neg, dim=-1)
    # average the probabilities over all negative examples
    y_neg = torch.mean(y_neg, dim=-1, keepdim=True)
    y_true = torch.ones_like(y_pos)
    return F.margin_ranking_loss(y_pos, y_neg, y_true,
                                 margin=self.margin, reduction=self.reduction)
def forward(self, inputs, targets, epoch, original_size, annotation):
    n = inputs.size(0)

    # Compute pairwise distance, replace by the official when merged
    dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
    dist = dist + dist.t()
    dist.addmm_(inputs, inputs.t(), beta=1, alpha=-2)
    dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability

    def gen_target(dist):
        y = dist.data.new()
        y.resize_as_(dist.data)
        y.fill_(1)
        return Variable(y)

    loss = 0
    for i in range(original_size):
        dist_ap = dist[i][original_size + 2 * i].unsqueeze(0)
        dist_an = dist[i][original_size + 2 * i + 1].unsqueeze(0)
        target = gen_target(dist_ap)
        loss += F.margin_ranking_loss(dist_an, dist_ap, target,
                                      margin=annotation[i] * self.margin)
    return loss / original_size
def hinge(pos_score: Dict[EType_canon, torch.Tensor],
          neg_score: Dict[EType_canon, torch.Tensor],
          device: torch.device,
          reduction: str = 'sum',
          max_margin: float = 1e-1) -> torch.Tensor:
    """
    Computes hinge loss.

    Parameters
    ----------
    pos_score : Dict[EType_canon, torch.Tensor]
        From etype to tensor with scores of real edges.
    neg_score : Dict[EType_canon, torch.Tensor]
        From etype to tensor with scores of fake edges.
    device : torch.device
        Device on which we compute loss.
    reduction : str
        Type of loss reduction across minibatch ('mean' or 'sum').
    max_margin : float
        Parameter for margin_ranking_loss.

    Returns
    -------
    torch.Tensor
        Loss.
    """
    predicted_pos = torch.cat(list(pos_score.values()))
    predicted_neg = torch.cat(list(neg_score.values()))
    # predicted_pos should be higher than predicted_neg, so target = [1, ..., 1]
    target = torch.ones(len(predicted_pos)).to(device)
    loss = F.margin_ranking_loss(predicted_pos, predicted_neg, target,
                                 reduction=reduction, margin=max_margin)
    return loss
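# A minimal sketch of calling hinge() above, assuming the function is in
# scope; the canonical edge-type keys and score shapes are made up. Positive
# and negative score dicts are concatenated per edge type and ranked with a
# target of ones.
import torch

pos_score = {('user', 'clicks', 'item'): torch.randn(6),
             ('user', 'buys', 'item'): torch.randn(4)}
neg_score = {('user', 'clicks', 'item'): torch.randn(6),
             ('user', 'buys', 'item'): torch.randn(4)}
loss = hinge(pos_score, neg_score, torch.device('cpu'), reduction='mean')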
def margin_rank_loss(output, target, sample_size=32, margin=1.0):
    label = target.cpu().numpy()
    pos_indices = []
    neg_indices = []
    for cnt, sublabel in enumerate(mit.sliced(label, sample_size)):
        pos, neg = [], []
        for i, l in enumerate(sublabel):
            i += cnt * sample_size
            if l:
                pos.append(i)
            else:
                neg.append(i)
        len_p = len(pos)
        len_n = len(neg)
        pos_indices.extend([i for i in pos for _ in range(len_n)])
        neg_indices.extend(neg * len_p)
    y = -1 * torch.ones(output[pos_indices, :].shape[0]).to(target.device)
    loss = F.margin_ranking_loss(output[pos_indices, :],
                                 output[neg_indices, :],
                                 y,
                                 margin=margin,
                                 reduction="mean")
    return loss
def test(model, batch_test_pos, aliasTable, device, DEBUG=False):
    model.eval()
    with torch.no_grad():
        batch_neg_items = aliasTable.draw(NEG_SIZE_RANKING * batch_test_pos.shape[0])
        # batch_neg_items = randint(low=item_keys.min(), high=item_keys.max() + 1,
        #                           size=NEG_SIZE_RANKING * batch_test_pos.shape[0])
        if not DEBUG:
            test_pos_logits, test_neg_logits = model(batch_test_pos, batch_neg_items)
        else:
            (test_pos_logits, test_neg_logits, gcn_embedding, gcn_process_user_emb,
             gcn_process_item_pos_emb, gcn_process_item_neg_emb, user_fin_embed,
             item_pos_fin_embed, item_neg_fin_embed, user_attention, item_pos_attn,
             item_neg_attn) = model(batch_test_pos, batch_neg_items, DEBUG)

        if not DEBUG:
            target = torch.ones(NEG_SIZE_RANKING * batch_test_pos.shape[0], 1).to(device)
            pos = torch.repeat_interleave(test_pos_logits, NEG_SIZE_RANKING, dim=0)
            loss = F.margin_ranking_loss(pos, test_neg_logits, target,
                                         margin=LOSS_MARGIN, reduction='mean')
            HR1, HR3, HR20, HR50, MRR10, MRR20, MRR50, NDCG10, NDCG20, NDCG50 = metrics(
                test_pos_logits, test_neg_logits, training=False)
            return (loss.to('cpu').detach().numpy(),
                    HR1, HR3, HR20, HR50, MRR10, MRR20, MRR50, NDCG10, NDCG20, NDCG50)
        else:
            return (test_pos_logits, test_neg_logits, gcn_embedding, gcn_process_user_emb,
                    gcn_process_item_pos_emb, gcn_process_item_neg_emb, user_fin_embed,
                    item_pos_fin_embed, item_neg_fin_embed, user_attention, item_pos_attn,
                    item_neg_attn)
def configure_criterion(self, y, t):
    criterion = F.cross_entropy(y, t)

    if self.hparams.criterion == "cross_entropy":
        criterion = F.cross_entropy(y, t)
    elif self.hparams.criterion == "binary_cross_entropy":
        criterion = F.binary_cross_entropy(y, t)
    elif self.hparams.criterion == "binary_cross_entropy_with_logits":
        criterion = F.binary_cross_entropy_with_logits(y, t)
    elif self.hparams.criterion == "poisson_nll_loss":
        criterion = F.poisson_nll_loss(y, t)
    elif self.hparams.criterion == "hinge_embedding_loss":
        criterion = F.hinge_embedding_loss(y, t)
    elif self.hparams.criterion == "kl_div":
        criterion = F.kl_div(y, t)
    elif self.hparams.criterion == "l1_loss":
        criterion = F.l1_loss(y, t)
    elif self.hparams.criterion == "mse_loss":
        criterion = F.mse_loss(y, t)
    elif self.hparams.criterion == "margin_ranking_loss":
        # Note: F.margin_ranking_loss expects (input1, input2, target);
        # calling it with only (y, t) raises a TypeError.
        criterion = F.margin_ranking_loss(y, t)
    elif self.hparams.criterion == "multilabel_margin_loss":
        criterion = F.multilabel_margin_loss(y, t)
    elif self.hparams.criterion == "multilabel_soft_margin_loss":
        criterion = F.multilabel_soft_margin_loss(y, t)
    elif self.hparams.criterion == "multi_margin_loss":
        criterion = F.multi_margin_loss(y, t)
    elif self.hparams.criterion == "nll_loss":
        criterion = F.nll_loss(y, t)
    elif self.hparams.criterion == "smooth_l1_loss":
        criterion = F.smooth_l1_loss(y, t)
    elif self.hparams.criterion == "soft_margin_loss":
        criterion = F.soft_margin_loss(y, t)

    return criterion
def forward(self, inputs, targets):
    num_input = len(inputs)
    if num_input == 3:
        softmax_out = inputs[0]
        trip_out = inputs[1]
        features = inputs[2]
    elif num_input == 6:
        softmax_out = inputs[0]
        trip_out = inputs[1]
        l2_side = inputs[2]
        l3_side = inputs[3]
        l4_side = inputs[4]
        features = inputs[5]

    sac_loss = 0
    xentropy = 0
    TripletLoss1 = 0
    TripletLoss2 = 0
    prec = 0
    prec_sf = 0

    if self.config.sac:
        # if self.config.model == 'resnet_channel3':
        #     sac_loss = sac2(l2_side, l3_side, l4_side)
        # else:
        #     sac_loss = sac3(l2_side, l3_side, l4_side)
        sac_loss = sac3(l2_side, l3_side, l4_side)

    num_softmax = len(softmax_out)
    # print(num_softmax)
    for i in range(0, num_softmax):
        xentropy += F.cross_entropy(softmax_out[i], targets)
    prec_sf, = accuracy(softmax_out[0].data, targets.data)
    prec_sf = prec_sf[0]

    num_trip = len(trip_out)
    for i in range(0, num_trip):
        if self.config.trip and not self.config.trip_weight and not self.config.quad and not self.config.rank_loss:
            # dist_ap, dist_an, y = select_triplet_exp(trip_out[i], targets,
            #                                          self.num_instances)
            dist_ap, dist_an, y = select_triplet(trip_out[i], targets,
                                                 self.num_instances)
            TripletLoss1 += F.margin_ranking_loss(dist_an, dist_ap, y, self.margin1)
            prec = (dist_an.data > dist_ap.data).sum().item() * 1. / y.size(0)

        if self.config.trip_weight and not self.config.trip and not self.config.quad and not self.config.rank_loss:
            normalizer = nn.Softmax()
            dist_ap, dist_an, y = select_triplet_weighted(
                trip_out[i], targets, self.num_instances, normalizer,
                self.config.trip_com, self.config.weight_margin,
                self.config.trip_weight_pos_radius_divide,
                self.config.weight_margin,
                self.config.trip_weight_neg_radius_divide)
            pdb.set_trace()
            if self.config.trip_com:
                # print('Using triplet and triplet-weighted loss')
                TripletLoss1 += F.margin_ranking_loss(dist_an[0], dist_ap[0], y, self.margin1)
                TripletLoss2 += F.margin_ranking_loss(dist_an[1], dist_ap[1], y, self.margin2)
                prec = (dist_an[0].data > dist_ap[0].data).sum().item() * 1. / y.size(0)
            else:
                # print('Using triplet-weighted loss')
                TripletLoss2 += F.margin_ranking_loss(dist_an, dist_ap, y, self.margin2)
                prec = (dist_an.data > dist_ap.data).sum().item() * 1. / y.size(0)

        if self.config.quad and not self.config.trip_weight and not self.config.trip and not self.config.rank_loss:
            dist_ap, dist_an, dist_ann, y = select_quadruplet(trip_out[i], targets,
                                                              self.num_instances)
            TripletLoss1 += F.margin_ranking_loss(dist_an, dist_ap, y, self.margin1)
            TripletLoss2 += F.margin_ranking_loss(dist_ann, dist_ap, y, self.margin2)
            prec = (dist_an.data > dist_ap.data).sum().item() * 1. / y.size(0)

        if self.config.rank_loss and not self.config.trip_weight and not self.config.trip and not self.config.quad:
            rank_loss, prec = cal_rank_loss_normalize(trip_out[i], targets,
                                                      self.num_instances, self.config.margin1)
            TripletLoss1 = rank_loss

    if self.config.sac:
        # pdb.set_trace()
        loss = self.alpha * TripletLoss1 + self.beta * TripletLoss2 + \
               self.gamma * xentropy + self.theta * sac_loss
    else:
        loss = self.alpha * TripletLoss1 + self.beta * TripletLoss2 + self.gamma * xentropy

    # pdb.set_trace()
    prec_batch = max(prec, prec_sf)
    return loss, prec_batch
score_matrix = score_model(query, hidden, query_len, features, cross=True)
score_positive = score_matrix.diag()
# score_neg1 = score_func(query, features_neg, summary)
score_neg1 = score_model(query, hidden, query_len, features_neg)
# score_neg2 = score_func(query_neg, features, summary_neg)
score_neg2 = score_model(query_neg, hidden_neg, query_neg_len, features)
# loss_manual_mining = -F.logsigmoid(score_positive - score_neg1).mean() \
#                      - F.logsigmoid(score_positive - score_neg2).mean()
loss_manual_mining = F.margin_ranking_loss(score_positive, score_neg1, target, margin=0.9) \
                     + F.margin_ranking_loss(score_positive, score_neg2, target, margin=0.9)
loss_hard_mining = contrastive_loss(score_matrix)
loss = loss_manual_mining + loss_hard_mining  # + 0.5 * loss_gen

loss.backward()
optimizer.step()

distributed.all_reduce(loss_manual_mining.data)
distributed.all_reduce(loss_hard_mining.data)
losses_hard_mining += loss_hard_mining.data.item() / len(args.devices)
losses_manual_mining += loss_manual_mining.data.item() / len(args.devices)
def triplet_loss(embedding, targets, margin, norm_feat, hard_mining, dist_type,
                 loss_type, domain_labels=None, pos_flag=[1, 0, 0], neg_flag=[0, 0, 1]):
    r"""Modified from Tong Xiao's open-reid (https://github.com/Cysu/open-reid).
    Related Triplet Loss theory can be found in paper 'In Defense of the Triplet
    Loss for Person Re-Identification'."""

    if norm_feat:
        embedding = normalize(embedding, axis=-1)

    # For distributed training, gather all features from different process.
    if comm.get_world_size() > 1:
        all_embedding = concat_all_gather(embedding)
        all_targets = concat_all_gather(targets)
    else:
        all_embedding = embedding
        all_targets = targets

    if dist_type == 'euclidean':
        dist_mat = euclidean_dist(all_embedding, all_embedding)
    elif dist_type == 'cosine':
        dist_mat = cosine_dist(all_embedding, all_embedding)

    N = dist_mat.size(0)

    if (pos_flag == [1, 0, 0] and neg_flag == [0, 1, 1]) or domain_labels is None:
        is_pos = all_targets.view(N, 1).expand(N, N).eq(
            all_targets.view(N, 1).expand(N, N).t())
        is_neg = all_targets.view(N, 1).expand(N, N).ne(
            all_targets.view(N, 1).expand(N, N).t())
    else:
        vec1 = copy.deepcopy(all_targets)
        for i in range(N):
            vec1[i] = i  # [0, 1, 2, 3, 4, ~~]
        is_same_img = vec1.expand(N, N).eq(vec1.expand(N, N).t())
        is_same_instance = all_targets.view(N, 1).expand(N, N).eq(
            all_targets.view(N, 1).expand(N, N).t())
        is_same_domain = domain_labels.view(N, 1).expand(N, N).eq(
            domain_labels.view(N, 1).expand(N, N).t())

        set0 = is_same_img
        set_all = []
        set_all.extend([is_same_instance * (is_same_img == False)])
        set_all.extend([(is_same_instance == False) * (is_same_domain == True)])
        set_all.extend([is_same_domain == False])

        is_pos = copy.deepcopy(set0)
        is_neg = copy.deepcopy(set0 == False)
        is_neg[:] = False

        for i, bool_flag in enumerate(pos_flag):
            if bool_flag == 1:
                is_pos += set_all[i]
        for i, bool_flag in enumerate(neg_flag):
            if bool_flag == 1:
                is_neg += set_all[i]

        # print(pos_flag)
        # print(is_pos.type(torch.IntTensor))
        # print(neg_flag)
        # print(is_neg.type(torch.IntTensor))

    if hard_mining:
        dist_ap, dist_an = hard_example_mining(dist_mat, is_pos, is_neg)
    else:
        dist_ap, dist_an = weighted_example_mining(dist_mat, is_pos, is_neg)

    y = dist_an.new().resize_as_(dist_an).fill_(1)

    if margin > 0:
        # all(sum(is_pos) == 1)
        loss = F.margin_ranking_loss(dist_an, dist_ap, y, margin=margin)
    else:
        if loss_type == 'logistic':
            loss = F.soft_margin_loss(dist_an - dist_ap, y)
            # fmt: off
            if loss == float('Inf'): loss = F.margin_ranking_loss(dist_an, dist_ap, y, margin=0.3)
            # fmt: on
        elif loss_type == 'hinge':
            loss = F.margin_ranking_loss(dist_an, dist_ap, y, margin=margin)

    return loss
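# A minimal sketch of what the margin branch above computes once the hardest
# positive/negative distances have been mined; the distance values below are
# made up and stand in for hard_example_mining's output.
import torch
import torch.nn.functional as F

dist_ap = torch.tensor([0.4, 0.9, 0.3])   # anchor-to-hardest-positive distances
dist_an = torch.tensor([1.2, 0.8, 1.0])   # anchor-to-hardest-negative distances
y = torch.ones_like(dist_an)              # target +1: dist_an should exceed dist_ap
loss = F.margin_ranking_loss(dist_an, dist_ap, y, margin=0.3)
# per-anchor terms: max(0, -(dist_an - dist_ap) + 0.3) -> [0.0, 0.4, 0.0]; mean ~= 0.133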