def train_batch(count, b, verbose=False, base_evaluator=None, train_evaluator=None):
    """Perform one supervised optimisation step on batch ``b``.

    The module-level ``detector`` is indexed with ``b``; an object
    classification cross-entropy and a relation cross-entropy are summed,
    back-propagated, gradient-clipped through the project's
    ``clip_grad_norm`` helper and applied with the module-level ``optimizer``.

    :param count: accepted for signature compatibility; not read here
    :param b: batch object handed to ``detector[b]``
    :param verbose: forwarded to ``clip_grad_norm``
    :param base_evaluator: accepted for signature compatibility; not read here
    :param train_evaluator: accepted for signature compatibility; not read here
    :return: pd.Series with entries 'class_loss', 'rel_loss' and 'total'
    """
    output = detector[b]

    obj_term = F.cross_entropy(output.rm_obj_dists, output.rm_obj_labels)
    # The last column of rel_labels is used as the relation target.
    rel_term = F.cross_entropy(output.rel_dists, output.rel_labels[:, -1])
    losses = {'class_loss': obj_term, 'rel_loss': rel_term}
    loss = sum(losses.values())

    optimizer.zero_grad()
    loss.backward()
    # Only parameters that actually received a gradient are clipped.
    with_grad = [
        (name, param)
        for name, param in detector.named_parameters()
        if param.grad is not None
    ]
    clip_grad_norm(with_grad, max_norm=conf.clip, verbose=verbose, clip=True)

    losses['total'] = loss
    optimizer.step()
    return pd.Series({key: value.data[0] for key, value in losses.items()})
def train_batch(b, verbose=False):
    """Train the module-level ``detector`` on a single batch.

    Loss = cross-entropy over ``rm_obj_dists`` / ``rm_obj_labels`` plus
    cross-entropy over ``rel_dists`` and the last column of ``rel_labels``.

    :param b: batch object handed to ``detector[b]``. The documentation
              inherited with this loop describes it as carrying imgs,
              all_anchors, all_anchor_inds, im_sizes, num_anchors_per_img,
              train_anchor_inds, gt_boxes and gt_classes; none of those
              fields are read directly in this function.
    :param verbose: forwarded to ``clip_grad_norm``
    :return: pd.Series with entries 'class_loss', 'rel_loss' and 'total'
    """
    result = detector[b]

    losses = {
        'class_loss': F.cross_entropy(result.rm_obj_dists, result.rm_obj_labels),
        'rel_loss': F.cross_entropy(result.rel_dists, result.rel_labels[:, -1]),
    }
    total = sum(losses.values())

    optimizer.zero_grad()
    total.backward()

    named_grads = []
    for name, param in detector.named_parameters():
        # Parameters without a gradient are not passed to the clipping helper.
        if param.grad is not None:
            named_grads.append((name, param))
    clip_grad_norm(named_grads, max_norm=conf.clip, verbose=verbose, clip=True)

    losses['total'] = total
    optimizer.step()

    scalars = {}
    for key, value in losses.items():
        scalars[key] = value.data[0]
    return pd.Series(scalars)
def train_batch(b, verbose=False):
    """Run one optimisation step with a multi-label (BCE) relation loss.

    When ``detector[b]`` yields ``None`` no update is performed and an
    all-zero loss record is returned instead.

    :param b: batch object handed to ``detector[b]``
    :param verbose: forwarded to ``clip_grad_norm``
    :return: pd.Series with entries 'class_loss', 'rel_loss' and 'total'
    """
    out = detector[b]
    if out is None:
        return pd.Series({'class_loss': 0.0, 'rel_loss': 0.0, 'total': 0.0})

    obj_term = F.cross_entropy(out.rm_obj_logits, out.rm_obj_labels)

    # Columns 3 onward of rel_labels are used as the BCE targets.
    rel_term = F.binary_cross_entropy_with_logits(
        out.rel_logits, out.rel_labels[:, 3:].float())
    # Scale the BCE value by the number of target columns (in place, as before).
    rel_term *= out.rel_labels[:, 3:].size(1)

    losses = {'class_loss': obj_term, 'rel_loss': rel_term}
    loss = sum(losses.values())

    optimizer.zero_grad()
    loss.backward()
    grads = [
        (name, param)
        for name, param in detector.named_parameters()
        if param.grad is not None
    ]
    clip_grad_norm(grads, max_norm=conf.clip, verbose=verbose, clip=True)
    losses['total'] = loss
    optimizer.step()

    return pd.Series({key: value.data[0] for key, value in losses.items()})
def train_batch(b, verbose=False):
    """Run one optimisation step on a reconstruction (MSE) objective.

    ``detector[b]`` returns a pair ``(depth_imgs, dec_fmaps)``; the loss is
    the mean-squared error of ``dec_fmaps`` against ``depth_imgs``.

    :param b: batch object handed to ``detector[b]``
    :param verbose: forwarded to ``clip_grad_norm``
    :return: pd.Series with entries 'ae_loss' and 'total'
    """
    targets, reconstructions = detector[b]

    losses = {'ae_loss': F.mse_loss(reconstructions, targets)}
    loss = sum(losses.values())

    optimizer.zero_grad()
    loss.backward()
    # Only parameters that received a gradient are handed to the clipper.
    clip_targets = [
        (name, param)
        for name, param in detector.named_parameters()
        if param.grad is not None
    ]
    clip_grad_norm(clip_targets, max_norm=conf.clip, verbose=verbose, clip=True)

    losses['total'] = loss
    optimizer.step()

    record = {}
    for key, value in losses.items():
        record[key] = value.item()
    return pd.Series(record)
def train_batch(b, multi_l, multi_rel, verbose=False, portion=0):
    """Run one optimisation step, optionally folding in an accumulated loss.

    Besides the object/relation cross-entropies (and ``center_loss`` when
    ``conf.reachability`` is ``True``), a ``criterion`` term over
    ``global_dists`` / ``multi_hot`` is accumulated in ``multi_l`` across
    calls when the module-level ``additive_att`` flag is ``True``. Once
    ``portion`` exceeds 100 the accumulated term, divided by ``portion``, is
    added to the loss and both accumulators are reset.

    :param b: batch object handed to ``detector[b]``
    :param multi_l: running accumulated loss from earlier calls, or ``None``;
                    updated in place with ``+=`` when not ``None``
    :param multi_rel: returned unchanged
    :param verbose: forwarded to ``clip_grad_norm``
    :param portion: running row count of ``global_dists`` from earlier calls
    :return: tuple ``(res, multi_l, multi_rel, result, portion)`` where
             ``res`` is a pd.Series of the loss terms plus 'total'
    """
    result = detector[b]
    portion += result.global_dists.shape[0]

    losses = {}
    losses['class_loss'] = F.cross_entropy(result.rm_obj_dists, result.rm_obj_labels)
    if conf.reachability is True:
        losses['center_loss'] = result.center_loss
    losses['rel_loss'] = F.cross_entropy(result.rel_dists, result.rel_labels[:, -1])

    # Base objective; the accumulated term is only added on flush steps.
    loss = sum(losses.values())
    if additive_att is True:
        batch_term = criterion(result.global_dists, result.multi_hot)
        if multi_l is None:
            multi_l = batch_term
        else:
            multi_l += batch_term

        if portion > 100:
            loss = loss + (multi_l / portion)
            multi_l = None
            portion = 0

    optimizer.zero_grad()
    # retain_graph=True is kept from the original; multi_l spans several calls.
    loss.backward(retain_graph=True)
    grads = [
        (name, param)
        for name, param in detector.named_parameters()
        if param.grad is not None
    ]
    clip_grad_norm(grads, max_norm=conf.clip, verbose=verbose, clip=True)
    losses['total'] = loss
    optimizer.step()

    res = pd.Series({key: value.data[0] for key, value in losses.items()})
    return res, multi_l, multi_rel, result, portion
def train_batch(b):
    """Run one optimisation step using the module-level ``criterion``.

    The loss is ``criterion(obj_scores, obj_labels[:, 0])`` computed on the
    output of ``detector[b]``.

    :param b: batch object handed to ``detector[b]``. The documentation
              inherited with this loop describes it as carrying imgs,
              all_anchors, all_anchor_inds, im_sizes, num_anchors_per_img,
              train_anchor_inds, gt_boxes and gt_classes; none of those
              fields are read directly in this function.
    :return: pd.Series with the single entry 'loss'
    """
    output = detector[b]

    # Only the first label column is used as the criterion target.
    loss = criterion(output.obj_scores, output.obj_labels[:, 0])

    # The scalar is captured before the optimisation step, as in the original.
    res = pd.Series([loss.data[0]], index=['loss'])

    optimizer.zero_grad()
    loss.backward()
    named_grads = [
        (name, param)
        for name, param in detector.named_parameters()
        if param.grad is not None
    ]
    clip_grad_norm(named_grads, max_norm=conf.clip, clip=True)
    optimizer.step()
    return res
def train_batch(batch_num, b, detector, train, optimizer, verbose=False):
    """Run one optimisation step and score the batch's predictions.

    ``detector[b]`` returns ``(result, result_preds)``. The object loss is a
    cross-entropy; the relation loss is picked by configuration, in order:

    * ``conf.lml_topk``  -- negative log of the ground-truth entry of
      ``rel_reps`` for every positive (label > 0) relation;
    * ``conf.ml_loss``   -- per-image summed BCE-with-logits on the
      non-background columns of ``rel_dists``;
    * ``conf.entr_topk`` -- per-image log(1 + sum(exp(.))) surrogate over the
      scores ranked after the first ``conf.entr_topk``;
    * otherwise          -- plain cross-entropy.

    After the update, the predictions are evaluated with two
    ``BasicSceneGraphEvaluator`` instances (``multiple_preds`` True / False)
    and the mean recalls at 20/50/100 are added to the returned record.

    :param batch_num: not read by this function
    :param b: batch object handed to ``detector[b]``; ``b.indexes`` gives the
              indices into ``train`` for each prediction. The documentation
              inherited with this loop describes it as carrying imgs,
              all_anchors, all_anchor_inds, im_sizes, num_anchors_per_img,
              train_anchor_inds, gt_boxes and gt_classes.
    :param detector: model; indexed with ``b`` and queried for
                     ``named_parameters()``
    :param train: dataset exposing ``ind_to_predicates``, ``gt_classes``,
                  ``relationships`` and ``gt_boxes``
    :param optimizer: optimizer stepped after gradient clipping
    :param verbose: forwarded to ``clip_grad_norm``
    :return: pd.Series with the loss terms, 'total', and
             'recall{20,50,100}' / 'recall{20,50,100}_con'
    """
    result, result_preds = detector[b]

    losses = {}
    losses['class_loss'] = F.cross_entropy(result.rm_obj_dists, result.rm_obj_labels)
    n_rel = len(train.ind_to_predicates)

    if conf.lml_topk is not None and conf.lml_topk:
        # Note: This still uses a maximum of 1 relationship per edge
        # in the graph. Adding them all requires changing the data loading
        # process.
        gt = result.rel_labels[:, -1]
        I = gt > 0
        gt = gt[I]
        n_pos = len(gt)

        reps = torch.cat(result.rel_reps)
        I_reps = I.unsqueeze(1).repeat(1, n_rel)
        reps = reps[I_reps].view(-1, n_rel)

        loss = []
        for i in range(n_pos):
            gt_i = gt[i]
            reps_i = reps[i]
            loss_i = -(reps_i[gt_i].log())
            loss.append(loss_i)

        loss = torch.cat(loss)
        loss = torch.sum(loss) / n_pos
        losses['rel_loss'] = loss
    elif conf.ml_loss:
        loss = []
        start = 0
        for i, rel_reps_i in enumerate(result.rel_reps):
            n = rel_reps_i.shape[0]

            # Get rid of the background labels here:
            reps = result.rel_dists[start:start + n, 1:].contiguous().view(-1)
            gt = result.rel_labels[start:start + n, -1].data.cpu()
            I = gt > 0
            gt = gt[I]
            gt = gt - 1  # Hacky shift to get rid of background labels.
            r = (n_rel - 1) * torch.arange(len(I))[I].long()
            gt_flat = r + gt
            gt_flat_onehot = torch.zeros(len(reps))
            gt_flat_onehot.scatter_(0, gt_flat, 1)
            loss_i = torch.nn.BCEWithLogitsLoss(size_average=False)(
                reps, Variable(gt_flat_onehot.cuda()))
            loss.append(loss_i)

            start += n

        loss = torch.cat(loss)
        loss = torch.sum(loss) / len(loss)
        losses['rel_loss'] = loss
    elif conf.entr_topk is not None and conf.entr_topk:
        # Note: This still uses a maximum of 1 relationship per edge
        # in the graph. Adding them all requires changing the data loading
        # process.
        # NOTE(review): if every image is skipped below, `loss` stays empty
        # and torch.cat raises; behaviour for that case is left unchanged.
        loss = []
        start = 0
        for i, rel_reps_i in enumerate(result.rel_reps):
            n = rel_reps_i.shape[0]
            # Advance the running row offset up front. Previously it was only
            # advanced at the end of the body, so either `continue` below left
            # it stale and every later image was sliced at the wrong rows.
            begin, start = start, start + n

            # Get rid of the background labels here:
            reps = result.rel_dists[begin:start, 1:].contiguous().view(-1)
            if len(reps) <= conf.entr_topk:
                # Nothing to do for small graphs.
                continue

            gt = result.rel_labels[begin:start, -1].data.cpu()
            I = gt > 0
            gt = gt[I]
            gt = gt - 1  # Hacky shift to get rid of background labels.
            r = (n_rel - 1) * torch.arange(len(I))[I].long()
            gt_flat = r + gt
            n_pos = len(gt_flat)
            if n_pos == 0:
                # Nothing to do if there is no ground-truth data.
                continue

            reps_sorted, J = reps.sort(descending=True)
            reps_sorted_last = reps_sorted[conf.entr_topk:]
            J_last = J[conf.entr_topk:]

            # Hacky way of removing the ground-truth from J.
            J_last_bool = J_last != gt_flat[0]
            for j in range(n_pos - 1):
                J_last_bool *= (J_last != gt_flat[j + 1])
            J_last_bool = J_last_bool.type_as(reps)

            loss_i = []
            for j in range(n_pos):
                yj = gt_flat[j]
                fyj = reps[yj]
                loss_ij = torch.log(
                    1. + torch.sum((reps_sorted_last - fyj).exp() * J_last_bool))
                loss_i.append(loss_ij)

            loss_i = torch.cat(loss_i)
            loss_i = torch.sum(loss_i) / len(loss_i)
            loss.append(loss_i)

        loss = torch.cat(loss)
        loss = torch.sum(loss) / len(loss)
        losses['rel_loss'] = loss
    else:
        losses['rel_loss'] = F.cross_entropy(result.rel_dists, result.rel_labels[:, -1])

    loss = sum(losses.values())

    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip, verbose=verbose, clip=True)
    losses['total'] = loss
    optimizer.step()

    # Fresh evaluators per call: the recalls below describe this batch only.
    evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=True)
    evaluator_con = BasicSceneGraphEvaluator.all_modes(multiple_preds=False)
    assert conf.num_gpus == 1
    # assert conf.mode == 'predcls'

    for i, (pred_i, gt_idx) in enumerate(zip(result_preds, b.indexes)):
        boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = pred_i

        gt_entry = {
            'gt_classes': train.gt_classes[gt_idx].copy(),
            'gt_relations': train.relationships[gt_idx].copy(),
            'gt_boxes': train.gt_boxes[gt_idx].copy(),
        }
        # Both endpoints of every predicted relation must have class id > 0.
        assert np.all(objs_i[rels_i[:, 0]] > 0) and \
            np.all(objs_i[rels_i[:, 1]] > 0)

        pred_entry = {
            'pred_boxes': boxes_i * BOX_SCALE / IM_SCALE,
            'pred_classes': objs_i,
            'pred_rel_inds': rels_i,
            'obj_scores': obj_scores_i,
            'rel_scores': pred_scores_i,  # hack for now.
        }

        evaluator[conf.mode].evaluate_scene_graph_entry(
            gt_entry,
            pred_entry,
        )
        evaluator_con[conf.mode].evaluate_scene_graph_entry(
            gt_entry,
            pred_entry,
        )

    res = {x: y.data[0] for x, y in losses.items()}
    recalls = evaluator[conf.mode].result_dict[conf.mode + '_recall']
    recalls_con = evaluator_con[conf.mode].result_dict[conf.mode + '_recall']
    res.update({
        'recall20': np.mean(recalls[20]),
        'recall50': np.mean(recalls[50]),
        'recall100': np.mean(recalls[100]),
        'recall20_con': np.mean(recalls_con[20]),
        'recall50_con': np.mean(recalls_con[50]),
        'recall100_con': np.mean(recalls_con[100]),
    })

    res = pd.Series(res)
    return res
def train_batch(b):
    """Run one optimisation step on the detection objective.

    The loss is the classification cross-entropy plus a box-regression term
    on the non-background (label != 0) entries. Unless
    ``conf.use_proposals`` is set, the RPN classification and RPN
    box-regression terms computed from ``b.train_anchor_labels`` and
    ``b.train_anchors`` are added as well.

    :param b: batch object handed to ``detector[b]``; ``train_anchor_labels``
              and ``train_anchors`` are read from it when the RPN terms are
              enabled. The documentation inherited with this loop also lists
              imgs, all_anchors, all_anchor_inds, im_sizes,
              num_anchors_per_img, train_anchor_inds, gt_boxes and
              gt_classes, which are not read directly here.
    :return: pd.Series with 'class_loss', 'box_loss', 'total' and, when the
             RPN terms are enabled, 'rpn_class_loss' and 'rpn_box_loss'
    """
    out = detector[b]
    labels = out.od_obj_labels
    box_deltas = out.od_box_deltas

    # ---- detector loss ----
    fg_inds = (labels.data != 0).nonzero().squeeze(1)
    fg_cnt = fg_inds.size(0)
    bg_cnt = labels.size(0) - fg_cnt

    class_loss = F.cross_entropy(out.od_obj_dists, labels)

    box_reg_mult = 2 * (1. / FG_FRACTION) * fg_cnt / (fg_cnt + bg_cnt + 1e-4)
    # No gather_nd in pytorch, so the first two dims of box_deltas are
    # flattened and addressed with a combined (row, label) index.
    flat_inds = fg_inds * box_deltas.size(1) + labels[fg_inds].data
    box_loss = bbox_loss(
        out.od_box_priors[fg_inds],
        box_deltas.view(-1, 4)[flat_inds],
        out.od_box_targets[fg_inds],
    ) * box_reg_mult

    loss = class_loss + box_loss

    if conf.use_proposals:
        res = pd.Series(
            [class_loss.data[0], box_loss.data[0], loss.data[0]],
            index=['class_loss', 'box_loss', 'total'])
    else:
        # ---- RPN loss ----
        anchor_labels = b.train_anchor_labels[:, -1]
        anchors = b.train_anchors[:, :4]
        anchor_targets = b.train_anchors[:, 4:]
        pos_inds = (anchor_labels.data == 1).nonzero().squeeze(1)

        rpn_class_loss = F.cross_entropy(out.rpn_scores, anchor_labels)

        rpn_box_mult = 2 * (1. / RPN_FG_FRACTION) * pos_inds.size(0) / (
            anchor_labels.size(0) + 1e-4)
        rpn_box_loss = bbox_loss(
            anchors[pos_inds],
            out.rpn_box_deltas[pos_inds],
            anchor_targets[pos_inds],
        ) * rpn_box_mult

        loss += rpn_class_loss + rpn_box_loss
        res = pd.Series(
            [
                rpn_class_loss.data[0],
                rpn_box_loss.data[0],
                class_loss.data[0],
                box_loss.data[0],
                loss.data[0],
            ],
            index=[
                'rpn_class_loss', 'rpn_box_loss', 'class_loss', 'box_loss',
                'total',
            ])

    optimizer.zero_grad()
    loss.backward()
    named_grads = [
        (name, param)
        for name, param in detector.named_parameters()
        if param.grad is not None
    ]
    clip_grad_norm(named_grads, max_norm=conf.clip, clip=True)
    optimizer.step()
    return res
def train_batch(b, verbose=False):
    """Run one optimisation step with an added triplet-style margin loss.

    The objective is the sum of

    * ``15 * mean(max(result.anchor, result.neg - result.pos + 0.6))``,
    * object cross-entropy on ``rm_obj_dists`` / ``rm_obj_labels``,
    * relation cross-entropy on ``rel_dists`` and the last column of
      ``rel_labels``.

    NOTE: an earlier revision also carried detector (RPN / box regression)
    and LSTM box-regression losses. They were disabled by wrapping them in
    string literals under a ``conf.mode == 'sgdet'`` branch that therefore
    did nothing; that dead branch has been removed.

    :param b: batch object handed to ``detector[b]``. The documentation
              inherited with this loop describes it as carrying imgs,
              all_anchors, all_anchor_inds, im_sizes, num_anchors_per_img,
              train_anchor_inds, gt_boxes and gt_classes; none of those
              fields are read directly in this function.
    :param verbose: forwarded to ``clip_grad_norm``
    :return: tuple ``(res, hard, soft, fenmu)``; ``res`` is a pd.Series with
             'triplet', 'class_loss', 'rel_loss' and 'total'. The other three
             are means of ``result.ratio.data`` at indices (0, 3, 6),
             (1, 4, 7) and (2, 5, 8) respectively.
    """
    triplet_margin = 0.6
    triplet_weight = 15

    result = detector[b]

    losses = {}
    losses['triplet'] = triplet_weight * torch.mean(
        torch.max(result.anchor, result.neg - result.pos + triplet_margin))
    losses['class_loss'] = F.cross_entropy(result.rm_obj_dists, result.rm_obj_labels)
    losses['rel_loss'] = F.cross_entropy(result.rel_dists, result.rel_labels[:, -1])
    loss = sum(losses.values())

    # Gradients accumulate in place, so clear them before backward().
    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(
        # p.grad is None for parameters that did not take part in backward.
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip, verbose=verbose, clip=True)
    losses['total'] = loss
    optimizer.step()

    res = pd.Series({x: y.data[0] for x, y in losses.items()})

    # ratio.data is read as three consecutive (hard, soft, fenmu) triples and
    # each statistic is averaged across them.
    # NOTE(review): "fenmu" is presumably "denominator" -- confirm with the model.
    ratio = result.ratio.data
    hard = (ratio[0] + ratio[3] + ratio[6]) / 3
    soft = (ratio[1] + ratio[4] + ratio[7]) / 3
    fenmu = (ratio[2] + ratio[5] + ratio[8]) / 3
    return res, hard, soft, fenmu
def train_batch_rl(count, b, verbose=False, base_evaluator=None, train_evaluator=None):
    """Run one policy-gradient style update over ``SAMPLE_NUM`` samples.

    A baseline reward is first obtained with the detector in eval mode. The
    detector is then switched to training mode (``rl_train`` set, batch norm
    fixed through ``fix_batchnorm``) and sampled ``SAMPLE_NUM`` times; each
    sample whose reward differs from the baseline and whose
    ``gen_tree_loss`` does not sum to zero contributes a back-propagated
    loss scaled by ``1 / SAMPLE_NUM``. Gradients accumulate over the samples
    and a single clipped optimizer step is taken at the end.

    :param count: forwarded to ``get_recall_x``
    :param b: batch object handed to ``detector[b]``. The documentation
              inherited with this loop describes it as carrying imgs,
              all_anchors, all_anchor_inds, im_sizes, num_anchors_per_img,
              train_anchor_inds, gt_boxes and gt_classes; none of those
              fields are read directly in this function.
    :param verbose: forwarded to ``clip_grad_norm``
    :param base_evaluator: evaluator passed to ``get_recall_x`` for the baseline
    :param train_evaluator: evaluator passed to ``get_recall_x`` for each sample
    :return: pd.Series of the loss terms of the *last* sample plus 'total'
    """
    # Baseline reward from a forward pass in eval mode.
    detector.eval()
    baseline_out = detector[b]
    base_reward = float(
        get_recall_x(count, [baseline_out], base_evaluator, 100)[-1])
    del baseline_out

    detector.rl_train = True
    detector.train()
    fix_batchnorm(detector)

    for _ in range(SAMPLE_NUM):
        result, sampled_eval = detector[b]
        current_reward = float(
            get_recall_x(count, [sampled_eval], train_evaluator, 100)[-1])
        del sampled_eval

        # The sample is skipped when it gives no learning signal.
        if base_reward == current_reward or float(sum(result.gen_tree_loss)) == 0:
            losses = {'policy_gradient_gen_tree_loss': 0}
            loss = 0
            continue

        losses = {}
        if not conf.use_rl_tree:
            losses['binary_gate_loss'] = bceloss(
                result.pair_gate, result.pair_gt.view(-1))
        else:
            # policy gradient loss
            losses['policy_gradient_gen_tree_loss'] = cal_policy_gradient_loss(
                result.gen_tree_loss, current_reward, base_reward)

        loss = sum(losses.values()) / SAMPLE_NUM
        loss.backward()
        del result

    detector.rl_train = False

    named_grads = [
        (name, param)
        for name, param in detector.named_parameters()
        if param.grad is not None
    ]
    clip_grad_norm(named_grads, max_norm=conf.clip, verbose=verbose, clip=True)
    optimizer.step()
    # Gradients are cleared after the step so the next call starts clean.
    optimizer.zero_grad()

    losses['total'] = loss
    return pd.Series({key: float(value) for key, value in losses.items()})
def train_batch(batch, bi):
    """
    Run one optimisation step on ``batch`` and collect loss/sampling statistics.

    Args:
        batch: batch object handed to ``detector[batch]``. The inherited
            documentation describes it as containing:
            imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
            all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using
            all_anchor_inds: [num_anchors, 2] array of the indices into the concatenated
                              RPN feature vector that give us all_anchors,
                              each one (img_ind, fpn_idx)
            im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image.
            num_anchors_per_img: int, number of anchors in total over the feature pyramid per img
            Training parameters:
            train_anchor_inds: a [num_train, 5] array of indices for the anchors that will
                                be used to compute the training loss (img_ind, fpn_idx)
            gt_boxes: [num_gt, 4] GT boxes over the batch.
            gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        bi: batch index, integer; controls how often losses are printed

    Returns:
        result: pd.Series of scalar losses and the trim/sample/recall
            statistics read from the detector output, or ``None`` (bare
            return) when the detector yields no result for this batch.
    """
    result = detector[batch]
    # No detector output for this batch: report it and skip the update.
    if result is None:
        print('Error! No Pos Relation', bi)
        return

    # `losses` holds plain scalars for logging; `loss` is what gets back-propagated.
    losses = dict()
    class_loss = F.cross_entropy(result.rm_obj_dists, result.rm_obj_labels)
    losses['class_loss'] = class_loss.data[0]
    rel_loss = F.cross_entropy(result.rel_dists, result.rel_labels[:, -1])
    losses['rel_loss'] = rel_loss.data[0]
    loss = class_loss + rel_loss

    # Models whose name starts with 'fcknet' add a loss on rel_pn_dists / rel_pn_labels.
    if conf.model.split('_')[0] == 'fcknet':
        rel_pn_loss = F.cross_entropy(result.rel_pn_dists, result.rel_pn_labels)
        losses['rel_pn_loss'] = rel_pn_loss.data[0]
        loss += rel_pn_loss
        # TODO
        # 'fcknet_v3' additionally adds a loss on rel_mem_dists.
        if conf.model == 'fcknet_v3':
            rel_mem_loss = F.cross_entropy(result.rel_mem_dists, result.rel_labels[:, -1])
            losses['mem_loss'] = rel_mem_loss.data[0]
            loss += rel_mem_loss

    # Print every conf.print_interval batches, skipping the very first ones.
    # NOTE(review): the source text reached review with its indentation lost;
    # the pairing of the `else` below with the outer 'fcknet' check is the
    # most plausible reading -- confirm against the original file.
    if bi % conf.print_interval == 0 and bi >= conf.print_interval:
        if conf.model.split('_')[0] == 'fcknet':
            print(
                'rel_pn_loss: %.4f, cls_loss: %.4f, rel_loss: %.4f'
                % (losses['rel_pn_loss'], losses['class_loss'], losses['rel_loss'])
            )
            if conf.model == 'fcknet_v3':
                print(
                    'rel_mem_loss: %.4f' % losses['mem_loss']
                )
        else:
            print(
                'cls_loss: %.4f, rel_loss: %.4f'
                % (losses['class_loss'], losses['rel_loss'])
            )

    optimizer.zero_grad()
    loss.backward()
    # Clip only parameters that received a gradient; the clipper is verbose
    # once every ten print intervals.
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip, verbose=bi % (conf.print_interval*10) == 0, clip=True
    )
    losses['total'] = loss.data[0]
    # First element of each statistic reported by the detector output.
    losses['trim_pos'] = result.rel_trim_pos[0]
    losses['trim_total'] = result.rel_trim_total[0]
    losses['sample_pos'] = result.rel_sample_pos[0]
    losses['sample_neg'] = result.rel_sample_neg[0]
    losses['relpn_recall'] = result.rel_pn_recall[0]
    optimizer.step()
    res = pd.Series({x: y for x, y in losses.items()})
    return res