Example #1
def train_batch(count,
                b,
                verbose=False,
                base_evaluator=None,
                train_evaluator=None):
    result = detector[b]

    losses = {}
    losses['class_loss'] = F.cross_entropy(result.rm_obj_dists,
                                           result.rm_obj_labels)
    losses['rel_loss'] = F.cross_entropy(result.rel_dists,
                                         result.rel_labels[:, -1])

    loss = sum(losses.values())

    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip,
        verbose=verbose,
        clip=True)
    losses['total'] = loss
    optimizer.step()
    res = pd.Series({x: y.data[0] for x, y in losses.items()})
    return res
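These examples call a project-specific clip_grad_norm(named_parameters, max_norm, clip=..., verbose=...) rather than torch.nn.utils.clip_grad_norm_, which takes bare parameters and has no clip or verbose flags. A minimal sketch of what such a helper might look like (the actual implementation in the source repository may differ):

def clip_grad_norm(named_parameters, max_norm, clip=False, verbose=False):
    """Hypothetical sketch: compute the total L2 gradient norm over
    (name, param) pairs and, if clip=True, rescale the gradients so the
    total norm is at most max_norm."""
    total_norm = 0.0
    param_norms = []
    for name, p in named_parameters:
        param_norm = float(p.grad.data.norm(2))
        param_norms.append((name, param_norm))
        total_norm += param_norm ** 2
    total_norm = total_norm ** 0.5

    if clip:
        clip_coef = max_norm / (total_norm + 1e-6)
        if clip_coef < 1:
            for _, p in named_parameters:
                p.grad.data.mul_(clip_coef)

    if verbose:
        for name, norm in sorted(param_norms, key=lambda x: -x[1]):
            print('{:<60s}: {:.3f}'.format(name, norm))
        print('total grad norm: {:.3f}'.format(total_norm))
    return total_norm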
Example #2
def train_batch(b, verbose=False):
	"""
	:param b: contains:
		  :param imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
		  :param all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using
		  :param all_anchor_inds: [num_anchors, 2] array of the indices into the concatenated
								  RPN feature vector that give us all_anchors,
								  each one (img_ind, fpn_idx)
		  :param im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image.

		  :param num_anchors_per_img: int, number of anchors in total over the feature pyramid per img

		  Training parameters:
		  :param train_anchor_inds: a [num_train, 5] array of indices for the anchors that will
									be used to compute the training loss (img_ind, fpn_idx)
		  :param gt_boxes: [num_gt, 4] GT boxes over the batch.
		  :param gt_classes: [num_gt, 2] gt classes where each one is (img_id, class)
	:return:
	"""
	result = detector[b]

	losses = {}
	losses['class_loss'] = F.cross_entropy(result.rm_obj_dists, result.rm_obj_labels)
	losses['rel_loss'] = F.cross_entropy(result.rel_dists, result.rel_labels[:, -1])
	loss = sum(losses.values())

	optimizer.zero_grad()
	loss.backward()
	clip_grad_norm(
		[(n, p) for n, p in detector.named_parameters() if p.grad is not None],
		max_norm=conf.clip, verbose=verbose, clip=True)
	losses['total'] = loss
	optimizer.step()
	res = pd.Series({x: y.data[0] for x, y in losses.items()})
	return res
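Every variant here returns its per-batch losses as a pandas Series, which makes them easy to aggregate over a window or an epoch. A hedged usage sketch; train_loader and print_interval are illustrative names, not taken from the original code:

import pandas as pd


def train_epoch(train_loader, print_interval=100):
    # Collect one pd.Series of losses per batch and report running means.
    tr = []
    for batch_num, batch in enumerate(train_loader):
        tr.append(train_batch(batch, verbose=batch_num % print_interval == 0))
        if batch_num > 0 and batch_num % print_interval == 0:
            running = pd.concat(tr[-print_interval:], axis=1).mean(1)
            print('batch {:5d}: {}'.format(batch_num, running.to_dict()))
    return pd.concat(tr, axis=1).mean(1)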
Example #3
def train_batch(b, verbose=False):

    result = detector[b]
    if result is None:
        return pd.Series({'class_loss': 0.0, 'rel_loss': 0.0, 'total': 0.0})

    losses = {}

    losses['class_loss'] = F.cross_entropy(result.rm_obj_logits,
                                           result.rm_obj_labels)
    losses['rel_loss'] = F.binary_cross_entropy_with_logits(
        result.rel_logits, result.rel_labels[:, 3:].float())
    losses['rel_loss'] *= result.rel_labels[:, 3:].size(1)

    loss = sum(losses.values())

    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip,
        verbose=verbose,
        clip=True)
    losses['total'] = loss
    optimizer.step()
    res = pd.Series({x: y.data[0] for x, y in losses.items()})

    return res
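Example #3 treats relation prediction as multi-label: an edge may carry several predicates at once, so it uses binary cross-entropy with logits on a multi-hot target and then rescales by the number of relation classes, since BCE-with-logits averages over classes as well as edges by default. A small shape sketch with made-up sizes:

import torch
import torch.nn.functional as F

num_edges, num_rel = 6, 51
rel_logits = torch.randn(num_edges, num_rel)   # one score per predicate per edge
rel_multi_hot = torch.zeros(num_edges, num_rel)
rel_multi_hot[0, 3] = 1.0                      # edge 0 carries predicate 3 ...
rel_multi_hot[0, 7] = 1.0                      # ... and also predicate 7

# The default reduction averages over num_edges * num_rel elements, so
# multiplying by num_rel restores a per-edge rather than per-element scale.
rel_loss = F.binary_cross_entropy_with_logits(rel_logits, rel_multi_hot) * num_rel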
Example #4
def train_batch(b, verbose=False):
    depth_imgs, dec_fmaps = detector[b]

    losses = {}
    losses['ae_loss'] = F.mse_loss(dec_fmaps, depth_imgs)
    loss = sum(losses.values())

    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip, verbose=verbose, clip=True)
    losses['total'] = loss
    optimizer.step()
    res = pd.Series({x: y.item() for x, y in losses.items()})
    return res
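Example #4 pulls scalars out with .item() instead of .data[0]: the .data[0] pattern used in the other examples only works on 0.3-era PyTorch, where a reduced loss was a 1-element Variable, while .item() is the supported way to get a Python number from a 0-dim tensor on PyTorch 0.4 and later. For instance:

import torch

loss = torch.tensor(2.0, requires_grad=True) ** 2   # a 0-dim tensor, like a reduced loss
print(loss.item())   # 4.0 -- works on PyTorch 0.4+
# loss.data[0]       # IndexError on modern PyTorch: 0-dim tensors cannot be indexed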
Example #5
def train_batch(b, multi_l, multi_rel, verbose=False, portion=0):

    losses = {}
    result = detector[b]
    portion += result.global_dists.shape[0]
    losses['class_loss'] = F.cross_entropy(result.rm_obj_dists,
                                           result.rm_obj_labels)

    if conf.reachability is True:
        losses['center_loss'] = result.center_loss
    losses['rel_loss'] = F.cross_entropy(
        result.rel_dists, result.rel_labels[:, -1])  # ,to_cuda(W_REL))
    # losses['rel_loss2'] = F.cross_entropy(result.rel_dists2, result.rel_labels[:, -1])  # ,to_cuda(W_REL))

    if additive_att is True:
        if multi_l is None:
            multi_l = criterion(result.global_dists, result.multi_hot)
        else:
            multi_l += criterion(result.global_dists, result.multi_hot)
        if portion > 100:
            loss = sum(losses.values()) + (multi_l / portion)
            multi_l = None
            portion = 0
        else:
            loss = sum(losses.values())
    else:
        loss = sum(losses.values())

    optimizer.zero_grad()
    loss.backward(retain_graph=True)
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip,
        verbose=verbose,
        clip=True)
    losses['total'] = loss
    optimizer.step()
    res = pd.Series({x: y.data[0] for x, y in losses.items()})
    return res, multi_l, multi_rel, result, portion
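Example #5 threads state through successive calls: multi_l accumulates what appears to be a multi-label loss over result.global_dists and portion counts samples, and once more than 100 samples have been seen the averaged term is folded into the batch loss and both accumulators are reset (retain_graph=True presumably keeps the accumulated graph alive across batches). A hedged usage sketch; train_loader is an illustrative name:

# Hedged usage sketch for the stateful accumulators of Example #5.
multi_l, multi_rel, portion = None, None, 0
for batch in train_loader:
    res, multi_l, multi_rel, result, portion = train_batch(
        batch, multi_l, multi_rel, verbose=False, portion=portion)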
Example #6
def train_batch(b):
    """
	:param b: contains:
		  :param imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
		  :param all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using
		  :param all_anchor_inds: [num_anchors, 2] array of the indices into the concatenated
								  RPN feature vector that give us all_anchors,
								  each one (img_ind, fpn_idx)
		  :param im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image.

		  :param num_anchors_per_img: int, number of anchors in total over the feature pyramid per img

		  Training parameters:
		  :param train_anchor_inds: a [num_train, 5] array of indices for the anchors that will
									be used to compute the training loss (img_ind, fpn_idx)
		  :param gt_boxes: [num_gt, 4] GT boxes over the batch.
		  :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)

	:return:
	"""
    result = detector[b]
    scores = result.obj_scores
    labels = result.obj_labels

    # detector loss
    loss = criterion(scores, labels[:, 0])
    res = pd.Series([loss.data[0]], ['loss'])

    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip,
        clip=True)
    optimizer.step()

    return res
Example #7
def train_batch(batch_num, b, detector, train, optimizer, verbose=False):
    """
    :param b: contains:
        :param imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
        :param all_anchors: [num_anchors, 4] the boxes of all anchors
            that we'll be using
        :param all_anchor_inds: [num_anchors, 2] array of the indices
            into the concatenated RPN feature vector that give us all_anchors,
            each one (img_ind, fpn_idx)
        :param im_sizes: a [batch_size, 4] numpy array of
            (h, w, scale, num_good_anchors) for each image.

        :param num_anchors_per_img: int, number of anchors in total
             over the feature pyramid per img

        Training parameters:
        :param train_anchor_inds: a [num_train, 5] array of indices for
             the anchors that will be used to compute the training loss
             (img_ind, fpn_idx)
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt classes where each one is (img_id, class)
    :return:
    """

    result, result_preds = detector[b]

    losses = {}
    losses['class_loss'] = F.cross_entropy(result.rm_obj_dists,
                                           result.rm_obj_labels)
    n_rel = len(train.ind_to_predicates)

    if conf.lml_topk is not None and conf.lml_topk:
        # Note: This still uses a maximum of 1 relationship per edge
        # in the graph. Adding them all requires changing the data loading
        # process.
        gt = result.rel_labels[:, -1]

        I = gt > 0
        gt = gt[I]
        n_pos = len(gt)

        reps = torch.cat(result.rel_reps)
        I_reps = I.unsqueeze(1).repeat(1, n_rel)
        reps = reps[I_reps].view(-1, n_rel)

        loss = []
        for i in range(n_pos):
            gt_i = gt[i]
            reps_i = reps[i]
            loss_i = -(reps_i[gt_i].log())
            loss.append(loss_i)

        loss = torch.cat(loss)
        loss = torch.sum(loss) / n_pos
        losses['rel_loss'] = loss
    elif conf.ml_loss:
        loss = []

        start = 0
        for i, rel_reps_i in enumerate(result.rel_reps):
            n = rel_reps_i.shape[0]

            # Get rid of the background labels here:
            reps = result.rel_dists[start:start + n, 1:].contiguous().view(-1)
            gt = result.rel_labels[start:start + n, -1].data.cpu()
            I = gt > 0
            gt = gt[I]
            gt = gt - 1  # Hacky shift to get rid of background labels.
            r = (n_rel - 1) * torch.arange(len(I))[I].long()
            gt_flat = r + gt
            gt_flat_onehot = torch.zeros(len(reps))
            gt_flat_onehot.scatter_(0, gt_flat, 1)
            loss_i = torch.nn.BCEWithLogitsLoss(size_average=False)(
                reps, Variable(gt_flat_onehot.cuda()))
            loss.append(loss_i)

            start += n

        loss = torch.cat(loss)
        loss = torch.sum(loss) / len(loss)
        losses['rel_loss'] = loss
    elif conf.entr_topk is not None and conf.entr_topk:
        # Note: This still uses a maximum of 1 relationship per edge
        # in the graph. Adding them all requires changing the data loading
        # process.
        loss = []

        start = 0
        for i, rel_reps_i in enumerate(result.rel_reps):
            n = rel_reps_i.shape[0]

            # Get rid of the background labels here:
            reps = result.rel_dists[start:start + n, 1:].contiguous().view(-1)
            if len(reps) <= conf.entr_topk:
                # Nothing to do for small graphs, but still advance the offset.
                start += n
                continue

            gt = result.rel_labels[start:start + n, -1].data.cpu()
            I = gt > 0
            gt = gt[I]
            gt = gt - 1  # Hacky shift to get rid of background labels.
            r = (n_rel - 1) * torch.arange(len(I))[I].long()
            gt_flat = r + gt
            n_pos = len(gt_flat)

            if n_pos == 0:
                # Nothing to do if there is no ground-truth data; still advance the offset.
                start += n
                continue

            reps_sorted, J = reps.sort(descending=True)
            reps_sorted_last = reps_sorted[conf.entr_topk:]
            J_last = J[conf.entr_topk:]

            # Hacky way of removing the ground-truth from J.
            J_last_bool = J_last != gt_flat[0]
            for j in range(n_pos - 1):
                J_last_bool *= (J_last != gt_flat[j + 1])
            J_last_bool = J_last_bool.type_as(reps)

            loss_i = []
            for j in range(n_pos):
                yj = gt_flat[j]
                fyj = reps[yj]
                loss_ij = torch.log(1. +
                                    torch.sum((reps_sorted_last - fyj).exp() *
                                              J_last_bool))
                loss_i.append(loss_ij)

            loss_i = torch.cat(loss_i)
            loss_i = torch.sum(loss_i) / len(loss_i)
            loss.append(loss_i)

            start += n

        loss = torch.cat(loss)
        loss = torch.sum(loss) / len(loss)
        losses['rel_loss'] = loss
    else:
        losses['rel_loss'] = F.cross_entropy(result.rel_dists,
                                             result.rel_labels[:, -1])
    loss = sum(losses.values())

    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip,
        verbose=verbose,
        clip=True)
    losses['total'] = loss
    optimizer.step()

    evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=True)
    evaluator_con = BasicSceneGraphEvaluator.all_modes(multiple_preds=False)
    assert conf.num_gpus == 1
    # assert conf.mode == 'predcls'
    for i, (pred_i, gt_idx) in enumerate(zip(result_preds, b.indexes)):
        boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = pred_i

        gt_entry = {
            'gt_classes': train.gt_classes[gt_idx].copy(),
            'gt_relations': train.relationships[gt_idx].copy(),
            'gt_boxes': train.gt_boxes[gt_idx].copy(),
        }
        assert np.all(objs_i[rels_i[:, 0]] > 0) and \
            np.all(objs_i[rels_i[:, 1]] > 0)

        pred_entry = {
            'pred_boxes': boxes_i * BOX_SCALE / IM_SCALE,
            'pred_classes': objs_i,
            'pred_rel_inds': rels_i,
            'obj_scores': obj_scores_i,
            'rel_scores': pred_scores_i,  # hack for now.
        }

        evaluator[conf.mode].evaluate_scene_graph_entry(
            gt_entry,
            pred_entry,
        )
        evaluator_con[conf.mode].evaluate_scene_graph_entry(
            gt_entry,
            pred_entry,
        )

    res = {x: y.data[0] for x, y in losses.items()}
    recalls = evaluator[conf.mode].result_dict[conf.mode + '_recall']
    recalls_con = evaluator_con[conf.mode].result_dict[conf.mode + '_recall']
    res.update({
        'recall20': np.mean(recalls[20]),
        'recall50': np.mean(recalls[50]),
        'recall100': np.mean(recalls[100]),
        'recall20_con': np.mean(recalls_con[20]),
        'recall50_con': np.mean(recalls_con[50]),
        'recall100_con': np.mean(recalls_con[100]),
    })

    res = pd.Series(res)
    return res
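The ml_loss and entr_topk branches above flatten the per-image (n, n_rel - 1) foreground relation scores row-major and turn each ground-truth (edge, predicate) pair into a flat index via edge_index * (n_rel - 1) + (predicate - 1). A small worked example with made-up sizes:

import torch

n, n_rel = 4, 6                      # 4 candidate edges, 6 predicates incl. background
rel_dists = torch.randn(n, n_rel)
reps = rel_dists[:, 1:].contiguous().view(-1)      # drop background column, flatten to [n * (n_rel - 1)] = [20]

gt_all = torch.tensor([0, 3, 0, 2])                # per-edge labels, 0 = background
I = gt_all > 0                                     # edges that have a foreground predicate
gt = gt_all[I] - 1                                 # shift labels to index the background-free columns
r = (n_rel - 1) * torch.arange(len(I))[I].long()   # row offsets into the flattened scores
gt_flat = r + gt                                   # edge 1 -> 5 * 1 + 2 = 7, edge 3 -> 5 * 3 + 1 = 16

gt_flat_onehot = torch.zeros(len(reps))
gt_flat_onehot.scatter_(0, gt_flat, 1)             # multi-hot target over all (edge, predicate) pairs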
Example #8
def train_batch(b):
    """
    :param b: contains:
          :param imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
          :param all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using
          :param all_anchor_inds: [num_anchors, 2] array of the indices into the concatenated
                                  RPN feature vector that give us all_anchors,
                                  each one (img_ind, fpn_idx)
          :param im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image.

          :param num_anchors_per_img: int, number of anchors in total over the feature pyramid per img

          Training parameters:
          :param train_anchor_inds: a [num_train, 5] array of indices for the anchors that will
                                    be used to compute the training loss (img_ind, fpn_idx)
          :param gt_boxes: [num_gt, 4] GT boxes over the batch.
          :param gt_classes: [num_gt, 2] gt classes where each one is (img_id, class)

    :return:
    """
    result = detector[b]
    scores = result.od_obj_dists
    box_deltas = result.od_box_deltas
    labels = result.od_obj_labels
    roi_boxes = result.od_box_priors
    bbox_targets = result.od_box_targets
    rpn_scores = result.rpn_scores
    rpn_box_deltas = result.rpn_box_deltas

    # detector loss
    valid_inds = (labels.data != 0).nonzero().squeeze(1)
    fg_cnt = valid_inds.size(0)
    bg_cnt = labels.size(0) - fg_cnt
    class_loss = F.cross_entropy(scores, labels)

    # No gather_nd in pytorch so instead convert first 2 dims of tensor to 1d
    box_reg_mult = 2 * (1. / FG_FRACTION) * fg_cnt / (fg_cnt + bg_cnt + 1e-4)
    twod_inds = valid_inds * box_deltas.size(1) + labels[valid_inds].data

    box_loss = bbox_loss(roi_boxes[valid_inds],
                         box_deltas.view(-1, 4)[twod_inds],
                         bbox_targets[valid_inds]) * box_reg_mult

    loss = class_loss + box_loss

    # RPN loss
    if not conf.use_proposals:
        train_anchor_labels = b.train_anchor_labels[:, -1]
        train_anchors = b.train_anchors[:, :4]
        train_anchor_targets = b.train_anchors[:, 4:]

        train_valid_inds = (train_anchor_labels.data == 1).nonzero().squeeze(1)
        rpn_class_loss = F.cross_entropy(rpn_scores, train_anchor_labels)

        # print("{} fg {} bg, ratio of {:.3f} vs {:.3f}. RPN {}fg {}bg ratio of {:.3f} vs {:.3f}".format(
        #     fg_cnt, bg_cnt, fg_cnt / (fg_cnt + bg_cnt + 1e-4), FG_FRACTION,
        #     train_valid_inds.size(0), train_anchor_labels.size(0)-train_valid_inds.size(0),
        #     train_valid_inds.size(0) / (train_anchor_labels.size(0) + 1e-4), RPN_FG_FRACTION), flush=True)
        rpn_box_mult = 2 * (1. / RPN_FG_FRACTION) * train_valid_inds.size(
            0) / (train_anchor_labels.size(0) + 1e-4)
        rpn_box_loss = bbox_loss(
            train_anchors[train_valid_inds], rpn_box_deltas[train_valid_inds],
            train_anchor_targets[train_valid_inds]) * rpn_box_mult

        loss += rpn_class_loss + rpn_box_loss
        res = pd.Series([
            rpn_class_loss.data[0], rpn_box_loss.data[0], class_loss.data[0],
            box_loss.data[0], loss.data[0]
        ], [
            'rpn_class_loss', 'rpn_box_loss', 'class_loss', 'box_loss', 'total'
        ])
    else:
        res = pd.Series([class_loss.data[0], box_loss.data[0], loss.data[0]],
                        ['class_loss', 'box_loss', 'total'])

    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip,
        clip=True)
    optimizer.step()

    return res
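The detector loss above picks, for each foreground RoI, only the box deltas predicted for that RoI's ground-truth class. Since PyTorch has no gather_nd, it flattens the first two dimensions of the [num_rois, num_classes, 4] delta tensor and indexes it with roi_index * num_classes + class. A small worked example with made-up sizes:

import torch

num_rois, num_classes = 3, 5
box_deltas = torch.randn(num_rois, num_classes, 4)   # per-class deltas for each RoI
labels = torch.tensor([2, 0, 4])                     # ground-truth class per RoI, 0 = background

valid_inds = (labels != 0).nonzero().squeeze(1)      # foreground RoIs: [0, 2]
twod_inds = valid_inds * box_deltas.size(1) + labels[valid_inds]   # [2, 14]

# Flatten [num_rois, num_classes, 4] -> [num_rois * num_classes, 4] and take one row per RoI.
picked = box_deltas.view(-1, 4)[twod_inds]           # deltas for (RoI 0, class 2) and (RoI 2, class 4)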
Example #9
def train_batch(b, verbose=False):
    """
    :param b: contains:
          :param imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
          :param all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using
          :param all_anchor_inds: [num_anchors, 2] array of the indices into the concatenated
                                  RPN feature vector that give us all_anchors,
                                  each one (img_ind, fpn_idx)
          :param im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image.

          :param num_anchors_per_img: int, number of anchors in total over the feature pyramid per img

          Training parameters:
          :param train_anchor_inds: a [num_train, 5] array of indices for the anchors that will
                                    be used to compute the training loss (img_ind, fpn_idx)
          :param gt_boxes: [num_gt, 4] GT boxes over the batch.
          :param gt_classes: [num_gt, 2] gt classes where each one is (img_id, class)
    :return:
    """
    #ipdb.set_trace()
    result = detector[b]
    losses = {}
    if conf.mode == 'sgdet':
        ############################  Detector Loss  #################################
        """
        # final classification
        labels = result.od_obj_labels  # [4000+]
        scores = result.od_obj_dists  # [4000+, 151]
        od_class_loss = F.cross_entropy(scores, labels)

        # final box location
        bbox_targets = result.od_box_targets  # [4000+, 4], gt box
        box_deltas = result.od_box_deltas  # [4000+, 151, 4], delta
        roi_boxes = result.od_box_priors  # [4000, 4], prior box

        # detector loss
        valid_inds = (labels.data != 0).nonzero().squeeze(1)
        fg_cnt = valid_inds.size(0)
        bg_cnt = labels.size(0) - fg_cnt

        # No gather_nd in pytorch so instead convert first 2 dims of tensor to 1d
        box_reg_mult = 2 * (1. / FG_FRACTION) * fg_cnt / (fg_cnt + bg_cnt + 1e-4)
        twod_inds = valid_inds * box_deltas.size(1) + labels[valid_inds].data

        od_box_loss = bbox_loss(roi_boxes[valid_inds], box_deltas.view(-1, 4)[twod_inds],
                             bbox_targets[valid_inds]) * box_reg_mult
        
        # RPN
        rpn_scores = result.rpn_scores  # [1536, 2], yes/no
        rpn_box_deltas = result.rpn_box_deltas  # [1536, 4]

        train_anchor_labels = b.train_anchor_labels[:, -1]
        train_anchors = b.train_anchors[:, :4]
        train_anchor_targets = b.train_anchors[:, 4:]

        train_valid_inds = (train_anchor_labels.data == 1).nonzero().squeeze(1)
        rpn_class_loss = F.cross_entropy(rpn_scores, train_anchor_labels)

        rpn_box_mult = 2 * (1. / RPN_FG_FRACTION) * train_valid_inds.size(0) / (train_anchor_labels.size(0) + 1e-4)
        rpn_box_loss = bbox_loss(train_anchors[train_valid_inds],
                                 rpn_box_deltas[train_valid_inds],
                                 train_anchor_targets[train_valid_inds]) * rpn_box_mult

        losses['rpn_class_loss'] = rpn_class_loss
        losses['rpn_box_loss'] = rpn_box_loss
        
        losses['od_class_loss'] = od_class_loss
        losses['od_box_loss'] = od_box_loss
        """
        #import ipdb
        #ipdb.set_trace()
        ############################  Detector Loss  #################################
        """
        ############################  LSTM Box Loss  #################################
        lstm_labels = result.rm_obj_labels  # [384]
        lstm_valid_inds = (lstm_labels.data != 0).nonzero().squeeze(1)
        lstm_fg_cnt = lstm_valid_inds.size(0)
        lstm_bg_cnt = lstm_labels.size(0) - lstm_fg_cnt
        lstm_box_reg_mult = 2 * (1. / FG_FRACTION) * lstm_fg_cnt / (lstm_fg_cnt + lstm_bg_cnt + 1e-4)
        lstm_rois = result.rm_box_priors.detach()
        lstm_deltas = result.lstm_box_deltas
        lstm_targets = result.rm_box_targets.detach()
        lstm_twod_inds = lstm_valid_inds * result.lstm_box_deltas.size(1) + lstm_labels[lstm_valid_inds].data
        lstm_box_loss = lstm_box_reg_mult * bbox_loss(lstm_rois[lstm_valid_inds], lstm_deltas.view(-1,4)[lstm_twod_inds],lstm_targets[lstm_valid_inds])
        losses['lstm_box_loss'] = lstm_box_loss
        ############################  LSTM Box Loss  #################################
        """
    # cross_entropy(input, target):
    # input, (#obj, 151), vector of #classes dim, which will be converted into probability (scores) by log_softmax
    # target, (#obj), corresponding obj labels belong to [1,150], which will be converted into one-hot vector
    # rm_obj_dists.shape:[164, 151]
    # rm_obj_labels.shape:[164]
    # result.rel_labels.shape:[1810, 4], [img_ind, box0_ind, box1_ind, rel_type]
    # result.rel_dists.shape:[1810, 51]
    margin = 0.6
    losses['triplet'] = 15 * torch.mean(
        torch.max(result.anchor, result.neg - result.pos + margin))
    losses['class_loss'] = F.cross_entropy(result.rm_obj_dists,
                                           result.rm_obj_labels)
    losses['rel_loss'] = F.cross_entropy(result.rel_dists,
                                         result.rel_labels[:, -1])
    loss = sum(losses.values())
    optimizer.zero_grad(
    )  # When perform loss.backward() the gradients are accumulated inplace in each Variable that requires gradient
    loss.backward()
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None
         ],  # p.grad is None when param don't backward propagate
        max_norm=conf.clip,
        verbose=verbose,
        clip=True)
    losses['total'] = loss
    optimizer.step()  # update the net
    res = pd.Series({x: y.data[0] for x, y in losses.items()})
    # result.ratio packs three (hard, soft, denominator) triples; 'fenmu' is pinyin for "denominator"
    hard = (result.ratio.data[0] + result.ratio.data[3] +
            result.ratio.data[6]) / 3
    soft = (result.ratio.data[1] + result.ratio.data[4] +
            result.ratio.data[7]) / 3
    fenmu = (result.ratio.data[2] + result.ratio.data[5] +
             result.ratio.data[8]) / 3
    return res, hard, soft, fenmu
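The comment block before the loss terms above summarizes F.cross_entropy's contract: the input is an [N, num_classes] score matrix (log_softmax is applied internally) and the target is an [N] vector of class indices, not one-hot vectors. A small shape sketch with made-up sizes:

import torch
import torch.nn.functional as F

num_obj, num_obj_classes = 164, 151
rm_obj_dists = torch.randn(num_obj, num_obj_classes)            # raw scores; softmax happens inside
rm_obj_labels = torch.randint(0, num_obj_classes, (num_obj,))   # one class index per object, not one-hot

class_loss = F.cross_entropy(rm_obj_dists, rm_obj_labels)       # scalar, averaged over the 164 objects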
Example #10
def train_batch_rl(count,
                   b,
                   verbose=False,
                   base_evaluator=None,
                   train_evaluator=None):
    """
    :param b: contains:
          :param imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
          :param all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using
          :param all_anchor_inds: [num_anchors, 2] array of the indices into the concatenated
                                  RPN feature vector that give us all_anchors,
                                  each one (img_ind, fpn_idx)
          :param im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image.

          :param num_anchors_per_img: int, number of anchors in total over the feature pyramid per img

          Training parameters:
          :param train_anchor_inds: a [num_train, 5] array of indices for the anchors that will
                                    be used to compute the training loss (img_ind, fpn_idx)
          :param gt_boxes: [num_gt, 4] GT boxes over the batch.
          :param gt_classes: [num_gt, 2] gt classes where each one is (img_id, class)
    :return:
    """
    detector.eval()
    base_eval = detector[b]
    base_reward = float(
        get_recall_x(count, [base_eval], base_evaluator, 100)[-1])
    del base_eval

    detector.rl_train = True
    detector.train()
    fix_batchnorm(detector)

    for k in range(SAMPLE_NUM):
        result, train_eval = detector[b]
        current_reward = float(
            get_recall_x(count, [train_eval], train_evaluator, 100)[-1])
        del train_eval

        losses = {}

        if base_reward == current_reward or float(sum(
                result.gen_tree_loss)) == 0:
            losses['policy_gradient_gen_tree_loss'] = 0
            loss = 0
            continue

        if conf.use_rl_tree:
            # policy gradient loss
            losses['policy_gradient_gen_tree_loss'] = cal_policy_gradient_loss(
                result.gen_tree_loss, current_reward, base_reward)
            #losses['entropy_loss'] = sum(result.entropy_loss) * 5e-4
        else:
            losses['binary_gate_loss'] = bceloss(result.pair_gate,
                                                 result.pair_gt.view(-1))

        loss = sum(losses.values()) / SAMPLE_NUM
        loss.backward()
        del result
    detector.rl_train = False
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip,
        verbose=verbose,
        clip=True)
    optimizer.step()
    optimizer.zero_grad()
    losses['total'] = loss
    res = pd.Series({x: float(y) for x, y in losses.items()})

    return res
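Example #10 follows a REINFORCE-with-baseline pattern: the detector is first run in eval mode to obtain a baseline recall, then each of SAMPLE_NUM sampled trees is scored and only its advantage over the baseline drives the policy-gradient term. cal_policy_gradient_loss is a helper from the original repository; a purely hypothetical sketch of the idea, assuming gen_tree_loss holds the negative log-probabilities of the sampled decisions:

# Hypothetical sketch of a REINFORCE-with-baseline loss; the real
# cal_policy_gradient_loss in the source repository may differ.
def cal_policy_gradient_loss(neg_log_probs, current_reward, base_reward):
    advantage = current_reward - base_reward
    # Positive advantage: lower the negative log-probability of the sampled
    # decisions (make them more likely); negative advantage: raise it.
    return advantage * sum(neg_log_probs)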
Example #11
def train_batch(batch, bi):
    """
    Args:
        batch: contains:
            imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
            all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using
            all_anchor_inds: [num_anchors, 2] array of the indices into the concatenated
                                  RPN feature vector that give us all_anchors,
                                  each one (img_ind, fpn_idx)
            im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image.

            num_anchors_per_img: int, number of anchors in total over the feature pyramid per img

          Training parameters:
            train_anchor_inds: a [num_train, 5] array of indices for the anchors that will
                                    be used to compute the training loss (img_ind, fpn_idx)
            gt_boxes: [num_gt, 4] GT boxes over the batch.
            gt_classes: [num_gt, 2] gt classes where each one is (img_id, class)
        bi: batch index, integer
    Returns:
        result: pd.Series, result dict
    """
    result = detector[batch]
    if result is None:
        print('Error! No Pos Relation', bi)
        return

    losses = dict()

    class_loss = F.cross_entropy(result.rm_obj_dists, result.rm_obj_labels)
    losses['class_loss'] = class_loss.data[0]

    rel_loss = F.cross_entropy(result.rel_dists, result.rel_labels[:, -1])
    losses['rel_loss'] = rel_loss.data[0]

    loss = class_loss + rel_loss
    if conf.model.split('_')[0] == 'fcknet':
        rel_pn_loss = F.cross_entropy(result.rel_pn_dists, result.rel_pn_labels)
        losses['rel_pn_loss'] = rel_pn_loss.data[0]
        loss += rel_pn_loss

    # TODO
    if conf.model == 'fcknet_v3':
        rel_mem_loss = F.cross_entropy(result.rel_mem_dists, result.rel_labels[:, -1])
        losses['mem_loss'] = rel_mem_loss.data[0]
        loss += rel_mem_loss
    if bi % conf.print_interval == 0 and bi >= conf.print_interval:
        if conf.model.split('_')[0] == 'fcknet':
            print(
                'rel_pn_loss: %.4f, cls_loss: %.4f, rel_loss: %.4f' %
                (losses['rel_pn_loss'], losses['class_loss'], losses['rel_loss'])
            )
            if conf.model == 'fcknet_v3':
                print(
                    'rel_mem_loss: %.4f' % losses['mem_loss']
                )

        else:
            print(
                'cls_loss: %.4f, rel_loss: %.4f' %
                (losses['class_loss'], losses['rel_loss'])
            )

    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip, verbose=bi % (conf.print_interval*10) == 0, clip=True
    )

    losses['total'] = loss.data[0]
    losses['trim_pos'] = result.rel_trim_pos[0]
    losses['trim_total'] = result.rel_trim_total[0]
    losses['sample_pos'] = result.rel_sample_pos[0]
    losses['sample_neg'] = result.rel_sample_neg[0]
    losses['relpn_recall'] = result.rel_pn_recall[0]
    optimizer.step()
    res = pd.Series({x: y for x, y in losses.items()})
    return res