Example no. 1
    def train(self, labels, data):
        """
        Base class training function.

        Called to train this layer.

        Args:
            -> labels | correct output | dtype: list
            -> data | network input | dtype: tf.Tensor
        """
        # TF bindings for gradient descent; the forward pass must run inside
        # the tape so that gradients can flow back through it
        with tf.GradientTape() as tape:
            prediction = self(data, training=True)

            cls_loss = 0.0
            reg_loss = 0.0

            # iterate over anchors
            for index, (pred, anchor) in enumerate(prediction):

                # loss calculation (see losses.py)
                cls_loss += keras.losses.binary_crossentropy(
                    labels[index][1], pred[1])
                reg_loss += smooth_l1_loss(labels[index][0], pred[0], anchor)
            loss = (cls_loss / 256) + (self.const * (reg_loss / (9 * 256)))

        # calculate gradients for the recorded operations
        grads = tape.gradient(loss, self.trainable_weights)
        # apply said gradients (with Adam)
        self.optimiser.apply_gradients(zip(grads, self.trainable_weights))
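Note: the `smooth_l1_loss` helper called above is defined elsewhere and not shown on this page. A minimal sketch of what it could look like for the TensorFlow examples, assuming it applies the usual Huber-style penalty to the raw box deltas (the `anchor` argument and the `beta` threshold are assumptions, not taken from the source):

    import tensorflow as tf

    def smooth_l1_loss(targets, preds, anchor=None, beta=1.0):
        # Smooth L1 (Huber): quadratic for small errors, linear for large ones.
        # `anchor` is accepted only to match the call sites above; how the
        # deltas are encoded relative to it is an assumption.
        diff = tf.abs(targets - preds)
        per_element = tf.where(diff < beta,
                               0.5 * tf.square(diff) / beta,
                               diff - 0.5 * beta)
        return tf.reduce_sum(per_element)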
Example no. 2
    def forward(self, head_features, gt_boxes, im_info, mode='gallery'):
        if self.is_train:
            rois, rpn_info, label, bbox_info, roi_trans_param = \
                self.region_proposal(head_features, gt_boxes, im_info)
            rpn_label, rpn_bbox_info, rpn_cls_score, rpn_bbox_pred = rpn_info

            rpn_cls_score = rpn_cls_score.view(-1, 2)
            rpn_label = rpn_label.view(-1)
            rpn_select = Variable((rpn_label.data != -1)).nonzero().view(-1)
            rpn_cls_score = rpn_cls_score.index_select(
                0, rpn_select).contiguous().view(-1, 2)
            rpn_label = rpn_label.index_select(
                0, rpn_select).contiguous().view(-1)

            rpn_cls_loss = F.cross_entropy(rpn_cls_score, rpn_label)
            rpn_box_loss = smooth_l1_loss(rpn_bbox_pred,
                                          rpn_bbox_info,
                                          sigma=3.0,
                                          dim=[1, 2, 3])
            rpn_loss = (rpn_cls_loss, rpn_box_loss)

            # RoI pooling (not working yet)
            # pooled_feat = self.roi_pool(head_features, rois)

            # Crop and resize
            pooled_feat = self.pooling(head_features, rois, max_pool=False)
            transformed_feat = self.spatial_transform(pooled_feat,
                                                      roi_trans_param)

            return pooled_feat, transformed_feat, rpn_loss, label, bbox_info

        else:
            if mode == 'gallery':
                rois, roi_trans_param = self.region_proposal(
                    head_features, gt_boxes, im_info)

                # RoI pooling (not working yet)
                # pooled_feat = self.roi_pool(head_features, rois)

                # Crop and resize
                pooled_feat = self.pooling(head_features, rois, max_pool=False)
                transformed_feat = self.spatial_transform(
                    pooled_feat, roi_trans_param)
                return rois, pooled_feat, transformed_feat

            elif mode == 'query':
                # TODO: decide whether to transform the query
                # RoI pooling (not working yet)
                # pooled_feat = self.roi_pool(head_features, rois)

                # Crop and resize
                pooled_feat = self.pooling(head_features, gt_boxes, False)
                return pooled_feat

            else:
                raise KeyError(mode)
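The PyTorch examples call `smooth_l1_loss` with `sigma` and `dim` keywords, which matches the classic py-faster-rcnn formulation. A sketch under that assumption, where `bbox_info` is taken to bundle the regression targets with inside/outside weight masks (examples 6 and 7 below use yet another variant that receives a precomputed difference):

    import torch

    def smooth_l1_loss(bbox_pred, bbox_info, sigma=1.0, dim=[1]):
        # py-faster-rcnn style smooth L1 (a sketch; the real helper lives
        # elsewhere in the repo). `bbox_info` is assumed to bundle targets
        # and inside/outside weight masks.
        bbox_targets, inside_weights, outside_weights = bbox_info
        sigma2 = sigma ** 2
        diff = inside_weights * (bbox_pred - bbox_targets)
        abs_diff = torch.abs(diff)
        # quadratic below 1/sigma^2, linear above
        flag = (abs_diff < 1.0 / sigma2).detach().float()
        loss = (flag * 0.5 * sigma2 * diff ** 2
                + (1.0 - flag) * (abs_diff - 0.5 / sigma2))
        loss = outside_weights * loss
        for d in sorted(dim, reverse=True):
            loss = loss.sum(d)
        return loss.mean()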
Example no. 3
    def train(self, labels, data):
        """
        Base class training function.

        Called to train this layer.

        This method uses a unique training scheme:
        anchors and scores are calculated by the RPN,
        then the classifier is run on these anchors,
        using the inverted losses from the scores as
        their respective labels.

        This scheme allows both networks to be trained
        at the same time; however, the better the RPN
        performs, the better the classifier trains.

        Args:
            -> labels | correct output | dtype: list
            -> data | network input | dtype: tf.Tensor
        """

        with tf.GradientTape() as tape:
            predictions = self.rpn(data)

            cls_loss = 0.0
            reg_loss = 0.0

            # iterate over anchors
            for index, (pred, anchor) in enumerate(predictions):

                # RPN loss, part 1
                _cls = keras.losses.binary_crossentropy(
                    labels[index][1], pred[1])
                cls_loss += _cls

                # train the classifier on this anchor
                with tf.GradientTape() as tape2:
                    pred2 = self.classifier(anchor)
                    class_loss = keras.losses.binary_crossentropy((1 - _cls),
                                                                  pred2)

                # classifier gradient descent (using the inner tape)
                grad = tape2.gradient(class_loss,
                                      self.classifier.trainable_weights)
                self.classifier.optimizer.apply_gradients(
                    zip(grad, self.classifier.trainable_weights))

                # RPN loss, part 2
                reg_loss += smooth_l1_loss(labels[index][0], pred[0], anchor)
            rpn_loss = (cls_loss / 256) + (self.rpn.const * (reg_loss /
                                                             (9 * 256)))

        # RPN gradient descent
        grad = tape.gradient(rpn_loss, self.rpn.trainable_weights)
        self.rpn.optimizer.apply_gradients(
            zip(grad, self.rpn.trainable_weights))
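The two-tape pattern above (an outer tape for the RPN, an inner one for the classifier) can be confusing: each tape only differentiates operations recorded while it was active, with respect to its own targets. A self-contained illustration of that behaviour:

    import tensorflow as tf

    w_outer = tf.Variable(2.0)   # stands in for the RPN weights
    w_inner = tf.Variable(3.0)   # stands in for the classifier weights

    with tf.GradientTape() as outer_tape:
        y = w_outer * 4.0
        with tf.GradientTape() as inner_tape:
            z = w_inner * y                       # recorded on both tapes
        dz = inner_tape.gradient(z, w_inner)      # -> 8.0 (= y)
        # the inner update can be applied here without disturbing outer_tape
    dy = outer_tape.gradient(y, w_outer)          # -> 4.0

    print(dz.numpy(), dy.numpy())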
Example no. 4
    def forward(self, head_features, gt_boxes, im_info, mode='gallery'):
        if self.is_train:
            if mode == 'gallery':
                rois, rpn_info, label, bbox_info = self.region_proposal(
                    head_features, gt_boxes, im_info)
                rpn_label, rpn_bbox_info, rpn_cls_score, rpn_bbox_pred = \
                    rpn_info

                rpn_cls_score = rpn_cls_score.view(-1, 2)
                rpn_label = rpn_label.view(-1)
                rpn_select = Variable(
                    (rpn_label.data != -1)).nonzero().view(-1)
                rpn_cls_score = rpn_cls_score.index_select(
                    0, rpn_select).contiguous().view(-1, 2)
                rpn_label = rpn_label.index_select(
                    0, rpn_select).contiguous().view(-1)

                rpn_cls_loss = F.cross_entropy(rpn_cls_score, rpn_label)
                rpn_box_loss = smooth_l1_loss(rpn_bbox_pred,
                                              rpn_bbox_info,
                                              sigma=3.0,
                                              dim=[1, 2, 3])
                rpn_loss = (rpn_cls_loss, rpn_box_loss)

                # TODO: add roi-pooling
                pooled_feat = self.pooling(head_features, rois, max_pool=False)

                return pooled_feat, rpn_loss, label, bbox_info

            elif mode == 'query':
                pooled_feat = self.pooling(head_features, gt_boxes, False)
                return pooled_feat

            else:
                raise KeyError(mode)

        else:
            if mode == 'gallery':
                rois = self.region_proposal(head_features, gt_boxes, im_info)
                pooled_feat = self.pooling(head_features, rois, max_pool=False)

                return rois, pooled_feat

            elif mode == 'query':
                pooled_feat = self.pooling(head_features, gt_boxes, False)
                return pooled_feat

            else:
                raise KeyError(mode)
Example no. 5
    def forward(self, im_data, gt_boxes, im_info, mode='gallery'):
        if self.is_train:
            net_conv = self.head(im_data)
            # three of the returned values (rpn_loss, label, bbox_info) are tuples
            pooled_feat, trans_feat, rpn_loss, label, bbox_info = self.strpn(
                net_conv, gt_boxes, im_info)
            if self.net_name == 'vgg16':
                pooled_feat = pooled_feat.view(pooled_feat.size(0), -1)
                fc7 = self.tail(pooled_feat)
            else:
                fc7 = self.tail(pooled_feat).mean(3).mean(2)
            cls_score = self.cls_score_net(fc7)
            bbox_pred = self.bbox_pred_net(fc7)

            reid_fc7 = self.tail(trans_feat).mean(3).mean(2)
            reid_feat = F.normalize(self.reid_feat_net(reid_fc7))
            # reid_feat = F.normalize(self.reid_feat_net(fc7))

            cls_pred = torch.max(cls_score, 1)[1]
            cls_prob = F.softmax(cls_score, 1)
            det_label, pid_label = label

            det_label = det_label.view(-1)
            cls_loss = F.cross_entropy(cls_score.view(-1, 2), det_label)
            bbox_loss = smooth_l1_loss(bbox_pred, bbox_info)
            reid_loss = oim_loss(reid_feat, pid_label, self.lut, self.queue,
                                 gt_boxes.size(0), self.lut_momentum)
            rpn_cls_loss, rpn_box_loss = rpn_loss

            return rpn_cls_loss, rpn_box_loss, cls_loss, bbox_loss, reid_loss

        else:
            if mode == 'gallery':
                net_conv = self.head(im_data)
                rois, pooled_feat, trans_feat = self.strpn(
                    net_conv, gt_boxes, im_info)
                if self.net_name == 'vgg16':
                    pooled_feat = pooled_feat.view(pooled_feat.size(0), -1)
                    fc7 = self.tail(pooled_feat)
                else:
                    fc7 = self.tail(pooled_feat).mean(3).mean(2)
                cls_score = self.cls_score_net(fc7)
                bbox_pred = self.bbox_pred_net(fc7)

                reid_fc7 = self.tail(trans_feat).mean(3).mean(2)
                reid_feat = F.normalize(self.reid_feat_net(reid_fc7))
                # reid_feat = F.normalize(self.reid_feat_net(fc7))

                cls_pred = torch.max(cls_score, 1)[1]
                cls_prob = F.softmax(cls_score, 1)

                with open('config.yml', 'r') as f:
                    config = yaml.safe_load(f)
                mean = config['train_bbox_normalize_means']
                std = config['train_bbox_normalize_stds']
                means = bbox_pred.data.new(mean).repeat(2).unsqueeze(
                    0).expand_as(bbox_pred)
                stds = bbox_pred.data.new(std).repeat(2).unsqueeze(
                    0).expand_as(bbox_pred)
                bbox_pred = bbox_pred.mul(Variable(stds)).add(Variable(means))

                cls_prob = cls_prob.data.cpu().numpy()
                bbox_pred = bbox_pred.data.cpu().numpy()
                rois = rois.data.cpu().numpy()
                reid_feat = reid_feat.data.cpu().numpy()

                return cls_prob, bbox_pred, rois, reid_feat

            elif mode == 'query':
                net_conv = self.head(im_data)
                # TODO: move pooling layer from strpn to SIPN
                pooled_feat = self.strpn(net_conv, gt_boxes, im_info, mode)
                if self.net_name == 'vgg16':
                    pooled_feat = pooled_feat.view(pooled_feat.size(0), -1)
                    fc7 = self.tail(pooled_feat)
                else:
                    fc7 = self.tail(pooled_feat).mean(3).mean(2)
                reid_feat = F.normalize(self.reid_feat_net(fc7))

                return reid_feat.data.cpu().numpy()

            else:
                raise KeyError(mode)
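The gallery branch above un-normalizes `bbox_pred` with per-coordinate means and stds read from `config.yml`. A small standalone version of that step using current PyTorch idioms (`new_tensor` in place of the legacy `.data.new(...)` and `Variable`); the statistics shown are the common Faster R-CNN defaults, assumed rather than taken from this repo:

    import torch

    # hypothetical train_bbox_normalize_means / _stds from config.yml
    mean = [0.0, 0.0, 0.0, 0.0]
    std = [0.1, 0.1, 0.2, 0.2]

    bbox_pred = torch.randn(3, 8)  # 2 classes x 4 coords, as in view(-1, 2)

    # broadcast the per-coordinate stats across both classes and all rows,
    # mirroring the .repeat(2).unsqueeze(0).expand_as(...) calls above
    means = bbox_pred.new_tensor(mean).repeat(2).unsqueeze(0).expand_as(bbox_pred)
    stds = bbox_pred.new_tensor(std).repeat(2).unsqueeze(0).expand_as(bbox_pred)

    # undo the target normalization that was applied during training
    bbox_pred = bbox_pred.mul(stds).add(means)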
Example no. 6
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
            angle_loss (Tensor)
        """
        # anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        N, A, H, W = objectness.shape
        N, AxC, H, W = box_regression.shape
        objectness = objectness.permute(0, 2, 3,
                                        1).reshape(-1)  # same shape as labels
        regression = box_regression.permute(0, 2, 3, 1).reshape(
            -1, AxC // A)  # same shape as regression targets (N,5)

        # objectness, box_regression = \
        #         concat_box_prediction_layers(objectness, box_regression)
        # objectness = objectness.squeeze()

        total_pos = sampled_pos_inds.numel()
        total_neg = sampled_neg_inds.numel()
        total_samples = total_pos + total_neg

        pos_regression = regression[
            sampled_pos_inds]  # (N, 5) -> xc,yc,w,h,theta
        pos_regression_targets = regression_targets[
            sampled_pos_inds]  # (N, 5) -> xc,yc,w,h,theta
        pos_angles = pos_regression[:, -1]  #.clone()
        pos_angles_targets = pos_regression_targets[:, -1]  #.clone()
        box_loss = smooth_l1_loss(
            # pos_regression[:,:-1].clone() - pos_regression_targets[:,:-1].clone(),
            pos_regression[:, :-1] - pos_regression_targets[:, :-1],
            beta=1.0 / 9,
            size_average=False,
        )
        # For targets where height and width are roughly similar, there may
        # be ambiguity in angle regression: e.g. if height and width are
        # equal, the angle could be regressed as either -90 or 0 degrees,
        # and we don't want to penalize this. The commented-out code below
        # experimented with relaxing the angle loss for such targets:
        # THRESH = 0.12
        # all_matched_targets = torch.cat([t[mt_idxs] for t, mt_idxs in zip(targets, matched_target_idxs)], dim=0)[sampled_pos_inds]
        # target_w_to_h_ratio = torch.div(all_matched_targets[:, 2], all_matched_targets[:, 3])
        # target_w_to_h_ratio_diff = torch.abs(1.0 - target_w_to_h_ratio)
        # y = target_w_to_h_ratio_diff > THRESH
        # n = target_w_to_h_ratio_diff <= THRESH
        # angle_loss_y = torch.abs(torch.sin(pos_angles[y] - pos_angles_targets[y])).mean()
        # an = pos_angles_targets[n]
        #
        # # cond = n < beta
        # # loss = torch.where(pos_angles_targets[y], 0.5 * n ** 2 / beta, n - 0.5 * beta)
        # angle_loss_n = smooth_l1_loss(torch.sin(pos_angles[n] - pos_angles_targets[n])).mean()

        # angle_loss = (torch.sum(y) * angle_loss_y + torch.sum(n) * angle_loss_n) / total_pos
        angle_loss = torch.abs(torch.sin(pos_angles -
                                         pos_angles_targets)).mean()
        # angle_loss = smooth_l1_loss(torch.sin(pos_angles - pos_angles_targets))

        box_loss = box_loss / total_pos  # note: the upstream default normalizer was sampled_inds.numel()

        # objectness_weights = torch.ones_like(labels)
        # objectness_weights[sampled_pos_inds] = float(total_pos) / total_samples
        # objectness_weights[sampled_neg_inds] = float(total_neg) / total_samples

        # criterion = nn.BCELoss(reduce=False)
        # entropy_loss = criterion(objectness[sampled_inds].sigmoid(), labels[sampled_inds])
        # objectness_loss = torch.mul(entropy_loss, objectness_weights[sampled_inds]).mean()
        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds],
            labels[sampled_inds]  #, weight=objectness_weights[sampled_inds]
        )

        return objectness_loss, box_loss, angle_loss
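A quick sanity check on the angle term used above: because it is |sin(delta_theta)|, an error that is an exact multiple of pi costs nothing (a box is identical after a half turn), whereas a pi/2 error costs the maximum, which is what the commented-out block tried to relax for near-square targets:

    import math
    import torch

    # angle errors: exact, half turn, quarter turn
    delta = torch.tensor([0.0, math.pi, math.pi / 2])
    print(torch.abs(torch.sin(delta)))  # -> [0., ~0., 1.]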
Example no. 7
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
            angle_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        labels = cat(proposals["labels"], dim=0)
        regression_targets = cat(proposals["regression_targets"], dim=0)

        # get positive labels
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[sampled_pos_inds_subset]
        total_pos = labels_pos.numel()

        if total_pos == 0:
            # all zeros; .sum() is a convenient way to get a torch scalar
            zero = labels_pos.sum()
            return zero, zero, zero

        # weighted classification loss (to counter class imbalance, i.e. too many negatives)
        with torch.no_grad():
            num_classes = class_logits.shape[-1]
            label_cnts = torch.stack([(labels == x).sum()
                                      for x in range(num_classes)])
            label_weights = 1.0 / label_cnts.to(dtype=torch.float32)
            label_weights /= num_classes  # equal class weighting
        classification_loss = F.cross_entropy(class_logits,
                                              labels,
                                              weight=label_weights)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        if self.cls_agnostic_bbox_reg:
            map_inds = torch.arange(REGRESSION_CN,
                                    REGRESSION_CN * 2,
                                    device=device)
        else:
            map_inds = REGRESSION_CN * labels_pos[:, None] + torch.arange(
                REGRESSION_CN, device=device)

        pos_reg_pred = box_regression[sampled_pos_inds_subset[:, None],
                                      map_inds]
        pos_reg_targets = regression_targets[sampled_pos_inds_subset]
        box_loss = smooth_l1_loss(
            pos_reg_pred[:, :-1] - pos_reg_targets[:, :-1],
            size_average=False,
            beta=1,
        )

        angle_loss = torch.abs(
            torch.sin(pos_reg_pred[:, -1] - pos_reg_targets[:, -1])).mean()
        box_loss = 2.0 * box_loss / total_pos  # alternative normalizer: labels.numel()

        return classification_loss, box_loss, angle_loss
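The `map_inds` arithmetic above uses advanced indexing to pick, for each positive sample, the `REGRESSION_CN` regression channels belonging to its ground-truth class. A tiny worked example, assuming `REGRESSION_CN = 5` (four box coordinates plus an angle, matching the `[:, :-1]` / `[:, -1]` split):

    import torch

    REGRESSION_CN = 5  # xc, yc, w, h, theta (assumed)

    # two positive samples with ground-truth classes 1 and 2
    labels_pos = torch.tensor([1, 2])
    sampled_pos_inds_subset = torch.tensor([0, 1])

    # per-class regression output: 3 classes x 5 channels = 15 columns
    box_regression = torch.arange(2 * 15, dtype=torch.float32).view(2, 15)

    # for class c, select columns [5c, 5c + 5)
    map_inds = REGRESSION_CN * labels_pos[:, None] + torch.arange(REGRESSION_CN)

    pos_reg_pred = box_regression[sampled_pos_inds_subset[:, None], map_inds]
    print(pos_reg_pred)
    # row 0 -> columns 5..9 of sample 0 (class 1): values 5..9
    # row 1 -> columns 10..14 of sample 1 (class 2): values 25..29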