def train(self, labels, data):
    """
    Base class func.
    Train function called to train the layer.

    Args:
        -> labels | correct output | dtype: list
        -> data   | system input   | dtype: tf.Tensor
    """
    # TF bindings for gradient descent: the forward pass must run inside
    # the tape so gradients can be computed against the loss
    with tf.GradientTape() as tape:
        prediction = self(data, training=True)
        cls_loss = 0.0
        reg_loss = 0.0
        # iterate anchors
        for index, (pred, anchor) in enumerate(prediction):
            # loss calc (see losses.py)
            cls_loss += keras.losses.binary_crossentropy(
                labels[index][1], pred[1])
            reg_loss += smooth_l1_loss(labels[index][0], pred[0], anchor)
        loss = (cls_loss / 256) + (self.const * (reg_loss / (9 * 256)))
    # calculate gradients of the recorded pass
    grads = tape.gradient(loss, self.trainable_weights)
    # apply said gradients (with Adam)
    self.optimiser.apply_gradients(zip(grads, self.trainable_weights))
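# A minimal sketch of the smooth_l1_loss helper referenced above ("see
# losses.py"); the real implementation lives elsewhere. This assumes the
# standard smooth L1 (Huber) definition and that the regression values are
# already encoded relative to the anchor, so `anchor` is only accepted to
# match the call site. The delta parameter is an illustrative assumption.
def smooth_l1_loss(target, pred, anchor, delta=1.0):
    diff = tf.abs(tf.cast(target, tf.float32) - tf.cast(pred, tf.float32))
    # quadratic below delta, linear above, as in Fast R-CNN
    loss = tf.where(diff < delta,
                    0.5 * tf.square(diff) / delta,
                    diff - 0.5 * delta)
    return tf.reduce_sum(loss)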
def forward(self, head_features, gt_boxes, im_info, mode='gallery'):
    if self.is_train:
        rois, rpn_info, label, bbox_info, roi_trans_param = \
            self.region_proposal(head_features, gt_boxes, im_info)
        rpn_label, rpn_bbox_info, rpn_cls_score, rpn_bbox_pred = rpn_info

        rpn_cls_score = rpn_cls_score.view(-1, 2)
        rpn_label = rpn_label.view(-1)
        rpn_select = Variable((rpn_label.data != -1)).nonzero().view(-1)
        rpn_cls_score = rpn_cls_score.index_select(
            0, rpn_select).contiguous().view(-1, 2)
        rpn_label = rpn_label.index_select(
            0, rpn_select).contiguous().view(-1)
        rpn_cls_loss = F.cross_entropy(rpn_cls_score, rpn_label)
        rpn_box_loss = smooth_l1_loss(rpn_bbox_pred, rpn_bbox_info,
                                      sigma=3.0, dim=[1, 2, 3])
        rpn_loss = (rpn_cls_loss, rpn_box_loss)

        # Roi-pooling (unable to work now)
        # pooled_feat = self.roi_pool(head_features, rois)

        # Crop and resize
        pooled_feat = self.pooling(head_features, rois, max_pool=False)
        transformed_feat = self.spatial_transform(pooled_feat, roi_trans_param)

        return pooled_feat, transformed_feat, rpn_loss, label, bbox_info
    else:
        if mode == 'gallery':
            rois, roi_trans_param = self.region_proposal(
                head_features, gt_boxes, im_info)

            # Roi-pooling (unable to work now)
            # pooled_feat = self.roi_pool(head_features, rois)

            # Crop and resize
            pooled_feat = self.pooling(head_features, rois, max_pool=False)
            transformed_feat = self.spatial_transform(
                pooled_feat, roi_trans_param)

            return rois, pooled_feat, transformed_feat
        elif mode == 'query':
            # TODO: whether to transform query
            # Roi-pooling (unable to work now)
            # pooled_feat = self.roi_pool(head_features, rois)

            # Crop and resize
            pooled_feat = self.pooling(head_features, gt_boxes, False)
            return pooled_feat
        else:
            raise KeyError(mode)
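# A possible shape for the smooth_l1_loss helper called above, sketched only
# from its call sites (the sigma and dim keyword arguments). It follows the
# py-faster-rcnn style loss; the assumption that the second argument bundles
# (targets, inside_weights, outside_weights) is an illustration, not taken
# from this file.
def smooth_l1_loss(bbox_pred, bbox_info, sigma=1.0, dim=[1]):
    bbox_targets, bbox_inside_weights, bbox_outside_weights = bbox_info
    sigma_2 = sigma ** 2
    box_diff = bbox_inside_weights * (bbox_pred - bbox_targets)
    abs_diff = torch.abs(box_diff)
    # quadratic inside |x| < 1 / sigma^2, linear outside
    flag = (abs_diff < 1.0 / sigma_2).detach().float()
    loss = flag * 0.5 * sigma_2 * box_diff ** 2 \
        + (1.0 - flag) * (abs_diff - 0.5 / sigma_2)
    loss = bbox_outside_weights * loss
    # sum over the requested dims, then average what is left
    for d in sorted(dim, reverse=True):
        loss = loss.sum(d)
    return loss.mean()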
def train(self, labels, data):
    """
    Base class func.
    Train function called to train the layer.

    This layer uses a unique training scheme: anchors and scores are
    calculated by the RPN, then the classifier is run on these anchors
    using the inverse of the per-anchor RPN losses as its labels.
    This allows both networks to be trained at the same time, although
    the classifier trains better the better the RPN performs.

    Args:
        -> labels | correct output | dtype: list
        -> data   | system input   | dtype: tf.Tensor
    """
    with tf.GradientTape() as tape:
        predictions = self.rpn(data)
        cls_loss = 0.0
        reg_loss = 0.0
        # iterate anchors
        for index, (pred, anchor) in enumerate(predictions):
            # RPN loss part 1: anchor classification
            _cls = keras.losses.binary_crossentropy(
                labels[index][1], pred[1])
            cls_loss += _cls

            # train the classifier
            with tf.GradientTape() as tape2:
                pred2 = self.classifier(anchor)
                class_loss = keras.losses.binary_crossentropy((1 - _cls), pred2)
            # classifier gradient descent
            grad = tape2.gradient(class_loss, self.classifier.trainable_weights)
            self.classifier.optimizer.apply_gradients(
                zip(grad, self.classifier.trainable_weights))

            # RPN loss part 2, the reckoning: box regression
            reg_loss += smooth_l1_loss(labels[index][0], pred[0], anchor)

        rpn_loss = (cls_loss / 256) + (self.rpn.const * (reg_loss / (9 * 256)))
    # RPN gradient descent
    grad = tape.gradient(rpn_loss, self.rpn.trainable_weights)
    self.rpn.optimizer.apply_gradients(
        zip(grad, self.rpn.trainable_weights))
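# A hypothetical driver loop for the joint trainer above, showing how it might
# be called once per batch. The `trainer` object, the dataset iterable and its
# (data, labels) layout are illustrative assumptions, not part of the original
# code.
def fit(trainer, dataset, epochs=1):
    for _ in range(epochs):
        for data, labels in dataset:
            # one combined RPN + classifier update per batch
            trainer.train(labels, data)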
def forward(self, head_features, gt_boxes, im_info, mode='gallery'):
    if self.is_train:
        if mode == 'gallery':
            rois, rpn_info, label, bbox_info = self.region_proposal(
                head_features, gt_boxes, im_info)
            rpn_label, rpn_bbox_info, rpn_cls_score, rpn_bbox_pred = \
                rpn_info

            rpn_cls_score = rpn_cls_score.view(-1, 2)
            rpn_label = rpn_label.view(-1)
            rpn_select = Variable(
                (rpn_label.data != -1)).nonzero().view(-1)
            rpn_cls_score = rpn_cls_score.index_select(
                0, rpn_select).contiguous().view(-1, 2)
            rpn_label = rpn_label.index_select(
                0, rpn_select).contiguous().view(-1)
            rpn_cls_loss = F.cross_entropy(rpn_cls_score, rpn_label)
            rpn_box_loss = smooth_l1_loss(rpn_bbox_pred, rpn_bbox_info,
                                          sigma=3.0, dim=[1, 2, 3])
            rpn_loss = (rpn_cls_loss, rpn_box_loss)

            # TODO: add roi-pooling
            pooled_feat = self.pooling(head_features, rois, max_pool=False)

            return pooled_feat, rpn_loss, label, bbox_info
        elif mode == 'query':
            pooled_feat = self.pooling(head_features, gt_boxes, False)
            return pooled_feat
        else:
            raise KeyError(mode)
    else:
        if mode == 'gallery':
            rois = self.region_proposal(head_features, gt_boxes, im_info)
            pooled_feat = self.pooling(head_features, rois, max_pool=False)
            return rois, pooled_feat
        elif mode == 'query':
            pooled_feat = self.pooling(head_features, gt_boxes, False)
            return pooled_feat
        else:
            raise KeyError(mode)
def forward(self, im_data, gt_boxes, im_info, mode='gallery'):
    if self.is_train:
        net_conv = self.head(im_data)
        # returned parameters contain 3 tuples here
        pooled_feat, trans_feat, rpn_loss, label, bbox_info = self.strpn(
            net_conv, gt_boxes, im_info)
        if self.net_name == 'vgg16':
            pooled_feat = pooled_feat.view(pooled_feat.size(0), -1)
            fc7 = self.tail(pooled_feat)
        else:
            fc7 = self.tail(pooled_feat).mean(3).mean(2)
        cls_score = self.cls_score_net(fc7)
        bbox_pred = self.bbox_pred_net(fc7)

        reid_fc7 = self.tail(trans_feat).mean(3).mean(2)
        reid_feat = F.normalize(self.reid_feat_net(reid_fc7))
        # reid_feat = F.normalize(self.reid_feat_net(fc7))
        cls_pred = torch.max(cls_score, 1)[1]
        cls_prob = F.softmax(cls_score, 1)

        det_label, pid_label = label
        det_label = det_label.view(-1)
        cls_loss = F.cross_entropy(cls_score.view(-1, 2), det_label)
        bbox_loss = smooth_l1_loss(bbox_pred, bbox_info)
        reid_loss = oim_loss(reid_feat, pid_label, self.lut, self.queue,
                             gt_boxes.size(0), self.lut_momentum)
        rpn_cls_loss, rpn_box_loss = rpn_loss

        return rpn_cls_loss, rpn_box_loss, cls_loss, bbox_loss, reid_loss
    else:
        if mode == 'gallery':
            net_conv = self.head(im_data)
            rois, pooled_feat, trans_feat = self.strpn(
                net_conv, gt_boxes, im_info)
            if self.net_name == 'vgg16':
                pooled_feat = pooled_feat.view(pooled_feat.size(0), -1)
                fc7 = self.tail(pooled_feat)
            else:
                fc7 = self.tail(pooled_feat).mean(3).mean(2)
            cls_score = self.cls_score_net(fc7)
            bbox_pred = self.bbox_pred_net(fc7)

            reid_fc7 = self.tail(trans_feat).mean(3).mean(2)
            reid_feat = F.normalize(self.reid_feat_net(reid_fc7))
            # reid_feat = F.normalize(self.reid_feat_net(fc7))
            cls_pred = torch.max(cls_score, 1)[1]
            cls_prob = F.softmax(cls_score, 1)

            # undo bbox target normalization using the training statistics
            with open('config.yml', 'r') as f:
                config = yaml.safe_load(f)
            mean = config['train_bbox_normalize_means']
            std = config['train_bbox_normalize_stds']
            means = bbox_pred.data.new(mean).repeat(2).unsqueeze(
                0).expand_as(bbox_pred)
            stds = bbox_pred.data.new(std).repeat(2).unsqueeze(
                0).expand_as(bbox_pred)
            bbox_pred = bbox_pred.mul(Variable(stds)).add(Variable(means))

            cls_prob = cls_prob.data.cpu().numpy()
            bbox_pred = bbox_pred.data.cpu().numpy()
            rois = rois.data.cpu().numpy()
            reid_feat = reid_feat.data.cpu().numpy()

            return cls_prob, bbox_pred, rois, reid_feat
        elif mode == 'query':
            net_conv = self.head(im_data)
            # TODO: move pooling layer from strpn to SIPN
            pooled_feat = self.strpn(net_conv, gt_boxes, im_info, mode)
            if self.net_name == 'vgg16':
                pooled_feat = pooled_feat.view(pooled_feat.size(0), -1)
                fc7 = self.tail(pooled_feat)
            else:
                fc7 = self.tail(pooled_feat).mean(3).mean(2)
            reid_feat = F.normalize(self.reid_feat_net(fc7))

            return reid_feat.data.cpu().numpy()
        else:
            raise KeyError(mode)
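# A hypothetical inference-time use of the forward pass above: detect people
# in a gallery image, embed the query crop, and rank gallery boxes by feature
# similarity. The variable names, the gt_boxes placeholder for gallery mode
# and the plain dot-product ranking are illustrative assumptions; score
# thresholding and NMS are omitted.
model.is_train = False
cls_prob, bbox_pred, rois, gallery_feat = model(
    gallery_im, gallery_boxes, gallery_im_info, mode='gallery')
query_feat = model(query_im, query_box, query_im_info, mode='query')
# both feature sets are L2-normalized, so the dot product is cosine similarity
sim = gallery_feat.dot(query_feat.squeeze(0))   # one score per gallery RoI
ranking = sim.argsort()[::-1]                   # best-matching RoIs first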
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
        angle_loss (Tensor)
    """
    # anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    # aaa = [t[:, 2] - t[:, 3] for t in targets]
    # bbb = torch.sum(torch.cat([a[:, 2] < a[:, 3] for a in anchors]))
    labels, regression_targets = self.prepare_targets(anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(
        torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(
        torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    N, A, H, W = objectness.shape
    N, AxC, H, W = box_regression.shape
    # same shape as labels
    objectness = objectness.permute(0, 2, 3, 1).reshape(-1)
    # same shape as regression targets: (N, 5)
    regression = box_regression.permute(0, 2, 3, 1).reshape(-1, AxC // A)
    # objectness, box_regression = \
    #     concat_box_prediction_layers(objectness, box_regression)
    # objectness = objectness.squeeze()

    total_pos = sampled_pos_inds.numel()
    total_neg = sampled_neg_inds.numel()
    total_samples = total_pos + total_neg

    pos_regression = regression[sampled_pos_inds]                  # (N, 5) -> xc, yc, w, h, theta
    pos_regression_targets = regression_targets[sampled_pos_inds]  # (N, 5) -> xc, yc, w, h, theta

    pos_angles = pos_regression[:, -1]  # .clone()
    pos_angles_targets = pos_regression_targets[:, -1]  # .clone()

    box_loss = smooth_l1_loss(
        # pos_regression[:, :-1].clone() - pos_regression_targets[:, :-1].clone(),
        pos_regression[:, :-1] - pos_regression_targets[:, :-1],
        beta=1.0 / 9,
        size_average=False,
    )

    # For targets where the height and width are roughly similar, there may be
    # ambiguity in angle regression, e.g. if height and width are equal, the
    # angle regression could be -90 or 0 degrees; we don't want to penalize this.
    # THRESH = 0.12
    # all_matched_targets = torch.cat([t[mt_idxs] for t, mt_idxs in
    #     zip(targets, matched_target_idxs)], dim=0)[sampled_pos_inds]
    # target_w_to_h_ratio = torch.div(all_matched_targets[:, 2], all_matched_targets[:, 3])
    # target_w_to_h_ratio_diff = torch.abs(1.0 - target_w_to_h_ratio)
    # y = target_w_to_h_ratio_diff > THRESH
    # n = target_w_to_h_ratio_diff <= THRESH
    # angle_loss_y = torch.abs(torch.sin(pos_angles[y] - pos_angles_targets[y])).mean()
    # angle_loss_n = smooth_l1_loss(torch.sin(pos_angles[n] - pos_angles_targets[n])).mean()
    # angle_loss = (torch.sum(y) * angle_loss_y + torch.sum(n) * angle_loss_n) / total_pos
    angle_loss = torch.abs(torch.sin(pos_angles - pos_angles_targets)).mean()
    # angle_loss = smooth_l1_loss(torch.sin(pos_angles - pos_angles_targets))

    box_loss = box_loss / total_pos  # FOR SOME REASON sampled_inds.numel() WAS DEFAULT

    # objectness_weights = torch.ones_like(labels)
    # objectness_weights[sampled_pos_inds] = float(total_pos) / total_samples
    # objectness_weights[sampled_neg_inds] = float(total_neg) / total_samples
    # criterion = nn.BCELoss(reduce=False)
    # entropy_loss = criterion(objectness[sampled_inds].sigmoid(), labels[sampled_inds])
    # objectness_loss = torch.mul(entropy_loss, objectness_weights[sampled_inds]).mean()
    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], labels[sampled_inds]
        # , weight=objectness_weights[sampled_inds]
    )

    return objectness_loss, box_loss, angle_loss
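# A minimal sketch of the smooth_l1_loss variant assumed by this loss module
# (and the box-head loss below): it takes the residual (pred - target)
# directly rather than a (pred, target) pair. Only the call signature is taken
# from the code above; the body is the standard smooth L1 definition.
def smooth_l1_loss(diff, beta=1.0 / 9, size_average=True):
    n = torch.abs(diff)
    # quadratic below beta, linear above
    loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    return loss.mean() if size_average else loss.sum()

# Note on the angle term above: |sin(pred - target)| is periodic in pi, so the
# penalty is unchanged when an orientation is flipped by 180 degrees, while a
# 90-degree error still receives the maximum penalty of 1.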
def __call__(self, class_logits, box_regression):
    """
    Computes the loss for Faster R-CNN.
    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
        angle_loss (Tensor)
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)
    device = class_logits.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    proposals = self._proposals

    labels = cat(proposals["labels"], dim=0)
    regression_targets = cat(proposals["regression_targets"], dim=0)

    # get positive labels
    sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[sampled_pos_inds_subset]
    total_pos = labels_pos.numel()
    if total_pos == 0:
        # all 0, sum is convenient to get a torch tensor
        return labels_pos.sum(), labels_pos.sum(), labels_pos.sum()

    # perform weighted classification loss
    # (to prevent class imbalance, i.e. too many negatives)
    with torch.no_grad():
        num_classes = class_logits.shape[-1]
        label_cnts = torch.stack([(labels == x).sum()
                                  for x in range(num_classes)])
        label_weights = 1.0 / label_cnts.to(dtype=torch.float32)
        label_weights /= num_classes  # equal class weighting
    classification_loss = F.cross_entropy(class_logits, labels,
                                          weight=label_weights)

    # get indices that correspond to the regression targets for
    # the corresponding ground truth labels, to be used with
    # advanced indexing
    if self.cls_agnostic_bbox_reg:
        map_inds = torch.arange(REGRESSION_CN, REGRESSION_CN * 2,
                                device=device)
    else:
        map_inds = REGRESSION_CN * labels_pos[:, None] + torch.arange(
            REGRESSION_CN, device=device)

    pos_reg_pred = box_regression[sampled_pos_inds_subset[:, None], map_inds]
    pos_reg_targets = regression_targets[sampled_pos_inds_subset]

    box_loss = smooth_l1_loss(
        pos_reg_pred[:, :-1] - pos_reg_targets[:, :-1],
        size_average=False,
        beta=1,
    )
    angle_loss = torch.abs(
        torch.sin(pos_reg_pred[:, -1] - pos_reg_targets[:, -1])).mean()
    box_loss = 2.0 * box_loss / total_pos  # labels.numel()

    return classification_loss, box_loss, angle_loss
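# A small standalone illustration of the class weighting computed above, with
# made-up numbers: 1 positive and 99 negatives across num_classes = 2. Each
# class is weighted by 1 / count, so the rare class is not drowned out in the
# cross-entropy.
labels = torch.zeros(100, dtype=torch.long)
labels[0] = 1
num_classes = 2
label_cnts = torch.stack([(labels == x).sum()
                          for x in range(num_classes)])   # tensor([99, 1])
label_weights = 1.0 / label_cnts.to(dtype=torch.float32)  # [~0.0101, 1.0]
label_weights /= num_classes                               # [~0.0051, 0.5]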