class TestAnchors(object):
    """Tests for the Anchors box coder."""

    def init(self,
             time=5,
             batchsize=3,
             height=256,
             width=256,
             allow_low_quality_matches=False,
             bg_iou_threshold=0.4,
             fg_iou_threshold=0.5):
        self.batchsize = batchsize
        self.time = time
        self.height, self.width = height, width
        self.bg_iou_threshold = bg_iou_threshold
        self.fg_iou_threshold = fg_iou_threshold
        self.allow_low_quality_matches = allow_low_quality_matches
        self.num_classes = 3
        self.box_generators = [
            Animation(self.height, self.width, 3, max_objects=3, max_classes=3)
            for i in range(self.batchsize)
        ]
        self.box_coder = Anchors(
            allow_low_quality_matches=allow_low_quality_matches,
            bg_iou_threshold=bg_iou_threshold,
            fg_iou_threshold=fg_iou_threshold)
        self.fmaps = []
        for i in range(self.box_coder.num_levels):
            self.fmaps += [
                torch.zeros((self.batchsize, 1, self.height >> (3 + i),
                             self.width >> (3 + i)))
            ]
        targets = [[
            torch.from_numpy(self.box_generators[i].run())
            for i in range(self.batchsize)
        ] for t in range(self.time)]
        return targets

    def assert_equal(self, x, y):
        assert (x - y).abs().max().item() == 0

    def abs_diff(self, x, y):
        diff = (x - y).abs()
        return diff, diff.max().item()

    def cat_diff(self, x, y):
        u1 = torch.unique(x, return_counts=True)[1]
        u2 = torch.unique(y, return_counts=True)[1]
        return u1 - u2

    def encode_sequential(self,
                          targets,
                          anchors,
                          anchors_xyxy,
                          fg_iou_threshold,
                          bg_iou_threshold,
                          allow_low_quality_matches,
                          remove_dummies=False):
        gt_padded, sizes = box.pack_boxes_list_of_list(targets)
        all_loc, all_cls = [], []
        for t in range(len(gt_padded)):
            gt_boxes, gt_labels = gt_padded[t, :, :4], gt_padded[t, :, -1]
            if remove_dummies:
                max_size = sizes[t]
                gt_boxes, gt_labels = gt_boxes[:max_size], gt_labels[:max_size]
            boxes, cls_t = box.assign_priors(gt_boxes, gt_labels, anchors_xyxy,
                                             fg_iou_threshold,
                                             bg_iou_threshold,
                                             allow_low_quality_matches)
            loc_t = box.bbox_to_deltas(boxes, anchors, [1, 1])
            all_loc.append(loc_t.unsqueeze(0))
            all_cls.append(cls_t.unsqueeze(0).long())
        all_loc = torch.cat(all_loc, dim=0)  # (N, #anchors, 4)
        all_cls = torch.cat(all_cls, dim=0)  # (N, #anchors, C)
        return all_loc, all_cls

    def pytestcase_batch_box_iou(self):
        targets = self.init(7, 3)
        anchors, anchors_xyxy = self.box_coder(self.fmaps,
                                               (self.height, self.width))
        gt_padded, _ = box.pack_boxes_list_of_list(targets)
        gt_boxes = gt_padded[..., :4]
        batch_iou = box.batch_box_iou(anchors_xyxy, gt_boxes.clone())
        for t in range(len(gt_padded)):
            iou_t = box.box_iou(anchors_xyxy, gt_boxes[t])
            max_abs_diff = self.abs_diff(batch_iou[t], iou_t)[1]
            assert max_abs_diff == 0

    def pytestcase_batched_encode_only_best_quality(self):
        targets = self.init(3, 7, allow_low_quality_matches=False)
        anchors, anchors_xyxy = self.box_coder(self.fmaps,
                                               (self.height, self.width))
        loc_targets, cls_targets = self.box_coder.encode(
            anchors, anchors_xyxy, targets)
        loc_targets2, cls_targets2 = self.encode_sequential(
            targets,
            anchors,
            anchors_xyxy,
            self.box_coder.fg_iou_threshold,
            self.box_coder.bg_iou_threshold,
            self.box_coder.allow_low_quality_matches,
            remove_dummies=True)
        loc_diff, max_loc_diff = self.abs_diff(loc_targets, loc_targets2)
        cls_diff, max_cls_diff = self.abs_diff(cls_targets, cls_targets2)
        cat_diff = self.cat_diff(cls_targets, cls_targets2)
        assert max_loc_diff == 0
        assert max_cls_diff == 0
        assert cat_diff.abs().max() == 0

    def pytestcase_batched_encode_allow_low_quality(self):
        targets = self.init(3, 7, allow_low_quality_matches=True)
        anchors, anchors_xyxy = self.box_coder(self.fmaps,
                                               (self.height, self.width))
        loc_targets, cls_targets = self.box_coder.encode(
            anchors, anchors_xyxy, targets)
        loc_targets2, cls_targets2 = self.encode_sequential(
            targets,
            anchors,
            anchors_xyxy,
            self.box_coder.fg_iou_threshold,
            self.box_coder.bg_iou_threshold,
            self.box_coder.allow_low_quality_matches,
            remove_dummies=True)
        loc_diff, max_loc_diff = self.abs_diff(loc_targets, loc_targets2)
        cls_diff, max_cls_diff = self.abs_diff(cls_targets, cls_targets2)
        cat_diff = self.cat_diff(cls_targets, cls_targets2)
        assert max_loc_diff == 0
        assert max_cls_diff == 0
        assert cat_diff.abs().max() == 0

    def one_hot(self, y, num_classes):
        y2 = y.unsqueeze(2)
        fg = (y2 > 0).float()
        y_index = (y2 - 1).clamp_(0)
        t = torch.zeros((y.shape[0], y.shape[1], num_classes),
                        dtype=torch.float)
        t.scatter_(2, y_index, fg)
        return t

    def pytestcase_batched_decode_boxes(self):
        targets = self.init(1,
                            1,
                            allow_low_quality_matches=True,
                            bg_iou_threshold=0.4,
                            fg_iou_threshold=0.5)
        anchors, anchors_xyxy = self.box_coder(self.fmaps,
                                               (self.height, self.width))
        loc_targets, cls_targets = self.box_coder.encode(
            anchors, anchors_xyxy, targets)
        scores = self.one_hot(cls_targets, self.num_classes)
        self.box_coder.decode_func = self.box_coder.decode_per_image
        boxes1 = self.box_coder.decode(anchors, loc_targets.clone(), scores,
                                       self.batchsize, 0.99)
        self.box_coder.decode_func = self.box_coder.batched_decode
        boxes2 = self.box_coder.decode(anchors, loc_targets, scores,
                                       self.batchsize, 0.99)
        for t in range(self.time):
            for x, y in zip(boxes1[t], boxes2[t]):
                b, s, l = x
                b2, s2, l2 = y
                self.assert_equal(b, b2)
                self.assert_equal(s, s2)
                self.assert_equal(l, l2)

    def pytestcase_all_gt_should_be_matched_even_low_iou(self):
        """With allow_low_quality_matches=True, every ground-truth box gets
        at least one anchor, even when its best IoU is low."""
        box_coder = Anchors(allow_low_quality_matches=True)
        anchors_xyxy = torch.tensor(
            [[25, 25, 250, 250], [20, 20, 50, 50], [3, 3, 4, 4]],
            dtype=torch.float32)
        anchors = box.change_box_order(anchors_xyxy, 'xyxy2xywh')
        targets = torch.tensor([[120, 120, 250, 250, 1], [20, 20, 22, 22, 2]],
                               dtype=torch.float32)
        targets = [[targets]]
        _, cls_targets = box_coder.encode(anchors, anchors_xyxy, targets)
        # the two ground-truth labels plus the background class
        assert len(torch.unique(cls_targets)) == 3
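# The one_hot helper above converts integer class targets (0 = background)
# into per-class score planes: background anchors keep an all-zero row
# because `fg` is 0 there. A minimal standalone sketch of the same scatter
# trick, using only torch (the tensor values are made up for illustration):
import torch

cls_targets = torch.tensor([[0, 1, 3]])  # 1 sequence step, 3 anchors
num_classes = 3
y2 = cls_targets.unsqueeze(2)
fg = (y2 > 0).float()                    # 1 for foreground anchors
y_index = (y2 - 1).clamp_(0)             # shift labels so class 1 writes to plane 0
scores = torch.zeros((1, 3, num_classes))
scores.scatter_(2, y_index, fg)
assert scores[0, 0].sum() == 0           # background anchor: all zeros
assert scores[0, 1, 0] == 1 and scores[0, 2, 2] == 1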
class SingleStageDetector(nn.Module):
    def __init__(self,
                 feature_extractor=FPN,
                 rpn=BoxHead,
                 in_channels=3,
                 num_classes=2,
                 act='sigmoid',
                 ratios=[0.5, 1.0, 2.0],
                 scales=[1.0, 2**1. / 3, 2**2. / 3],
                 nlayers=0,
                 loss='_focal_loss'):
        super(SingleStageDetector, self).__init__()
        self.label_offset = 1 * (act == 'softmax')
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.feature_extractor = feature_extractor(in_channels)
        self.box_coder = Anchors(num_levels=self.feature_extractor.levels,
                                 scales=scales,
                                 ratios=ratios,
                                 allow_low_quality_matches=False,
                                 variances=[1.0, 1.0],
                                 fg_iou_threshold=0.5,
                                 bg_iou_threshold=0.4)
        self.num_anchors = self.box_coder.num_anchors
        self.act = act
        if rpn == BoxHead:
            self.rpn = BoxHead(self.feature_extractor.cout,
                               self.box_coder.num_anchors,
                               self.num_classes + self.label_offset, act,
                               nlayers)
        elif rpn == SSDHead:
            self.rpn = SSDHead(self.feature_extractor.out_channel_list,
                               self.box_coder.num_anchors,
                               self.num_classes + self.label_offset, act)
        else:
            raise NotImplementedError()
        self.criterion = DetectionLoss(act + loss)

    def reset(self, mask=None):
        self.feature_extractor.reset(mask)

    def forward(self, x):
        xs = self.feature_extractor(x)
        return self.rpn(xs)

    def compute_loss(self, x, targets):
        xs = self.feature_extractor(x)
        loc_preds, cls_preds = self.rpn(xs)
        with torch.no_grad():
            anchors, anchors_xyxy = self.box_coder(xs, x.shape[-2:])
            loc_targets, cls_targets = self.box_coder.encode(
                anchors, anchors_xyxy, targets)
        assert cls_targets.shape[1] == cls_preds.shape[1]
        loc_loss, cls_loss = self.criterion(loc_preds, loc_targets, cls_preds,
                                            cls_targets)
        # att_loss = attention_loss(self.feature_extractor, x, targets, sequence_upsample, box_drawing)
        loss_dict = {'loc': loc_loss, 'cls_loss': cls_loss}
        return loss_dict

    def get_boxes(self, x, score_thresh=0.4):
        xs = self.feature_extractor(x)
        loc_preds, cls_preds = self.rpn(xs)
        cls_preds = self.rpn.probas(cls_preds)
        scores = cls_preds[..., self.label_offset:].contiguous()
        anchors, _ = self.box_coder(xs, x.shape[-2:])
        targets = self.box_coder.decode(anchors,
                                        loc_preds,
                                        scores,
                                        x.size(1),
                                        score_thresh=score_thresh)
        return targets

    @classmethod
    def mnist_vanilla_rnn(cls, in_channels, num_classes, act='softmax',
                          loss='_ohem_loss'):
        return cls(Vanilla, BoxHead, in_channels, num_classes, act,
                   ratios=[1.0], scales=[1.0, 1.5], loss=loss)

    @classmethod
    def mnist_fb_rnn(cls, in_channels, num_classes, act='sigmoid',
                     loss='_focal_loss'):
        return cls(FBN, BoxHead, in_channels, num_classes, act,
                   ratios=[1.0], scales=[1.0, 1.5], loss=loss)

    @classmethod
    def mnist_unet_rnn(cls, in_channels, num_classes, act='sigmoid',
                       loss='_focal_loss'):
        return cls(FPN, BoxHead, in_channels, num_classes, act,
                   ratios=[1.0], scales=[1.0, 1.5], loss=loss)

    @classmethod
    def mobilenet_v2_fpn(cls, in_channels, num_classes, act='sigmoid',
                         loss='_focal_loss', nlayers=3):
        return cls(MobileNetFPN, BoxHead, in_channels, num_classes, act,
                   loss=loss, nlayers=nlayers)

    @classmethod
    def resnet50_fpn(cls, in_channels, num_classes, act='sigmoid',
                     loss='_focal_loss', nlayers=3):
        return cls(ResNet50FPN, BoxHead, in_channels, num_classes, act,
                   loss=loss, nlayers=nlayers)

    @classmethod
    def resnet50_ssd(cls, in_channels, num_classes, act='sigmoid',
                     loss='_focal_loss', nlayers=0):
        return cls(ResNet50SSD, SSDHead, in_channels, num_classes, act=act,
                   loss=loss, nlayers=nlayers)
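# A minimal usage sketch for SingleStageDetector. The (T, B, C, H, W) input
# layout and the classmethod come from the definitions above; the concrete
# shapes and the `targets` structure (a list over time of lists over batch
# of (N, 5) tensors, last column = class label) are illustrative assumptions:
import torch

net = SingleStageDetector.mnist_vanilla_rnn(in_channels=3, num_classes=2)
x = torch.randn(5, 2, 3, 256, 256)  # time, batch, channels, height, width
targets = [[torch.tensor([[10., 10., 50., 50., 1.]]) for _ in range(2)]
           for _ in range(5)]
loss_dict = net.compute_loss(x, targets)  # {'loc': ..., 'cls_loss': ...}
net.reset()                               # clear the recurrent state
detections = net.get_boxes(x, score_thresh=0.4)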
class TwoStageDetector(nn.Module):
    def __init__(self,
                 feature_extractor=FPN,
                 rpn=BoxHead,
                 num_classes=2,
                 cin=2,
                 act='sigmoid',
                 ratios=[1.0],
                 scales=[1.0, 1.5]):
        super(TwoStageDetector, self).__init__()
        self.label_offset = 1 * (act == 'softmax')
        self.num_classes = num_classes
        self.cin = cin
        self.feature_extractor = feature_extractor(cin)
        self.box_coder = Anchors(pyramid_levels=[
            i for i in range(3, 3 + self.feature_extractor.levels)
        ],
                                 scales=scales,
                                 ratios=ratios,
                                 fg_iou_threshold=0.5,
                                 bg_iou_threshold=0.4)
        self.num_anchors = self.box_coder.num_anchors
        self.act = act
        self.first_stage = rpn(self.feature_extractor.cout,
                               self.box_coder.num_anchors, 1, 'sigmoid',
                               n_layers=0)
        feat_names = [
            'feat' + str(i) for i in range(self.feature_extractor.levels)
        ]
        self.roi_pool = pool.MultiScaleRoIAlign(feat_names, 5, 2)
        self.second_stage = FCHead(self.feature_extractor.cout * 5 * 5,
                                   self.num_classes + self.label_offset, act)
        self.criterion = DetectionLoss('sigmoid_focal_loss')

    def reset(self):
        self.feature_extractor.reset()

    def forward(self, x, score_thresh=0.4):
        batchsize = x.size(1)
        xs = self.feature_extractor(x)
        loc_preds, cls_preds = self.first_stage(xs)
        anchors, anchors_xyxy = self.box_coder(xs)
        proposals = self.box_coder.decode(anchors,
                                          loc_preds,
                                          cls_preds.sigmoid(),
                                          batchsize,
                                          score_thresh=score_thresh)
        image_sizes = [x.shape[-2:]] * x.size(0) * x.size(1)
        sources = {'feat' + str(i): item for i, item in enumerate(xs)}
        rois, rois_xyxy, sizes, batch_index = self.gather_boxes(proposals)
        if len(rois) > 0:
            out = self.roi_pool(sources, rois, image_sizes)
            rois, rois_xyxy = torch.cat(rois), torch.cat(rois_xyxy)
            loc_preds2, cls_preds2 = self.second_stage(out)
        else:
            loc_preds2, cls_preds2, rois, rois_xyxy, batch_index = \
                None, None, None, None, None
        out_dic = Struct(
            **{
                'first_stage':
                Struct(
                    **{
                        'loc': loc_preds,
                        'cls': cls_preds,
                        'proposals': proposals,
                        'rois': rois,
                        'rois_xyxy': rois_xyxy,
                        'sizes': sizes,
                        'idxs': batch_index
                    }),
                'second_stage':
                Struct(**{
                    'loc': loc_preds2,
                    'cls': cls_preds2
                }),
                'anchors': anchors,
                'anchors_xyxy': anchors_xyxy
            })
        return out_dic

    def gather_boxes(self, proposals):
        # expects proposals as a list (time) of lists (batch) of
        # (boxes, scores, labels) tuples, boxes of shape (N, 4)
        idxs = []
        sizes = []
        rois = []
        rois_xyxy = []
        stride = len(proposals)
        for t in range(len(proposals)):
            for i in range(len(proposals[t])):
                boxes, _, _ = proposals[t][i]
                num = len(boxes) if boxes is not None else 0
                sizes += [num]
                if num > 0:
                    boxes = boxes.detach()
                    rois_xyxy += [boxes]
                    rois += [box.change_box_order(boxes, 'xyxy2xywh')]
                    idxs += [t * stride + i] * num
        # guard against the no-proposal case before indexing rois[0]
        device = rois[0].device if len(rois) > 0 else torch.device('cpu')
        idxs = torch.LongTensor(idxs).to(device)
        return rois, rois_xyxy, sizes, idxs

    def compute_loss(self, x, targets):
        out = self(x)
        # first stage loss
        with torch.no_grad():
            loc_targets, cls_targets = self.box_coder.encode(
                out.anchors, out.anchors_xyxy, binarize_targets(targets))
        loc_loss, cls_loss = self.criterion(out.first_stage.loc, loc_targets,
                                            out.first_stage.cls, cls_targets)
        loss_dict = {'loc1': loc_loss, 'cls1': cls_loss}
        # second stage loss
        if out.second_stage.loc is not None:
            with torch.no_grad():
                loc_targets2, cls_targets2 = self.box_coder.encode(
                    out.first_stage.rois, out.first_stage.rois_xyxy, targets)
            loc_loss, cls_loss = self.criterion(out.second_stage.loc,
                                                loc_targets2,
                                                out.second_stage.cls,
                                                cls_targets2)
            loss_dict.update({'loc2': loc_loss, 'cls2': cls_loss})
        return loss_dict

    def get_boxes(self, x, score_thresh=0.4):
        batchsize = x.size(1)
        out = self(x)
        scores, idxs = out.second_stage.cls.sigmoid().max(dim=1)
        # the per-roi batch indices are stored under 'idxs' in forward()
        idxs = out.first_stage.idxs * self.num_classes + idxs
        box_preds = box.deltas_to_bbox(out.second_stage.loc,
                                       out.first_stage.rois, [1, 1])
        boxes, scores, labels, batch_index = \
            self.box_coder.batched_decode_with_idxs(
                box_preds, scores, idxs, self.num_anchors, self.num_classes,
                batchsize, 0.5, 0.5)
        targets = self.box_coder.flatten_box_list_to_list_of_list(
            boxes, scores, labels, batch_index, batchsize)
        return targets
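# The roi_pool above follows torchvision's MultiScaleRoIAlign call pattern
# (the `pool` alias is assumed to be torchvision.ops). A self-contained
# sketch of how the feature dict, per-image boxes, and image sizes go in
# and fixed-size crops come out:
import torch
from torchvision.ops import MultiScaleRoIAlign

roi_pool = MultiScaleRoIAlign(['feat0', 'feat1'], output_size=5,
                              sampling_ratio=2)
sources = {
    'feat0': torch.randn(1, 16, 64, 64),       # stride-8 pyramid level
    'feat1': torch.randn(1, 16, 32, 32),       # stride-16 pyramid level
}
rois = [torch.tensor([[10., 10., 60., 60.]])]  # one image, one xyxy box
image_sizes = [(512, 512)]
crops = roi_pool(sources, rois, image_sizes)   # (num_rois, 16, 5, 5)
assert crops.shape == (1, 16, 5, 5)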
class RefinedDetector(nn.Module):
    def __init__(self,
                 feature_extractor=MobileNetFPN,
                 rpn=BoxHead,
                 num_classes=2,
                 cin=2,
                 act='sigmoid'):
        super(RefinedDetector, self).__init__()
        self.label_offset = 1 * (act == 'softmax')
        self.num_classes = num_classes
        self.cin = cin
        self.feature_extractor = feature_extractor(cin)
        self.box_coder = Anchors(pyramid_levels=[
            i for i in range(3, 3 + self.feature_extractor.levels)
        ],
                                 scales=[1.0, 2**1. / 3, 2**2. / 3],
                                 ratios=[0.5, 1.0, 2.0],
                                 fg_iou_threshold=0.5,
                                 bg_iou_threshold=0.4)
        self.num_anchors = self.box_coder.num_anchors
        self.act = act
        self.rpn = rpn(self.feature_extractor.cout,
                       self.box_coder.num_anchors,
                       self.num_classes + self.label_offset, act)
        # refinement
        self.feature_extractor2 = FeaturePyramidNetwork(
            [self.feature_extractor.cout] * self.feature_extractor.levels,
            self.feature_extractor.cout)
        self.rpn2 = rpn(self.feature_extractor.cout,
                        self.box_coder.num_anchors,
                        self.num_classes + self.label_offset, act)
        self.criterion = DetectionLoss('sigmoid_focal_loss')

    def reset(self):
        self.feature_extractor.reset()

    def forward(self, x):
        xs = self.feature_extractor(x)
        ys = self.feature_extractor2(xs)
        # coarse predictions from the first head, refined ones from the second
        return self.rpn(xs), self.rpn2(ys)

    def compute_loss(self, x, targets):
        xs = self.feature_extractor(x)
        ys = self.feature_extractor2(xs)
        loc_preds, cls_preds = self.rpn(xs)
        loc_preds2, cls_preds2 = self.rpn2(ys)
        with torch.no_grad():
            anchors, anchors_xyxy = self.box_coder(xs)
            loc_targets, cls_targets = self.box_coder.encode(
                anchors, anchors_xyxy, targets)
            # refine the anchors with the first-stage regression before
            # encoding the second-stage targets
            anchors2 = box.deltas_to_bbox(loc_preds, anchors)
            anchors2xyxy = box.change_box_order(anchors2, 'xywh2xyxy')
            loc_targets2, cls_targets2 = self.box_coder.encode_with_anchors(
                anchors2, anchors2xyxy, targets)
        # cls_targets *= cls_preds?
        assert cls_targets.shape[1] == cls_preds.shape[1]
        loc_loss, cls_loss = self.criterion(loc_preds, loc_targets, cls_preds,
                                            cls_targets)
        loc_loss2, cls_loss2 = self.criterion(loc_preds2, loc_targets2,
                                              cls_preds2, cls_targets2)
        loss_dict = {
            'loc': loc_loss,
            'cls_loss': cls_loss,
            'loc2': loc_loss2,
            'cls_loss2': cls_loss2
        }
        return loss_dict

    def get_refined_anchors(self, xs, loc_preds):
        anchors, _ = self.box_coder(xs)
        anchors2 = box.deltas_to_bbox(loc_preds, anchors)
        anchors2xyxy = box.change_box_order(anchors2, 'xywh2xyxy')
        return anchors2, anchors2xyxy

    def get_boxes(self, x, score_thresh=0.4):
        xs = self.feature_extractor(x)
        ys = self.feature_extractor2(xs)
        # coarse head refines the anchors, second head predicts against them
        loc_preds, cls_preds = self.rpn(xs)
        loc_preds2, cls_preds2 = self.rpn2(ys)
        anchors, anchorsxyxy = self.get_refined_anchors(xs, loc_preds)
        scores = cls_preds2[..., self.label_offset:].contiguous()
        targets = self.box_coder.decode_with_anchors(
            anchors,
            anchorsxyxy,
            loc_preds2,
            scores,
            x.size(1),
            score_thresh=score_thresh)
        return targets
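# box.deltas_to_bbox is project-local and not shown here; the refinement
# step above only needs the conventional (Faster R-CNN style) delta
# decoding, which it presumably implements. A sketch of that convention in
# (cx, cy, w, h) order with unit variances; the function name is hypothetical:
import torch

def deltas_to_bbox_sketch(deltas, anchors):
    # dx, dy scale with the anchor size; dw, dh live in log space
    xy = deltas[..., :2] * anchors[..., 2:] + anchors[..., :2]
    wh = torch.exp(deltas[..., 2:]) * anchors[..., 2:]
    return torch.cat([xy, wh], dim=-1)

anchor = torch.tensor([[32., 32., 16., 16.]])  # cx, cy, w, h
# a zero delta must reproduce the anchor itself
assert torch.equal(deltas_to_bbox_sketch(torch.zeros(1, 4), anchor), anchor)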
class SSD(nn.Module):
    def __init__(self,
                 feature_extractor=FPN,
                 num_classes=2,
                 cin=2,
                 height=300,
                 width=300,
                 act='sigmoid',
                 shared=True):
        super(SSD, self).__init__()
        self.num_classes = num_classes
        self.height, self.width = height, width
        self.cin = cin
        self.extractor = feature_extractor(cin)
        x = torch.randn(1, 1, self.cin, self.height, self.width)
        sources = self.extractor(x)
        if USE_ANCHOR_MODULE:
            self.box_coder = Anchors(
                pyramid_levels=[i for i in range(3, 3 + len(sources))],
                scales=[1.0, 1.5],
                ratios=[1],
                label_offset=1,
                fg_iou_threshold=0.5,
                bg_iou_threshold=0.4)
            self.num_anchors = self.box_coder.num_anchors
        else:
            self.fm_sizes, self.steps, self.box_sizes = \
                get_box_params_fixed_size(sources, height, width)
            self.ary = float(width) / height
            self.aspect_ratios = [1]
            self.scales = [1, 1.5]
            self.num_anchors = len(self.aspect_ratios) * len(self.scales)
            # self.num_anchors = 2 * len(self.aspect_ratios) + 2
            self.box_coder = SSDBoxCoder(self, 0.7, 0.4)
            self.aspect_ratios = []

        self.in_channels = [item.size(1) for item in sources]
        self.shared = shared
        self.act = act
        self.use_embedding_loss = False
        if self.shared:
            self.embedding_dims = 32
            self.loc_head = self._make_head(self.in_channels[0],
                                            self.num_anchors * 4)
            self.cls_head = self._make_head(
                self.in_channels[0], self.num_anchors * self.num_classes)
            if self.use_embedding_loss:
                self.emb_head = self._make_head(
                    self.in_channels[0],
                    self.num_anchors * self.embedding_dims)
            torch.nn.init.normal_(self.loc_head[-1].weight, std=0.01)
            torch.nn.init.constant_(self.loc_head[-1].bias, 0)
            if self.act == 'softmax':
                self.softmax_init(self.cls_head[-1])
            else:
                self.sigmoid_init(self.cls_head[-1])
        else:
            self.cls_layers = nn.ModuleList()
            self.reg_layers = nn.ModuleList()
            for i in range(len(self.in_channels)):
                self.reg_layers += [
                    nn.Conv2d(self.in_channels[i],
                              self.num_anchors * 4,
                              kernel_size=3,
                              padding=1,
                              stride=1)
                ]
                self.cls_layers += [
                    nn.Conv2d(self.in_channels[i],
                              self.num_anchors * self.num_classes,
                              kernel_size=3,
                              padding=1,
                              stride=1)
                ]
            for l in self.reg_layers:
                torch.nn.init.normal_(l.weight, std=0.01)
                torch.nn.init.constant_(l.bias, 0)
            # init with a strong bias toward the background class for focal loss
            if self.act == 'softmax':
                self.softmax_init(self.cls_layers[-1])
            else:
                self.sigmoid_init(self.cls_layers[-1])

        self.criterion = SSDLoss(num_classes=num_classes,
                                 mode='focal',
                                 use_sigmoid=self.act == 'sigmoid',
                                 use_iou=False)
        self._forward = [self._forward_unshared, self._forward_shared][shared]
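# sigmoid_init / softmax_init are not shown in this file; for focal loss the
# usual RetinaNet recipe biases the classification layer so that every anchor
# starts near a low foreground prior pi. A sketch of what such an init
# plausibly does (the function name is hypothetical; pi = 0.01 is the
# RetinaNet default):
import math
import torch
from torch import nn

def sigmoid_init_sketch(layer, pi=0.01):
    nn.init.normal_(layer.weight, std=0.01)
    # sigmoid(bias) == pi, so initial predictions are ~1% foreground
    nn.init.constant_(layer.bias, -math.log((1 - pi) / pi))

cls_layer = nn.Conv2d(256, 2 * 2, kernel_size=3, padding=1)
sigmoid_init_sketch(cls_layer)
assert torch.allclose(torch.sigmoid(cls_layer.bias),
                      torch.full_like(cls_layer.bias, 0.01))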
class DataGenerator:
    def __init__(self, anno_lines, input_shape, num_classes, batch_size):
        self.anno_lines = anno_lines
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.Anchor = Anchors()

    def data_generate(self):
        """Data generator for training.

        Yields:
            [image_data, [true_box, true_class]]: the batched images together
            with the stacked anchor box and class targets, plus a dummy
            all-zero tensor (the detection loss is computed inside the model).
        """
        anchors = self.Anchor.anchors_generator(self.input_shape)
        n = len(self.anno_lines)
        i = 0
        while True:
            image_data = []
            true_box = []
            true_class = []
            for j in range(self.batch_size):
                if i == 0:
                    np.random.shuffle(self.anno_lines)
                image, boxes = get_random_data(self.anno_lines[i],
                                               self.input_shape)
                results = self.Anchor.anchors_target_total(
                    anchors, boxes, self.num_classes, self.input_shape)
                image_data.append(image)
                true_box.append(results[0])
                true_class.append(results[1])
                i = (i + 1) % n
            image_data = tf.stack(image_data, axis=0)
            true_box = tf.stack(true_box, axis=0)
            true_class = tf.stack(true_class, axis=0)
            yield [image_data, [true_box, true_class]], tf.zeros(
                shape=(self.batch_size, ))

    # #########################################
    # this function has been deprecated
    # #########################################
    def data_generate_other(self):
        """Deprecated variant that yields per-level targets for the p3 to p7
        stages: [labels, delta, label_weights, box_weights] for each level."""
        anchors = self.Anchor.anchors_generator(self.input_shape)
        n = len(self.anno_lines)
        i = 0
        while True:
            targets_3 = []
            targets_4 = []
            targets_5 = []
            targets_6 = []
            targets_7 = []
            image_data = []
            for j in range(self.batch_size):
                if i == 0:
                    np.random.shuffle(self.anno_lines)
                image, boxes = get_random_data(self.anno_lines[i],
                                               self.input_shape)
                results = self.Anchor.anchors_target_total(
                    anchors, boxes, self.num_classes, self.input_shape)
                image_data.append(image)
                targets_3.append(results[0])
                targets_4.append(results[1])
                targets_5.append(results[2])
                targets_6.append(results[3])
                targets_7.append(results[4])
                i = (i + 1) % n
            image_data = tf.stack(image_data, axis=0)
            yield [
                image_data,
                [targets_3, targets_4, targets_5, targets_6, targets_7]
            ], tf.zeros(shape=(2, ))
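# A minimal usage sketch for DataGenerator (illustrative only; the
# `annotation_lines` list, the `model`, and the input_shape format are
# assumptions). The dummy zero tensor yielded above matches the Keras
# pattern where the detection loss is computed inside the model itself:
gen = DataGenerator(anno_lines=annotation_lines,
                    input_shape=(512, 512),
                    num_classes=20,
                    batch_size=4)
model.fit(gen.data_generate(),
          steps_per_epoch=len(annotation_lines) // 4,
          epochs=10)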