def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             octave_base_scale=8,
             scales_per_octave=3,
             octave_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             anchoring_means=[.0, .0, .0, .0],
             anchoring_stds=[1.0, 1.0, 1.0, 1.0],
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             loc_filter_thr=0.01,
             loc_focal_loss=True,
             cls_sigmoid_loss=False,
             cls_focal_loss=False):
    """Configure the guided-anchoring head and build its anchor generators."""
    super(GuidedAnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.octave_base_scale = octave_base_scale
    self.scales_per_octave = scales_per_octave
    # Scales inside one octave: base * 2^(i / scales_per_octave).
    self.octave_scales = octave_base_scale * np.array(
        [2**(i / scales_per_octave) for i in range(scales_per_octave)])
    self.approxs_per_octave = len(self.octave_scales) * len(octave_ratios)
    self.octave_ratios = octave_ratios
    self.anchor_strides = anchor_strides
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.anchoring_means = anchoring_means
    self.anchoring_stds = anchoring_stds
    self.target_means = target_means
    self.target_stds = target_stds
    self.loc_filter_thr = loc_filter_thr
    self.loc_focal_loss = loc_focal_loss
    assert self.loc_focal_loss, 'only focal loss is supported in loc'
    self.cls_sigmoid_loss = cls_sigmoid_loss
    self.cls_focal_loss = cls_focal_loss
    # Dense approximations (all scales/ratios) vs. one square base anchor.
    self.approx_generators = [
        AnchorGenerator(base_size, self.octave_scales, self.octave_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.base_approx_generators = [
        AnchorGenerator(base_size, [self.octave_scales[0]], [1.0])
        for base_size in self.anchor_base_sizes
    ]
    # Guided anchoring predicts a single anchor per spatial location.
    self.num_anchors = 1
    # Sigmoid classification drops the explicit background channel.
    self.cls_out_channels = (self.num_classes - 1
                             if self.cls_sigmoid_loss else self.num_classes)
    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             text_anchor_ratios=[0.5, 1.0, 2.0],
             char_anchor_ratios=[0.5, 1.0, 2.0],
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=True,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss',
                            beta=1.0 / 9.0,
                            loss_weight=1.0)):
    """Anchor head with separate anchor sets for text lines and characters."""
    super(AETSAnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_strides = anchor_strides
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.text_anchor_ratios = text_anchor_ratios
    self.char_anchor_ratios = char_anchor_ratios
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # Focal-style losses consume all samples, so no sampler is needed.
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    # Sigmoid classification drops the explicit background channel.
    self.cls_out_channels = (num_classes - 1
                             if self.use_sigmoid_cls else num_classes)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    # One generator per pyramid level for each anchor family.
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, text_anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.char_anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, char_anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = (len(self.text_anchor_ratios) + len(
        self.char_anchor_ratios)) * len(self.anchor_scales)
    self._init_layers()
def __init__(self,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             use_sigmoid_cls=False):
    """Build the RPN head: shared conv, objectness and regression branches."""
    super(RPNHead, self).__init__()
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = use_sigmoid_cls
    # One anchor generator per pyramid level.
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.rpn_conv = nn.Conv2d(in_channels, feat_channels, 3, padding=1)
    self.relu = nn.ReLU(inplace=True)
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    # Sigmoid: one logit per anchor; softmax: fg/bg pair per anchor.
    cls_channels = (self.num_anchors
                    if self.use_sigmoid_cls else self.num_anchors * 2)
    self.rpn_cls = nn.Conv2d(feat_channels, cls_channels, 1)
    self.rpn_reg = nn.Conv2d(feat_channels, self.num_anchors * 4, 1)
    self.debug_imgs = None
def __init__(
        self,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[8, 16, 32, 64, 128],  # [4, 8, 16, 32, 64] in Faster RCNN
        anchor_base_sizes=None,
        octave_base_scale=4,
        scales_per_octave=3,
        pos_iou_low=0.5,
        pos_iou_high=0.8,
        cls_out_channels=80):
    """Configure the anchor head.

    Fixes over the original:
    - ``cls_out_channels`` was hard-coded to 80 (COCO); it is now a
      parameter whose default (80) preserves the old behavior.
    - The dead store of ``anchor_scales`` to ``self.anchor_scales`` was
      removed: the attribute is always overwritten by the octave-derived
      scales below, so the ``anchor_scales`` argument is effectively
      ignored (kept in the signature for backward compatibility).
    """
    super(AnchorHead, self).__init__()
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    self.pos_iou_low = pos_iou_low
    self.pos_iou_high = pos_iou_high
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = list(
        anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
    # Scales inside one octave: 2^(i / scales_per_octave).
    self.octave_scales = np.array(
        [2**(i / scales_per_octave) for i in range(scales_per_octave)])
    self.anchor_scales = self.octave_scales * octave_base_scale  # 3 scales
    # One anchor generator per pyramid level.
    self.anchor_generators = []
    for anchor_base in self.anchor_base_sizes:
        self.anchor_generators.append(
            AnchorGenerator(anchor_base, self.anchor_scales,
                            self.anchor_ratios))
    self.num_anchors = len(self.anchor_ratios) * len(
        self.anchor_scales)  # 3*3
    self.cls_out_channels = cls_out_channels
def __init__(
        self,
        num_classes,
        in_channels,
        feat_channels=256,
        anchor_scales=[8, 16, 32],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        anchor_base_sizes=None,
        target_means=(.0, .0, .0, .0),
        target_stds=(1.0, 1.0, 1.0, 1.0),
):
    """Store anchor-head configuration and build per-level anchor generators."""
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    # Background class is excluded from the classification output.
    self.cls_out_channels = num_classes - 1
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             pre_defined_anchors=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=True,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss',
                            beta=1.0 / 9.0,
                            loss_weight=1.0)):
    """Configure the anchor head, optionally with pre-defined anchors.

    Fix: removed leftover debugging code — ``import pdb; pdb.set_trace()``
    (which halts every construction of the head), two debug ``print``
    calls, and a commented-out line.
    """
    super(AnchorHead, self).__init__()
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    self.pre_defined_anchors = pre_defined_anchors
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = list(
        anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # Focal-style losses consume all samples, so no sampler is needed.
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes - 1
    else:
        self.cls_out_channels = num_classes
    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    self.anchor_generators = []
    for anchor_base in self.anchor_base_sizes:
        self.anchor_generators.append(
            AnchorGenerator(anchor_base, anchor_scales, anchor_ratios,
                            pre_defined_anchors=self.pre_defined_anchors))
    # Read the anchor count from the generator so pre-defined anchors
    # are counted correctly (not just ratios * scales).
    self.num_anchors = self.anchor_generators[0].base_anchors.shape[0]
    self._init_layers()
def __init__(self,
             in_channels,
             num_classes,
             stacked_convs=4,
             feat_channels=256,
             octave_base_scale=4,
             scales_per_octave=3,
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[8, 16, 32, 64, 128],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0)):
    """Build the RetinaNet head: anchor generators plus cls/reg towers.

    Fix: the octave scale computation was re-executed inside the
    per-level loop although it does not depend on the base size; it is
    now hoisted out of the loop (same values, computed once).
    """
    super(RetinaHead, self).__init__()
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.octave_base_scale = octave_base_scale
    self.scales_per_octave = scales_per_octave
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = list(
        anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
    self.target_means = target_means
    self.target_stds = target_stds
    # Loop-invariant: scales depend only on the octave settings.
    octave_scales = np.array(
        [2**(i / scales_per_octave) for i in range(scales_per_octave)])
    anchor_scales = octave_scales * octave_base_scale
    self.anchor_generators = []
    for anchor_base in self.anchor_base_sizes:
        self.anchor_generators.append(
            AnchorGenerator(anchor_base, anchor_scales, anchor_ratios))
    self.relu = nn.ReLU(inplace=True)
    self.num_anchors = int(
        len(self.anchor_ratios) * self.scales_per_octave)
    # Sigmoid-style classification: no explicit background channel.
    self.cls_out_channels = self.num_classes - 1
    self.bbox_pred_dim = 4
    self.stacked_convs = stacked_convs
    # Parallel classification and regression towers.
    self.cls_convs = nn.ModuleList()
    self.reg_convs = nn.ModuleList()
    for i in range(self.stacked_convs):
        chn = in_channels if i == 0 else feat_channels
        self.cls_convs.append(
            nn.Conv2d(chn, feat_channels, 3, stride=1, padding=1))
        self.reg_convs.append(
            nn.Conv2d(chn, feat_channels, 3, stride=1, padding=1))
    self.retina_cls = nn.Conv2d(
        feat_channels,
        self.num_anchors * self.cls_out_channels,
        3,
        stride=1,
        padding=1)
    self.retina_reg = nn.Conv2d(
        feat_channels,
        self.num_anchors * self.bbox_pred_dim,
        3,
        stride=1,
        padding=1)
    self.debug_imgs = None
def __init__(self,
             num_classes,
             in_channels,
             stacked_convs=4,
             feat_channels=256,
             octave_base_scale=4,
             scales_per_octave=3,
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0, 1.0),
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=True,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss',
                            beta=1.0 / 9.0,
                            loss_weight=1.0)):
    """Configure the quadrilateral RetinaNet head."""
    super(QuadRetinaHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.stacked_convs = stacked_convs
    self.feat_channels = feat_channels
    self.octave_base_scale = octave_base_scale
    self.scales_per_octave = scales_per_octave
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    # Scales inside one octave: base * 2^(i / scales_per_octave).
    octave_scales = np.array(
        [2**(i / scales_per_octave) for i in range(scales_per_octave)])
    self.anchor_scales = octave_scales * octave_base_scale
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # Focal-style losses consume all samples, so no sampler is needed.
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes - 1
    else:
        self.cls_out_channels = num_classes
    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    self.anchor_generators = [
        AnchorGenerator(base_size, self.anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    # anchor cache
    self.base_anchors = dict()
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means_hbb=(.0, .0, .0, .0),
             target_stds_hbb=(1.0, 1.0, 1.0, 1.0),
             target_means_obb=(.0, .0, .0, .0),
             target_stds_obb=(1.0, 1.0, 1.0, 1.0),
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=True,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss',
                            beta=1.0 / 9.0,
                            loss_weight=1.0),
             loss_obb=dict(type='SmoothL1Loss',
                           beta=1.0 / 9.0,
                           loss_weight=1.0)):
    """Anchor head regressing both horizontal (hbb) and oriented (obb) boxes."""
    super(AO_RPNHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels  # used by subclasses, e.g. RPNHead
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means_hbb = target_means_hbb
    self.target_stds_hbb = target_stds_hbb
    self.target_means_obb = target_means_obb
    self.target_stds_obb = target_stds_obb
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # Focal-style losses consume all samples, so no sampler is needed.
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes - 1
    else:
        self.cls_out_channels = num_classes
    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.loss_obb = build_loss(loss_obb)
    self.fp16_enabled = False
    # One generator per level: produces anchors for the given
    # scales/ratios on a grid whose cell size is the base size.
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
def test_strides():
    """Grid anchors must honor both square and rectangular strides."""
    from mmdet.core import AnchorGenerator

    # Square stride: 10 in both directions.
    gen = AnchorGenerator([10], [1.], [1.], [10])
    anchors = gen.grid_anchors([(2, 2)], device='cpu')
    expected = torch.tensor([[-5., -5., 5., 5.],
                             [5., -5., 15., 5.],
                             [-5., 5., 5., 15.],
                             [5., 5., 15., 15.]])
    assert torch.equal(anchors[0], expected)

    # Rectangular stride: 10 along x, 20 along y.
    gen = AnchorGenerator([(10, 20)], [1.], [1.], [10])
    anchors = gen.grid_anchors([(2, 2)], device='cpu')
    expected = torch.tensor([[-5., -5., 5., 5.],
                             [5., -5., 15., 5.],
                             [-5., 15., 5., 25.],
                             [5., 15., 15., 25.]])
    assert torch.equal(anchors[0], expected)
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)):
    """Store anchor-head configuration, build losses and anchor generators."""
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # Focal-style losses consume all samples, so no sampler is needed.
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    if self.use_sigmoid_cls:
        # Sigmoid has one channel fewer than softmax: no background class.
        self.cls_out_channels = num_classes - 1
    else:
        self.cls_out_channels = num_classes
    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    # One generator per feature map; each level has one base size.
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    # Anchors per feature-map pixel.
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             with_cls=True,
             sampling=True,
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=True,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss',
                            beta=1.0 / 9.0,
                            loss_weight=1.0)):
    """Configure the cascade rotated-box anchor head.

    Fix: ``cls_out_channels`` was computed twice — once early and again
    with an identical if/else at the end of the method; the redundant
    recomputation is removed (same resulting value).
    """
    super(CascadeAnchorHeadRbbox, self).__init__()
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Default the per-level base sizes to the strides when not given.
    self.anchor_base_sizes = list(
        anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
    self.target_means = target_means
    self.target_stds = target_stds
    self.with_cls = with_cls
    self.sampling = sampling
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    self.cls_focal_loss = loss_cls['type'] in ['FocalLoss']
    # Sigmoid classification drops the explicit background channel.
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes - 1
    else:
        self.cls_out_channels = num_classes
    self.use_iou_reg = loss_bbox['type'] in ['IoULoss']
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    # Focal loss consumes all samples, so sampling must be disabled.
    if self.cls_focal_loss:
        assert not sampling
    self.anchor_generators = []
    for anchor_base in self.anchor_base_sizes:
        self.anchor_generators.append(
            AnchorGenerator(anchor_base, anchor_scales, anchor_ratios))
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
def __init__(
        self,
        num_classes,
        in_channels,
        feat_channels=256,
        anchor_scales=[8, 16, 32],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # downsampling strides
        anchor_base_sizes=None,
        target_means=(.0, .0, .0, .0),
        target_stds=(1.0, 1.0, 1.0, 1.0),
        loss_cls=dict(type='CrossEntropyLoss',
                      use_sigmoid=True,
                      loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss',
                       beta=1.0 / 9.0,
                       loss_weight=1.0)):
    """Store anchor-head configuration, build losses and anchor generators."""
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # When not given explicitly, base sizes fall back to the strides.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    # Defaults to False when 'use_sigmoid' is absent from the config.
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # FocalLoss / GHMC use every sample in the loss, so no sampler is
    # needed; anything else requires sampling.
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes - 1
    else:
        self.cls_out_channels = num_classes
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
def __init__(
    self,
    num_classes,
    in_channels,
    feat_channels=256,
    anchor_scales=[8, 16, 32],
    anchor_ratios=[0.5, 1.0, 2.0],
    anchor_strides=[4, 8, 16, 32, 64],
    anchor_base_sizes=None,
    target_means=(0.0, 0.0, 0.0, 0.0),
    target_stds=(1.0, 1.0, 1.0, 1.0),
    loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
    loss_bbox=dict(type="SmoothL1Loss", beta=1.0 / 9.0, loss_weight=1.0),
):
    """Store anchor-head configuration, build losses and anchor generators."""
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Default the per-level base sizes to the strides when not given.
    if anchor_base_sizes is None:
        self.anchor_base_sizes = list(anchor_strides)
    else:
        self.anchor_base_sizes = anchor_base_sizes
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = loss_cls.get("use_sigmoid", False)
    # Focal-style losses consume all samples, so no sampler is needed.
    self.sampling = loss_cls["type"] not in ["FocalLoss", "GHMC"]
    # Sigmoid classification drops the explicit background channel.
    self.cls_out_channels = (num_classes - 1
                             if self.use_sigmoid_cls else num_classes)
    if self.cls_out_channels <= 0:
        raise ValueError("num_classes={} is too small".format(num_classes))
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)):
    """Store anchor-head configuration, build losses and anchor generators."""
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Base anchor sizes default to the strides of the feature maps.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # Focal-style losses consume all samples, so no sampler is needed.
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    # Sigmoid classification omits the explicit background channel.
    self.cls_out_channels = (num_classes - 1
                             if self.use_sigmoid_cls else num_classes)
    # Build the classification and regression losses.
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    # One AnchorGenerator per base size (e.g. 4, 8, 16, 32, 64).
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
def __init__(self,
             num_classes,          # number of classes (2 for RPN fg/bg; e.g. 81 for final-stage classification)
             in_channels,          # channels of the input feature map
             feat_channels=256,    # channels of the intermediate conv layer
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)):
    """Store anchor-head configuration, build losses and anchor generators."""
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Base anchor sizes are derived from the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    # False when 'use_sigmoid' is absent from the loss config.
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # True unless the classification loss is FocalLoss or GHMC.
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    if self.use_sigmoid_cls:
        # Sigmoid classification: drop the background channel.
        self.cls_out_channels = num_classes - 1
    else:
        # Softmax classification keeps all channels.
        self.cls_out_channels = num_classes
    # Instantiate the configured cls/reg losses.
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False  # no half-precision by default
    # One AnchorGenerator per base size (mmdet.core.anchor_generator).
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
def __init__(
        self,
        num_classes,  # 2 for RPN, 81 for SSD — different roles
        in_channels,
        feat_channels=256,
        anchor_scales=[8, 16, 32],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # downsampling strides
        anchor_base_sizes=None,
        target_means=(.0, .0, .0, .0),
        target_stds=(1.0, 1.0, 1.0, 1.0),
        use_sigmoid_cls=False,
        use_focal_loss=False):
    """Store anchor-head configuration and build per-level anchor generators."""
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Base sizes default to the downsampling strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = use_sigmoid_cls
    self.use_focal_loss = use_focal_loss
    # One generator object per stride.
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    # Sigmoid binary classification has no background channel.
    self.cls_out_channels = (self.num_classes - 1
                             if self.use_sigmoid_cls else self.num_classes)
    # NOTE: when self is a subclass instance (e.g. RPNHead), the
    # subclass's _init_layers override runs here.
    self._init_layers()
def __init__(
        self,
        num_classes,
        in_channels,
        feat_channels=256,
        anchor_scales=[8, 16, 32],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        anchor_base_sizes=None,
        target_means=(.0, .0, .0, .0),  # for 4 points; GA only regresses 2 points
        target_stds=(1., 1., 1., 1.),   # for 4 points
        use_sigmoid_cls=False,
        use_focal_loss=False):
    """Store guided-anchoring head configuration and build anchor generators."""
    super(GAAnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Base sizes default to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = use_sigmoid_cls
    self.use_focal_loss = use_focal_loss
    # Per level, one generator producing anchors for every scale/ratio.
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    # Sigmoid classification drops the explicit background channel.
    self.cls_out_channels = (self.num_classes - 1
                             if self.use_sigmoid_cls else self.num_classes)
    # NOTE(review): double-underscore triggers name mangling
    # (_GAAnchorHead__init_layers) — confirm this is intentional and not
    # a typo for self._init_layers().
    self.__init_layers()
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             use_sigmoid_cls=False,
             use_focal_loss=False):
    """Store anchor-head configuration and build per-level anchor generators."""
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Base sizes default to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = use_sigmoid_cls
    self.use_focal_loss = use_focal_loss
    # RetinaNet example: base sizes [8, 16, 32, 64, 128],
    # scales [4, 4*2^(1/3), 4*2^(2/3)], ratios [0.5, 1.0, 2.0].
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    # Sigmoid classification drops the explicit background channel.
    self.cls_out_channels = (self.num_classes - 1
                             if self.use_sigmoid_cls else self.num_classes)
    self._init_layers()
def __init__(
        self,
        in_channels,
        num_conv=2,
        num_classes=2,
        feat_channels=256,
        anchor_scales=[8, 16, 32],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        anchor_base_sizes=None,
        anchoring_means=(.0, .0, .0, .0),
        anchoring_stds=(1.0, 1.0, 1.0, 1.0),
        target_means=(.0, .0, .0, .0),
        target_stds=(1.0, 1.0, 1.0, 1.0),
        loc_filter_thr=0.01,
        loss_loc=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox_first=dict(
            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_bbox=dict(
            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)):
    """Configure an anchor head with location branch and two-stage bbox losses."""
    super(AnchorHead, self).__init__()
    self.in_channels = in_channels
    self.num_conv = num_conv
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Base sizes default to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.anchoring_means = anchoring_means
    self.anchoring_stds = anchoring_stds
    self.target_means = target_means
    self.target_stds = target_stds
    self.loc_filter_thr = loc_filter_thr
    # Focal-style losses consume all samples, so no sampler is needed.
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    self.num_level_anchors = [131072, 32768, 8192, 2048, 512]
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    # one anchor per location
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    self.cls_focal_loss = loss_cls['type'] in ['FocalLoss']
    self.loc_focal_loss = loss_loc['type'] in ['FocalLoss']
    # Sigmoid classification drops the explicit background channel.
    self.cls_out_channels = (self.num_classes - 1
                             if self.use_sigmoid_cls else self.num_classes)
    # Instantiate all configured losses.
    self.loss_loc = build_loss(loss_loc)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox_first = build_loss(loss_bbox_first)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=True,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss',
                            beta=1.0 / 9.0,
                            loss_weight=1.0),
             forest_clssifier=None):
    """Anchor head with an optional hierarchical (forest) classifier."""
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    # Base sizes default to the strides when not given.
    self.anchor_base_sizes = (list(anchor_strides)
                              if anchor_base_sizes is None
                              else anchor_base_sizes)
    self.target_means = target_means
    self.target_stds = target_stds
    if forest_clssifier is None:
        self.use_forest = False
    else:
        # Unpack the forest-classifier configuration.
        self.use_forest = forest_clssifier['use_forest']
        self.parent_class_num = forest_clssifier['parent_class_num']
        self.fine_grained_class_num = forest_clssifier[
            'fine_grained_class_num']
        self.all_classes_num = self.parent_class_num + [
            self.fine_grained_class_num
        ]
        self.forest_structure = forest_clssifier['forest_structure']
        # Each parent level needs a matching structure entry.
        assert len(self.forest_structure) == len(self.parent_class_num)
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # Focal-style losses consume all samples, so no sampler is needed.
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    # Sigmoid classification drops the explicit background channel.
    self.cls_out_channels = (num_classes - 1
                             if self.use_sigmoid_cls else num_classes)
    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             octave_base_scale=8,
             scales_per_octave=3,
             octave_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             anchoring_means=(.0, .0, .0, .0),
             anchoring_stds=(1.0, 1.0, 1.0, 1.0),
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             deformable_groups=4,
             loc_filter_thr=0.01,
             loss_loc=dict(type='FocalLoss',
                           use_sigmoid=True,
                           gamma=2.0,
                           alpha=0.25,
                           loss_weight=1.0),
             loss_shape=dict(type='IoULoss',
                             style='bounded',
                             beta=0.2,
                             loss_weight=1.0),
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=True,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                            loss_weight=1.0)):
    """Guided-anchoring style head: builds per-level generators for anchor
    "approxs" (full octave-scale/ratio grid) and "squares" (one square
    anchor), plus location, shape, classification and bbox losses.

    Also sets IoU-balanced-loss flags from the cls/bbox loss types
    (addition attributed in-code to Shengkai Wu).
    """
    super(AnchorHead, self).__init__()
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.octave_base_scale = octave_base_scale
    self.scales_per_octave = scales_per_octave
    # Scales within one octave: base * 2^(i/n) for i = 0..n-1.
    self.octave_scales = octave_base_scale * np.array(
        [2**(i / scales_per_octave) for i in range(scales_per_octave)])
    self.approxs_per_octave = len(self.octave_scales) * len(octave_ratios)
    self.octave_ratios = octave_ratios
    self.anchor_strides = anchor_strides
    self.anchor_base_sizes = list(
        anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
    self.anchoring_means = anchoring_means
    self.anchoring_stds = anchoring_stds
    self.target_means = target_means
    self.target_stds = target_stds
    self.deformable_groups = deformable_groups
    # Score threshold used to filter predicted anchor locations.
    self.loc_filter_thr = loc_filter_thr
    self.approx_generators = []
    self.square_generators = []
    for anchor_base in self.anchor_base_sizes:
        # Generators for approxs
        self.approx_generators.append(
            AnchorGenerator(anchor_base, self.octave_scales,
                            self.octave_ratios))
        # Generators for squares
        self.square_generators.append(
            AnchorGenerator(anchor_base, [self.octave_base_scale], [1.0]))
    # one anchor per location
    self.num_anchors = 1
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    self.cls_focal_loss = loss_cls['type'] in ['FocalLoss']
    self.loc_focal_loss = loss_loc['type'] in ['FocalLoss']
    # Sigmoid classification drops the explicit background channel.
    if self.use_sigmoid_cls:
        self.cls_out_channels = self.num_classes - 1
    else:
        self.cls_out_channels = self.num_classes
    # build losses
    self.loss_loc = build_loss(loss_loc)
    self.loss_shape = build_loss(loss_shape)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self._init_layers()
    # added by Shengkai Wu
    # Flags enabling the IoU-balanced variants of the cls/loc losses.
    self.IoU_balanced_Cls = loss_cls['type'] in [
        'IOUbalancedCrossEntropyLoss', 'IOUbalancedSigmoidFocalLoss'
    ]
    self.IoU_balanced_Loc = loss_bbox['type'] in [
        'IoUbalancedSmoothL1Loss'
    ]
def __init__(self,
             input_size=300,
             num_classes=81,
             norm_eval=False,
             freeze_all=False,
             in_channels=(576, 1280, 512, 256, 256, 128),
             anchor_strides=(16, 32, 64, 128, 150, 300),
             basesize_ratio_range=(0.1, 0.9),
             anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0)):
    """SSD-style head using depthwise-separable convs (lite variant).

    Args:
        input_size (int): Input image size (300 or 512).
        num_classes (int): Number of classes including background.
        norm_eval (bool): If True, put all BN layers into eval mode.
        freeze_all (bool): If True, attempt to freeze all conv modules.
        in_channels (tuple[int]): Channels of each source feature map.
        anchor_strides (tuple[int]): Stride of each feature level.
        basesize_ratio_range (tuple[float]): Min/max prior-size ratios.
        anchor_ratios (tuple[list[int]]): Extra aspect ratios per level.
        target_means (tuple[float]): Bbox regression target means.
        target_stds (tuple[float]): Bbox regression target stds.
    """
    super(AnchorHead, self).__init__()
    self.input_size = input_size
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.cls_out_channels = num_classes
    self.norm_eval = norm_eval
    self.freeze_all = freeze_all
    num_anchors = [len(ratios) * 2 + 2 for ratios in anchor_ratios]
    # num_anchors = [4, 6, 6, 6, 4, 4], (if 1 then 4, if 2 then 6)
    reg_convs = []
    cls_convs = []
    for i in range(len(in_channels)):
        reg_convs.append(
            SeperableConv2d(
                in_channels[i],
                num_anchors[i] * 4,
                kernel_size=3,
                padding=1))
        cls_convs.append(
            SeperableConv2d(
                in_channels[i],
                num_anchors[i] * num_classes,
                kernel_size=3,
                padding=1))
    self.reg_convs = nn.ModuleList(reg_convs)
    self.cls_convs = nn.ModuleList(cls_convs)
    # Derive per-level prior sizes from the ratio range (SSD recipe):
    # convert to integer percentages and spread levels evenly.
    min_ratio, max_ratio = basesize_ratio_range
    min_ratio = int(min_ratio * 100)
    max_ratio = int(max_ratio * 100)
    step = int(np.floor(max_ratio - min_ratio) / (len(in_channels) - 2))
    min_sizes = []
    max_sizes = []
    for r in range(int(min_ratio), int(max_ratio) + 1, step):
        min_sizes.append(int(input_size * r / 100))
        max_sizes.append(int(input_size * (r + step) / 100))
    # The first (highest-resolution) level gets a special smaller prior
    # depending on input size and dataset config.
    if input_size == 300:
        if basesize_ratio_range[0] == 0.15:  # SSD300 COCO
            min_sizes.insert(0, int(input_size * 7 / 100))
            max_sizes.insert(0, int(input_size * 15 / 100))
        elif basesize_ratio_range[0] == 0.2:  # SSD300 VOC
            min_sizes.insert(0, int(input_size * 10 / 100))
            max_sizes.insert(0, int(input_size * 20 / 100))
    elif input_size == 512:
        if basesize_ratio_range[0] == 0.1:  # SSD512 COCO
            min_sizes.insert(0, int(input_size * 4 / 100))
            max_sizes.insert(0, int(input_size * 10 / 100))
        elif basesize_ratio_range[0] == 0.15:  # SSD512 VOC
            min_sizes.insert(0, int(input_size * 7 / 100))
            max_sizes.insert(0, int(input_size * 15 / 100))
    self.anchor_generators = []
    self.anchor_strides = anchor_strides
    for k in range(len(anchor_strides)):
        base_size = min_sizes[k]
        stride = anchor_strides[k]
        # Anchor centers sit at the pixel-grid cell centers.
        ctr = ((stride - 1) / 2., (stride - 1) / 2.)
        scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]  # Typical value: [1., 1.414]
        ratios = [1.]
        for r in anchor_ratios[k]:
            ratios += [1 / r, r]
        anchor_generator = AnchorGenerator(
            base_size, scales, ratios, scale_major=False, ctr=ctr)
        # Select/reorder base anchors into the canonical SSD order.
        indices = list(range(len(ratios)))
        indices.insert(1, len(indices))
        anchor_generator.base_anchors = torch.index_select(
            anchor_generator.base_anchors, 0, torch.LongTensor(indices))
        self.anchor_generators.append(anchor_generator)
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = False
    self.cls_focal_loss = False
    self.fp16_enabled = False
    if self.norm_eval:
        self.apply(set_bn_to_eval)
    if self.freeze_all:

        def _freeze_conv(m):
            # NOTE(review): this sets ``requires_grad`` on the Module
            # object itself, not on its parameters — confirm it actually
            # freezes the weights.
            classname = m.__class__.__name__
            if classname.find('Conv') != -1:
                m.requires_grad = False

        self.apply(_freeze_conv)
def __init__(
        self,
        input_size=300,  # 512: 512
        num_classes=81,
        in_channels=(512, 1024, 512, 256, 256, 256),
        # 512: (512, 1024, 512, 256, 256, 256, 256)
        anchor_strides=(8, 16, 32, 64, 100, 300),
        # 512: (8, 16, 32, 64, 128, 256, 512)
        anchor_ratios=([2, 3], [2, 3], [2, 3], [2, 3], [2], [2]),
        # 512: ([2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2])
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2)):
    """SSD head with hard-coded prior-box sizes per (input_size, dataset).

    Args:
        input_size (int): Input image size, 300 or 512.
        num_classes (int): 81 selects the COCO size tables, 21 the VOC ones.
        in_channels (tuple[int]): Channels of each source feature map.
        anchor_strides (tuple[int]): Stride of each feature level.
        anchor_ratios (tuple[list[int]]): Extra aspect ratios per level.
        target_means (tuple[float]): Bbox regression target means.
        target_stds (tuple[float]): Bbox regression target stds.

    Raises:
        ValueError: If (input_size, num_classes) is not one of the four
            supported combinations.
    """
    super(AnchorHead, self).__init__()
    self.input_size = input_size
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.cls_out_channels = num_classes
    # 4 anchors for one extra ratio, 6 for two: len(ratios) * 2 + 2.
    num_anchors = [len(ratios) * 2 + 2 for ratios in anchor_ratios]
    reg_convs = []
    cls_convs = []
    for i in range(len(in_channels)):
        reg_convs.append(
            nn.Conv2d(in_channels[i],
                      num_anchors[i] * 4,
                      kernel_size=3,
                      padding=1))
        cls_convs.append(
            nn.Conv2d(in_channels[i],
                      num_anchors[i] * num_classes,
                      kernel_size=3,
                      padding=1))
    self.reg_convs = nn.ModuleList(reg_convs)
    self.cls_convs = nn.ModuleList(cls_convs)
    # Hard-coded SSD prior sizes; `and` replaces the bitwise `&` of the
    # original, and unsupported combinations now fail fast instead of
    # crashing later with a NameError on min_sizes.
    if input_size == 300 and num_classes == 81:  # SSD300 COCO
        min_sizes = [21, 45, 99, 153, 207, 261]
        max_sizes = [45, 99, 153, 207, 261, 315]
    elif input_size == 512 and num_classes == 81:  # SSD512 COCO
        min_sizes = [20.48, 51.2, 133.12, 215.04, 296.96, 378.88, 460.8]
        max_sizes = [51.2, 133.12, 215.04, 296.96, 378.88, 460.8, 542.72]
    elif input_size == 300 and num_classes == 21:  # SSD300 VOC
        min_sizes = [30, 60, 111, 162, 213, 264]
        max_sizes = [60, 111, 162, 213, 264, 315]
    elif input_size == 512 and num_classes == 21:  # SSD512 VOC
        # NOTE(review): min_sizes[3] = 220 vs max_sizes[2] = 230 disagree;
        # values kept as-is to preserve behavior, but one looks like a typo.
        min_sizes = [35, 76, 153, 220, 307, 384, 460]
        max_sizes = [76, 153, 230, 307, 384, 460, 537]
    else:
        raise ValueError(
            'unsupported (input_size, num_classes) combination: '
            '({}, {})'.format(input_size, num_classes))
    self.anchor_generators = []
    self.anchor_strides = anchor_strides
    for k in range(len(anchor_strides)):
        base_size = min_sizes[k]
        stride = anchor_strides[k]
        # Anchor centers sit at the pixel-grid cell centers.
        ctr = ((stride - 1) / 2., (stride - 1) / 2.)
        scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]
        ratios = [1.]
        for r in anchor_ratios[k]:
            ratios += [1 / r, r]  # 4 or 6 ratio
        anchor_generator = AnchorGenerator(
            base_size, scales, ratios, scale_major=False, ctr=ctr)
        # Select/reorder base anchors into the canonical SSD order
        # (all ratios at scale 1, plus the larger square anchor second).
        indices = list(range(len(ratios)))
        indices.insert(1, len(indices))
        anchor_generator.base_anchors = torch.index_select(
            anchor_generator.base_anchors, 0, torch.LongTensor(indices))
        self.anchor_generators.append(anchor_generator)
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = False
    self.cls_focal_loss = False
    self.fp16_enabled = False
def __init__(
        self,
        num_classes,
        in_channels,
        feat_channels=256,
        octave_base_scale=8,
        scales_per_octave=3,
        octave_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        anchor_base_sizes=None,
        anchoring_means=(0.0, 0.0, 0.0, 0.0),
        anchoring_stds=(1.0, 1.0, 1.0, 1.0),
        target_means=(0.0, 0.0, 0.0, 0.0),
        target_stds=(1.0, 1.0, 1.0, 1.0),
        deformable_groups=4,
        loc_filter_thr=0.01,
        loss_loc=dict(
            type="FocalLoss", use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0
        ),
        loss_shape=dict(type="BoundedIoULoss", beta=0.2, loss_weight=1.0),
        loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0),
):  # yapf: disable
    """Initialize a guided-anchoring head.

    Per pyramid level it creates two generators: "approxs" over the full
    octave-scale/ratio grid and "squares" with a single square anchor.
    The location, shape, classification and bbox losses are then built
    from their configs and the layers are created.
    """
    super(AnchorHead, self).__init__()
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.octave_base_scale = octave_base_scale
    self.scales_per_octave = scales_per_octave
    # Scales inside one octave: base * 2^(k / n) for k = 0..n-1.
    self.octave_scales = octave_base_scale * np.array(
        [2**(step / scales_per_octave) for step in range(scales_per_octave)])
    self.approxs_per_octave = len(self.octave_scales) * len(octave_ratios)
    self.octave_ratios = octave_ratios
    self.anchor_strides = anchor_strides
    self.anchor_base_sizes = (anchor_base_sizes
                              if anchor_base_sizes is not None
                              else list(anchor_strides))
    self.anchoring_means = anchoring_means
    self.anchoring_stds = anchoring_stds
    self.target_means = target_means
    self.target_stds = target_stds
    self.deformable_groups = deformable_groups
    self.loc_filter_thr = loc_filter_thr
    # Per-level generators: full approx grid vs. one square anchor.
    self.approx_generators = [
        AnchorGenerator(base, self.octave_scales, self.octave_ratios)
        for base in self.anchor_base_sizes
    ]
    self.square_generators = [
        AnchorGenerator(base, [self.octave_base_scale], [1.0])
        for base in self.anchor_base_sizes
    ]
    # Guided anchoring predicts a single anchor per location.
    self.num_anchors = 1
    self.use_sigmoid_cls = loss_cls.get("use_sigmoid", False)
    self.cls_focal_loss = loss_cls["type"] in ["FocalLoss"]
    self.loc_focal_loss = loss_loc["type"] in ["FocalLoss"]
    # Sigmoid classification drops the explicit background channel.
    self.cls_out_channels = (self.num_classes - 1
                             if self.use_sigmoid_cls else self.num_classes)
    # build losses
    self.loss_loc = build_loss(loss_loc)
    self.loss_shape = build_loss(loss_shape)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             semantic_dims=300,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             vec_path=None,
             voc_path=None,
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=True,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss',
                            beta=1.0 / 9.0,
                            loss_weight=1.0)):
    """Anchor head that augments classification with semantic word vectors
    loaded from CSV files.

    Args:
        num_classes (int): Number of classes including background.
        in_channels (int): Channels of the input feature map.
        semantic_dims (int): Dimensionality of the word vectors.
        feat_channels (int): Channels of the intermediate conv features.
        anchor_scales, anchor_ratios, anchor_strides, anchor_base_sizes:
            Standard anchor configuration (see AnchorHead).
        target_means (tuple[float]): Bbox regression target means.
        target_stds (tuple[float]): Bbox regression target stds.
        vec_path (str): CSV file with the word-vector matrix.
        voc_path (str): CSV file with the vocabulary matrix.
        loss_cls (dict): Classification loss config.
        loss_bbox (dict): Bbox regression loss config.
    """
    super(AnchorSemanticHead, self).__init__()
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.semantic_dims = semantic_dims
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    self.anchor_base_sizes = list(
        anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
    self.target_means = target_means
    self.target_stds = target_stds
    self.voc_path = voc_path
    self.vec_path = vec_path
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes - 1
    else:
        self.cls_out_channels = num_classes
    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    self.anchor_generators = []
    for anchor_base in self.anchor_base_sizes:
        self.anchor_generators.append(
            AnchorGenerator(anchor_base, anchor_scales, anchor_ratios))
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
    # Load the vocabulary and word-vector matrices from disk; only the
    # first num_classes columns of the vector file are used.
    voc = np.loadtxt(self.voc_path, dtype='float32', delimiter=',')
    vec_load = np.loadtxt(self.vec_path, dtype='float32', delimiter=',')
    vec = vec_load[:, :self.num_classes]
    self.voc = torch.tensor(voc, dtype=torch.float32).cuda()
    self.vec = torch.tensor(vec, dtype=torch.float32)
    # Background column of the word-vector matrix.
    # was: torch.tensor(<tensor>) — raises a copy-construction warning;
    # clone().detach() is the supported equivalent.  The hard-coded 300
    # is replaced by semantic_dims (default 300 keeps behavior identical).
    # NOTE(review): assumes vec has semantic_dims rows — confirm against
    # the vec_path file layout.
    self.vec_bg_weight = self.vec[:, 0].clone().detach().cuda().view(
        [1, self.semantic_dims])
    self.kernel_semantic = nn.Linear(self.voc.shape[1], self.vec.shape[0])
    self.vec_bg = nn.Linear(self.semantic_dims, 2, bias=False)
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)):
    """Anchor head supporting either a shared anchor config for all levels
    or per-level configs (``anchor_scales`` as a list of lists).

    Args:
        num_classes (int): Number of classes including background.
        in_channels (int): Channels of the input feature map.
        feat_channels (int): Channels of the intermediate conv features.
        anchor_scales (list): Either one scale list shared by all levels,
            or one list per level for multi-scale mode.
        anchor_ratios (list): Ratio list, or one list per level in
            multi-scale mode.
        anchor_strides (list[int]): Feature-map stride per pyramid level.
        anchor_base_sizes (list[int] | None): Base sizes; defaults to
            ``anchor_strides``.
        target_means (tuple[float]): Bbox regression target means.
        target_stds (tuple[float]): Bbox regression target stds.
        loss_cls (dict): Classification loss config.
        loss_bbox (dict): Bbox regression loss config.
    """
    super(AnchorHead, self).__init__()
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    self.anchor_base_sizes = list(
        anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes - 1
    else:
        self.cls_out_channels = num_classes
    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.fp16_enabled = False
    self.anchor_generators = []
    # A list-of-lists anchor_scales selects per-level configuration.
    # (attribute name 'muti_scales' kept as-is for backward compatibility)
    self.muti_scales = isinstance(self.anchor_scales[0], list)
    if self.muti_scales:
        assert len(self.anchor_ratios) == len(self.anchor_scales)
        # was: range(5) — hard-coded pyramid level count; use the actual
        # number of levels so other FPN depths work too.
        self.num_anchors_list = [
            len(scales) for scales in self.anchor_scales
        ]
        # NOTE(review): per-level ratios are not factored into
        # num_anchors_list — confirm each level uses a single ratio.
        for i, anchor_base in enumerate(self.anchor_base_sizes):
            self.anchor_generators.append(
                AnchorGenerator(anchor_base, anchor_scales[i],
                                anchor_ratios[i]))
        self.num_anchors = max(self.num_anchors_list)
    else:
        for anchor_base in self.anchor_base_sizes:
            self.anchor_generators.append(
                AnchorGenerator(anchor_base, anchor_scales, anchor_ratios))
        self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self._init_layers()
def __init__(self,
             input_size=300,
             num_classes=81,
             in_channels=(512, 1024, 512, 256, 256, 256),
             anchor_strides=(8, 16, 32, 64, 100, 300),
             basesize_ratio_range=(0.1, 0.9),
             anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0)):
    """SSD detection head.

    Builds per-level regression/classification convs and one
    AnchorGenerator per stride, with prior-box sizes derived from
    ``basesize_ratio_range`` following the SSD recipe.

    Args:
        input_size (int): Input image size (300 or 512).
        num_classes (int): Number of classes including background.
        in_channels (tuple[int]): Channels of each source feature map.
        anchor_strides (tuple[int]): Stride of each feature level.
        basesize_ratio_range (tuple[float]): Min/max prior-size ratios.
        anchor_ratios (tuple[list[int]]): Extra aspect ratios per level.
        target_means (tuple[float]): Bbox regression target means.
        target_stds (tuple[float]): Bbox regression target stds.
    """
    super(AnchorHead, self).__init__()
    self.input_size = input_size
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.cls_out_channels = num_classes
    # 2 square anchors plus a (r, 1/r) pair per extra ratio.
    num_anchors = [len(ratios) * 2 + 2 for ratios in anchor_ratios]
    reg_convs = []
    cls_convs = []
    for i in range(len(in_channels)):
        reg_convs.append(
            nn.Conv2d(in_channels[i],
                      num_anchors[i] * 4,
                      kernel_size=3,
                      padding=1))
        cls_convs.append(
            nn.Conv2d(in_channels[i],
                      num_anchors[i] * num_classes,
                      kernel_size=3,
                      padding=1))
    self.reg_convs = nn.ModuleList(reg_convs)
    self.cls_convs = nn.ModuleList(cls_convs)
    # Convert the ratio range to integer percentages and spread the
    # intermediate levels evenly between them.
    min_ratio, max_ratio = basesize_ratio_range
    min_ratio = int(min_ratio * 100)
    max_ratio = int(max_ratio * 100)
    step = int(np.floor(max_ratio - min_ratio) / (len(in_channels) - 2))
    min_sizes = []
    max_sizes = []
    for r in range(int(min_ratio), int(max_ratio) + 1, step):
        min_sizes.append(int(input_size * r / 100))
        max_sizes.append(int(input_size * (r + step) / 100))
    # The first (highest-resolution) level uses a special smaller prior
    # depending on input size and dataset config.
    if input_size == 300:
        if basesize_ratio_range[0] == 0.15:  # SSD300 COCO
            min_sizes.insert(0, int(input_size * 7 / 100))
            max_sizes.insert(0, int(input_size * 15 / 100))
        elif basesize_ratio_range[0] == 0.2:  # SSD300 VOC
            min_sizes.insert(0, int(input_size * 10 / 100))
            max_sizes.insert(0, int(input_size * 20 / 100))
    elif input_size == 512:
        if basesize_ratio_range[0] == 0.1:  # SSD512 COCO
            min_sizes.insert(0, int(input_size * 4 / 100))
            max_sizes.insert(0, int(input_size * 10 / 100))
        elif basesize_ratio_range[0] == 0.15:  # SSD512 VOC
            min_sizes.insert(0, int(input_size * 7 / 100))
            max_sizes.insert(0, int(input_size * 15 / 100))
    self.anchor_generators = []
    self.anchor_strides = anchor_strides
    for k in range(len(anchor_strides)):
        base_size = min_sizes[k]
        stride = anchor_strides[k]
        # Anchor centers sit at the pixel-grid cell centers.
        ctr = ((stride - 1) / 2., (stride - 1) / 2.)
        scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]
        ratios = [1.]
        for r in anchor_ratios[k]:
            ratios += [1 / r, r]  # 4 or 6 ratio
        anchor_generator = AnchorGenerator(
            base_size, scales, ratios, scale_major=False, ctr=ctr)
        # Select/reorder base anchors into the canonical SSD order
        # (all ratios at scale 1, plus the larger square anchor second).
        indices = list(range(len(ratios)))
        indices.insert(1, len(indices))
        anchor_generator.base_anchors = torch.index_select(
            anchor_generator.base_anchors, 0, torch.LongTensor(indices))
        self.anchor_generators.append(anchor_generator)
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = False
    self.cls_focal_loss = False
    self.fp16_enabled = False
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             stacked_convs=2,
             align_conv_type='AlignConv',  # [AlignConv,DCN,GA_DCN,Conv]
             align_conv_size=3,
             with_orconv=True,
             anchor_scales=[4],
             anchor_ratios=[1.0],
             anchor_strides=[8, 16, 32, 64, 128],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0, 1.0),
             loss_fam_cls=dict(type='FocalLoss',
                               use_sigmoid=True,
                               gamma=2.0,
                               alpha=0.25,
                               loss_weight=1.0),
             loss_fam_bbox=dict(type='SmoothL1Loss',
                                beta=1.0 / 9.0,
                                loss_weight=1.0),
             loss_odm_cls=dict(type='FocalLoss',
                               use_sigmoid=True,
                               gamma=2.0,
                               alpha=0.25,
                               loss_weight=1.0),
             loss_odm_bbox=dict(type='SmoothL1Loss',
                                beta=1.0 / 9.0,
                                loss_weight=1.0)):
    """Initialize the S2A-Net head (FAM + ODM stages).

    Stores the anchor/alignment configuration, builds the four losses
    (FAM and ODM, classification and bbox each) plus one AnchorGenerator
    per pyramid level, then creates the layers.
    """
    super(S2ANetHead, self).__init__()
    # Basic feature configuration.
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.stacked_convs = stacked_convs
    # Alignment-convolution settings (AlignConv / DCN / GA_DCN / Conv).
    self.align_conv_type = align_conv_type
    self.align_conv_size = align_conv_size
    self.with_orconv = with_orconv
    # Anchor configuration.
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    self.anchor_base_sizes = (anchor_base_sizes
                              if anchor_base_sizes is not None
                              else list(anchor_strides))
    self.target_means = target_means
    self.target_stds = target_stds
    # The classification output layout is driven by the ODM loss config.
    self.use_sigmoid_cls = loss_odm_cls.get('use_sigmoid', False)
    self.sampling = loss_odm_cls['type'] not in ['FocalLoss', 'GHMC']
    self.cls_out_channels = (num_classes - 1
                             if self.use_sigmoid_cls else num_classes)
    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))
    # Build all four losses.
    self.loss_fam_cls = build_loss(loss_fam_cls)
    self.loss_fam_bbox = build_loss(loss_fam_bbox)
    self.loss_odm_cls = build_loss(loss_odm_cls)
    self.loss_odm_bbox = build_loss(loss_odm_bbox)
    self.fp16_enabled = False
    # One anchor generator per feature-pyramid level.
    self.anchor_generators = [
        AnchorGenerator(base_size, anchor_scales, anchor_ratios)
        for base_size in self.anchor_base_sizes
    ]
    # training mode
    self.training = True
    # anchor cache
    self.base_anchors = dict()
    self._init_layers()
def __init__(self,
             input_size=300,
             num_classes=81,
             in_channels=(512, 1024, 512, 256, 256, 256),
             anchor_strides=(8, 16, 32, 64, 100, 300),
             # basesize_ratio_range=(0.1, 0.9),
             basesize_ratio_range=(0.2, 0.9),
             anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0)):
    """SSD head (VOC-style default: ratio range starting at 0.2).

    Builds per-level regression/classification convs and one
    AnchorGenerator per stride; prior-box sizes follow the SSD recipe.
    The inline worked examples below assume the default arguments.
    """
    super(AnchorHead, self).__init__()
    self.input_size = input_size
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.cls_out_channels = num_classes
    # num_anchors = [4, 6, 6, 6, 4, 4]
    num_anchors = [len(ratios) * 2 + 2 for ratios in anchor_ratios]
    reg_convs = []
    cls_convs = []
    for i in range(len(in_channels)):
        reg_convs.append(
            nn.Conv2d(
                in_channels[i],
                num_anchors[i] * 4,
                kernel_size=3,
                padding=1))
        cls_convs.append(
            nn.Conv2d(
                in_channels[i],
                num_anchors[i] * num_classes,
                kernel_size=3,
                padding=1))
    self.reg_convs = nn.ModuleList(reg_convs)
    self.cls_convs = nn.ModuleList(cls_convs)
    min_ratio, max_ratio = basesize_ratio_range
    # min_ratio = 20
    # max_ratio = 90
    min_ratio = int(min_ratio * 100)
    max_ratio = int(max_ratio * 100)
    # step = 17
    step = int(np.floor(max_ratio - min_ratio) / (len(in_channels) - 2))
    min_sizes = []
    max_sizes = []
    # for r in range(20, 91, 17)
    # r = 10, 30, 50, 70, 90
    for r in range(int(min_ratio), int(max_ratio) + 1, step):
        # min_sizes = [60, 111, 162, 213, 264]
        # max_sizes = [111, 162, 213, 264, 315]
        min_sizes.append(int(input_size * r / 100))
        max_sizes.append(int(input_size * (r + step) / 100))
    # After the first-level insert below:
    # min_sizes = [30, 60, 111, 162, 213, 264]
    # max_sizes = [60, 111, 162, 213, 264, 315]
    if input_size == 300:
        if basesize_ratio_range[0] == 0.15:  # SSD300 COCO
            min_sizes.insert(0, int(input_size * 7 / 100))
            max_sizes.insert(0, int(input_size * 15 / 100))
        elif basesize_ratio_range[0] == 0.2:  # SSD300 VOC
            min_sizes.insert(0, int(input_size * 10 / 100))
            max_sizes.insert(0, int(input_size * 20 / 100))
    elif input_size == 512:
        if basesize_ratio_range[0] == 0.1:  # SSD512 COCO
            min_sizes.insert(0, int(input_size * 4 / 100))
            max_sizes.insert(0, int(input_size * 10 / 100))
        elif basesize_ratio_range[0] == 0.15:  # SSD512 VOC
            min_sizes.insert(0, int(input_size * 7 / 100))
            max_sizes.insert(0, int(input_size * 15 / 100))
    self.anchor_generators = []
    self.anchor_strides = anchor_strides
    # for k in range(6):
    for k in range(len(anchor_strides)):
        base_size = min_sizes[k]  # 30, 60, 111, 162, 213, 264
        stride = anchor_strides[k]  # 8, 16, 32, 64, 100, 300
        # ctr: anchor center coordinates (cx, cy)
        # ctr: (3.5, 3.5), (7.5, 7.5), (15.5, 15.5), (31.5, 31.5),
        #      (49.5, 49.5), (149.5, 149.5)
        ctr = ((stride - 1) / 2., (stride - 1) / 2.)
        # scales: [1.0, 1.4142135623730951] [1.0, 1.3601470508735443]
        #         [1.0, 1.2080808993852437] [1.0, 1.1466537466972386]
        #         [1.0, 1.1132998786123665] [1.0, 1.0923286218816286]
        scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]
        # ratios per level:
        # [1.0, 0.5, 2]
        # [1.0, 0.5, 2, 0.3333333333333333, 3]  (x3 middle levels)
        # [1.0, 0.5, 2] (last two levels)
        ratios = [1.]
        # r is [2] or [2, 3]
        for r in anchor_ratios[k]:
            # r=2 -> ratios=[1.0, 0.5, 2]
            # r=3 -> ratios=[1.0, 0.5, 2, 0.3333333333333333, 3]
            ratios += [1 / r, r]  # 4 or 6 ratio
        # For each stride, generate the base anchor coordinates from
        # base_size, scales, ratios and ctr; shapes before selection:
        # torch.Size([6, 4], [10, 4], [10, 4], [10, 4], [6, 4], [6, 4])
        anchor_generator = AnchorGenerator(
            base_size, scales, ratios, scale_major=False, ctr=ctr)
        indices = list(range(len(ratios)))
        # indices: [0, 3, 1, 2]
        #          [0, 5, 1, 2, 3, 4] (x3 middle levels)
        #          [0, 3, 1, 2] (last two levels)
        indices.insert(1, len(indices))
        # Select base_anchors in the order given by indices; afterwards
        # base_anchors shapes are
        # torch.Size([4, 4], [6, 4], [6, 4], [6, 4], [4, 4], [4, 4])
        anchor_generator.base_anchors = torch.index_select(
            anchor_generator.base_anchors, 0, torch.LongTensor(indices))
        # self.anchor_generators holds one AnchorGenerator per stride.
        self.anchor_generators.append(anchor_generator)
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = False
    self.use_focal_loss = False