예제 #1
0
    def __init__(self,
                 with_avg_pool=False,
                 with_cls=True,
                 with_reg=True,
                 roi_feat_size=7,
                 in_channels=256,
                 num_classes=80,
                 bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                                 clip_border=True,
                                 target_means=[0., 0., 0., 0.],
                                 target_stds=[0.1, 0.1, 0.2, 0.2]),
                 reg_class_agnostic=False,
                 reg_decoded_bbox=False,
                 loss_cls=dict(type='CrossEntropyLoss',
                               use_sigmoid=False,
                               loss_weight=1.0),
                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                                loss_weight=1.0),
                 init_cfg=None):
        """Create the coder, the losses and the final FC layers of the head.

        Args:
            with_avg_pool (bool): When true an ``nn.AvgPool2d`` over the RoI
                window is created and the FC input width is ``in_channels``;
                otherwise the width is ``in_channels * roi_feat_area``.
            with_cls (bool): Create ``fc_cls`` (``num_classes + 1`` outputs).
            with_reg (bool): Create ``fc_reg``.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Channel count of the incoming RoI features.
            num_classes (int): Number of classes, background excluded.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            reg_class_agnostic (bool): When true ``fc_reg`` has 4 outputs,
                otherwise ``4 * num_classes``.
            reg_decoded_bbox (bool): Stored on the instance; not used here.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
            init_cfg (dict | list | None): Forwarded to the parent class.
                When ``None`` a default ``Normal`` init for the created FC
                layers is installed on ``self.init_cfg``.
        """
        super(BBoxHead, self).__init__(init_cfg)
        assert with_cls or with_reg

        # Plain configuration values.
        self.with_avg_pool = with_avg_pool
        self.with_cls = with_cls
        self.with_reg = with_reg
        feat_size = _pair(roi_feat_size)
        self.roi_feat_size = feat_size
        self.roi_feat_area = feat_size[0] * feat_size[1]
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.reg_decoded_bbox = reg_decoded_bbox
        self.fp16_enabled = False

        # Components built from their configs.
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        # Width of the vector fed to the FC layers.
        if with_avg_pool:
            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
            fc_in_dim = in_channels
        else:
            fc_in_dim = in_channels * self.roi_feat_area

        if with_cls:
            # One extra output for the background class.
            self.fc_cls = nn.Linear(fc_in_dim, num_classes + 1)
        if with_reg:
            reg_out_dim = 4 * (1 if reg_class_agnostic else num_classes)
            self.fc_reg = nn.Linear(fc_in_dim, reg_out_dim)
        self.debug_imgs = None

        if init_cfg is None:
            # Default initialisation only for the layers that exist.
            default_init = []
            if with_cls:
                default_init.append(
                    dict(type='Normal', std=0.01,
                         override=dict(name='fc_cls')))
            if with_reg:
                default_init.append(
                    dict(type='Normal', std=0.001,
                         override=dict(name='fc_reg')))
            self.init_cfg = default_init
예제 #2
0
 def __init__(
         self,
         vertex_head,
         polyrnn_head,
         loss_vertex=dict(type='GaussianFocalLoss',
                          alpha=2.0,
                          gamma=4.0,
                          loss_weight=1),
         loss_polygon=dict(type='CrossEntropyLoss',
                           use_mask=False,
                           loss_weight=1.0),
         loss_offset=dict(type='L1Loss', loss_weight=1.0),
         loss_type=0,
         params=dict(dt_threshold=2, radius=1),
 ):
     """Build the vertex head, the polygon-RNN head and the three losses.

     Args:
         vertex_head (dict): Config handed to ``build_head``.
         polyrnn_head (dict): Config handed to ``build_head`` after
             ``loss_type`` has been injected into a copy of it.
         loss_vertex (dict): Config handed to ``build_loss``; also kept
             as ``self.loss_vertex_cfg``.
         loss_polygon (dict): Config handed to ``build_loss``; also kept
             as ``self.loss_polygon_cfg``.
         loss_offset (dict): Config handed to ``build_loss``.
         loss_type (int): Stored on the instance and forwarded to the
             polygon-RNN head config under the ``'loss_type'`` key.
         params (dict): Stored as ``self.params``.
     """
     super(PolygonHead, self).__init__()
     self.vertex_head = build_head(vertex_head)

     # Inject ``loss_type`` into a shallow copy so the caller's config
     # (which may be reused to build other heads) is left untouched.
     polyrnn_head = polyrnn_head.copy()
     polyrnn_head['loss_type'] = loss_type
     self.polyrnn_head = build_head(polyrnn_head)

     self.loss_vertex_cfg = loss_vertex
     self.loss_polygon_cfg = loss_polygon
     self.loss_vertex = build_loss(loss_vertex)
     self.loss_polygon = build_loss(loss_polygon)
     self.loss_offset = build_loss(loss_offset)
     self.loss_type = loss_type
     self.params = params
예제 #3
0
    def __init__(self,
                 with_avg_pool=False,
                 with_cls=True,
                 with_reg=True,
                 start_bbox_type='hbb',
                 end_bbox_type='hbb',
                 reg_dim=None,
                 roi_feat_size=7,
                 in_channels=256,
                 num_classes=15,
                 bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                                 target_means=[0., 0., 0., 0.],
                                 target_stds=[0.1, 0.1, 0.2, 0.2]),
                 reg_class_agnostic=False,
                 reg_decoded_bbox=False,
                 loss_cls=dict(type='CrossEntropyLoss',
                               use_sigmoid=False,
                               loss_weight=1.0),
                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                                loss_weight=1.0)):
        """Create the coder, the losses and the final FC layers of the head.

        Args:
            with_avg_pool (bool): When true an ``nn.AvgPool2d`` over the RoI
                window is created and the FC input width is ``in_channels``;
                otherwise the width is ``in_channels * roi_feat_area``.
            with_cls (bool): Create ``fc_cls`` (``num_classes + 1`` outputs).
            with_reg (bool): Create ``fc_reg``.
            start_bbox_type (str): One of ``'hbb'``, ``'obb'``, ``'poly'``.
            end_bbox_type (str): One of ``'hbb'``, ``'obb'``, ``'poly'``.
            reg_dim (int | None): Regression width per class; when ``None``
                it is taken from ``get_bbox_dim(end_bbox_type)``.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Channel count of the incoming RoI features.
            num_classes (int): Number of classes, background excluded.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            reg_class_agnostic (bool): When true ``fc_reg`` has ``reg_dim``
                outputs, otherwise ``reg_dim * num_classes``.
            reg_decoded_bbox (bool): Stored on the instance; not used here.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
        """
        super(OBBoxHead, self).__init__()
        assert with_cls or with_reg

        self.with_avg_pool = with_avg_pool
        self.with_cls = with_cls
        self.with_reg = with_reg
        feat_size = _pair(roi_feat_size)
        self.roi_feat_size = feat_size
        self.roi_feat_area = feat_size[0] * feat_size[1]
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.reg_decoded_bbox = reg_decoded_bbox
        self.fp16_enabled = False

        # Box representations at the input and output of the head.
        self.start_bbox_type = start_bbox_type
        self.end_bbox_type = end_bbox_type
        assert self.start_bbox_type in ['hbb', 'obb', 'poly']
        assert self.end_bbox_type in ['hbb', 'obb', 'poly']
        if reg_dim is None:
            self.reg_dim = get_bbox_dim(self.end_bbox_type)
        else:
            self.reg_dim = reg_dim

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        # Width of the vector fed to the FC layers.
        if with_avg_pool:
            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
            fc_in_dim = in_channels
        else:
            fc_in_dim = in_channels * self.roi_feat_area

        if with_cls:
            # One extra output for the background class.
            self.fc_cls = nn.Linear(fc_in_dim, num_classes + 1)
        if with_reg:
            if reg_class_agnostic:
                reg_out_dim = self.reg_dim
            else:
                reg_out_dim = self.reg_dim * num_classes
            self.fc_reg = nn.Linear(fc_in_dim, reg_out_dim)
        self.debug_imgs = None
예제 #4
0
    def __init__(self,
                 with_avg_pool=False,
                 with_cls=True,
                 with_reg=True,
                 roi_feat_size=7,
                 in_channels=256,
                 num_classes=80,
                 score_type='normal',
                 head_config=[True,False,False],
                 init_type='normal',
                 bbox_coder=dict(
                     type='DeltaXYWHBBoxCoder',
                     clip_border=True,
                     target_means=[0., 0., 0., 0.],
                     target_stds=[0.1, 0.1, 0.2, 0.2]),
                 reg_class_agnostic=False,
                 reg_decoded_bbox=False,
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=False,
                     loss_weight=1.0),
                 loss_bbox=dict(
                     type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
        """Create the coder, the losses and the classification FC layer.

        This head never creates a regression layer: ``self.with_reg`` is
        always set to ``False``, whatever ``with_reg`` is passed.

        Args:
            with_avg_pool (bool): When true an ``nn.AvgPool2d`` over the RoI
                window is created and the FC input width is ``in_channels``;
                otherwise the width is ``in_channels * roi_feat_area``.
            with_cls (bool): Create ``fc_cls`` (``num_classes + 1`` outputs).
            with_reg (bool): Only checked by the ``with_cls or with_reg``
                assertion; regression is disabled regardless.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Channel count of the incoming RoI features.
            num_classes (int): Number of classes, background excluded.
            score_type (str): How the score is regressed; stored only.
            head_config (list): Stored (as a copy) on the instance; its
                meaning is defined by code outside this constructor.
            init_type (str): Stored on the instance.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            reg_class_agnostic (bool): Stored on the instance.
            reg_decoded_bbox (bool): Stored on the instance.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
        """
        super(AttentionLogoBBoxHead, self).__init__()
        assert with_cls or with_reg
        self.with_avg_pool = with_avg_pool
        self.with_cls = with_cls
        # Regression is intentionally disabled for this head.
        self.with_reg = False
        self.roi_feat_size = _pair(roi_feat_size)
        self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.reg_decoded_bbox = reg_decoded_bbox
        self.fp16_enabled = False

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        # How the score is regressed.
        self.score_type = score_type
        # Copy so instances never share (and cannot corrupt) the mutable
        # default list from the signature.
        self.head_config = list(head_config)
        self.init_type = init_type

        in_channels = self.in_channels
        if self.with_avg_pool:
            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
        else:
            in_channels *= self.roi_feat_area
        if self.with_cls:
            # need to add background class
            self.fc_cls = nn.Linear(in_channels, num_classes + 1)
        self.debug_imgs = None
    def __init__(self,
                 with_avg_pool=False,
                 with_cls=True,
                 with_reg=True,
                 roi_feat_size=7,
                 in_channels=256,
                 num_classes=80,
                 bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                                 clip_border=True,
                                 target_means=[0., 0., 0., 0.],
                                 target_stds=[0.1, 0.1, 0.2, 0.2]),
                 reg_class_agnostic=False,
                 reg_decoded_bbox=False,
                 loss_cls=dict(type='CrossEntropyLoss',
                               use_sigmoid=False,
                               loss_weight=1.0),
                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                                loss_weight=1.0),
                 data_path='/data/zhaozhiyuan/tb_variation/VOCdevkit_all'):
        """Create the classification layer and load the file-to-style map.

        This head never creates a regression layer: ``self.with_reg`` is
        always set to ``False``, whatever ``with_reg`` is passed.

        Args:
            with_avg_pool (bool): When true an ``nn.AvgPool2d`` over the RoI
                window is created and the FC input width is ``in_channels``;
                otherwise the width is ``in_channels * roi_feat_area``.
            with_cls (bool): Create ``fc_cls`` (``num_classes + 1`` outputs).
            with_reg (bool): Only checked by the ``with_cls or with_reg``
                assertion; regression is disabled regardless.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Channel count of the incoming RoI features.
            num_classes (int): Number of classes, background excluded.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            reg_class_agnostic (bool): Stored on the instance.
            reg_decoded_bbox (bool): Stored on the instance.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
            data_path (str): Dataset root. Annotations are read from
                ``<data_path>/VOC2007/Annotations``. The default keeps the
                location that used to be hard-coded.
        """
        super(LogoDCBBoxHead, self).__init__()
        assert with_cls or with_reg
        self.with_avg_pool = with_avg_pool
        self.with_cls = with_cls
        # Regression is intentionally disabled for this head.
        self.with_reg = False
        self.roi_feat_size = _pair(roi_feat_size)
        self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.reg_decoded_bbox = reg_decoded_bbox
        self.fp16_enabled = False

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        in_channels = self.in_channels
        if self.with_avg_pool:
            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
        else:
            in_channels *= self.roi_feat_area
        if self.with_cls:
            # need to add background class
            self.fc_cls = nn.Linear(in_channels, num_classes + 1)
        self.debug_imgs = None

        self.file_to_style = {}
        self.data_path = data_path
        self.anno_path = os.path.join(self.data_path, 'VOC2007', 'Annotations')
        # Map each annotation file stem (text before the first '.') to the
        # style recorded on the first <object> element of that file.
        for anno in os.listdir(self.anno_path):
            anno_file = ET.parse(os.path.join(self.anno_path, anno))
            first_object = anno_file.find('object')
            style = first_object.find('style').text
            self.file_to_style[anno.split('.')[0]] = style
예제 #6
0
    def __init__(self,
                 num_query=1,
                 transformer=None,
                 positional_encoding=dict(
                     type='SinePositionalEncoding',
                     num_feats=128,
                     normalize=True),
                 bbox_head=None,
                 cls_head=None,
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=False,
                     loss_weight=1.0,
                 ),
                 loss_bbox=dict(type='L1Loss', loss_weight=5.0),
                 loss_iou=dict(type='GIoULoss', loss_weight=2.0),
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=None,
                 frozen_modules=None,
                 **kwargs):
        """Build the transformer, the sub-heads, the losses and the queries.

        Args:
            num_query (int): Number of rows of the query embedding table.
            transformer (dict): Config handed to ``build_transformer``.
            positional_encoding (dict): Config handed to
                ``build_positional_encoding``.
            bbox_head (dict): Config handed to ``build_head``; must not be
                ``None``.
            cls_head (dict | None): When ``None`` (stage-1 training) only
                the bbox and IoU losses are built. Otherwise (stage-2
                training) the classification head and ``loss_cls`` are
                built instead.
            loss_cls (dict): Config handed to ``build_loss`` in stage 2.
            loss_bbox (dict): Config handed to ``build_loss`` in stage 1.
            loss_iou (dict): Config handed to ``build_loss`` in stage 1.
            train_cfg: Stored on the instance.
            test_cfg: Stored on the instance.
            init_cfg: Forwarded to the parent class.
            frozen_modules (list[str] | None): Attribute names of
                sub-modules whose parameters get ``requires_grad = False``.
            **kwargs: Accepted but not used by this constructor.
        """
        super(StarkHead, self).__init__(init_cfg=init_cfg)
        self.transformer = build_transformer(transformer)
        self.positional_encoding = build_positional_encoding(
            positional_encoding)
        assert bbox_head is not None
        self.bbox_head = build_head(bbox_head)

        if cls_head is not None:
            # Stage-2 training: classification head plus its loss.
            self.cls_head = build_head(cls_head)
            self.loss_cls = build_loss(loss_cls)
        else:
            # Stage-1 training: box regression losses only.
            self.loss_bbox = build_loss(loss_bbox)
            self.loss_iou = build_loss(loss_iou)
            self.cls_head = None

        self.embed_dims = self.transformer.embed_dims
        self.num_query = num_query
        self.query_embedding = nn.Embedding(self.num_query, self.embed_dims)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.fp16_enabled = False

        if frozen_modules is not None:
            assert isinstance(frozen_modules, list)
            # TODO: Study the influence of freezing BN running_mean and
            # running_variance of `frozen_modules` in the 2nd stage train.
            # The official code doesn't freeze these.
            for module_name in frozen_modules:
                frozen = getattr(self, module_name)
                for weight in frozen.parameters():
                    weight.requires_grad = False
예제 #7
0
    def __init__(self,
                 num_classes,
                 in_channels,
                 feat_channels=256,
                 anchor_scales=[8, 16, 32],
                 anchor_ratios=[0.5, 1.0, 2.0],
                 anchor_strides=[4, 8, 16, 32, 64],
                 anchor_base_sizes=None,
                 target_means=(.0, .0, .0, .0),
                 target_stds=(1.0, 1.0, 1.0, 1.0),
                 with_cls=True,
                 sampling=True,
                 loss_cls=dict(type='CrossEntropyLoss',
                               use_sigmoid=True,
                               loss_weight=1.0),
                 loss_bbox=dict(type='SmoothL1Loss',
                                beta=1.0 / 9.0,
                                loss_weight=1.0)):
        """Store the anchor settings, build the losses and anchor generators.

        Args:
            num_classes (int): Number of classes. With a sigmoid
                classification loss the head outputs ``num_classes - 1``
                channels, otherwise ``num_classes``.
            in_channels (int): Stored on the instance.
            feat_channels (int): Stored on the instance.
            anchor_scales (list): Scales given to every ``AnchorGenerator``.
            anchor_ratios (list): Ratios given to every ``AnchorGenerator``.
            anchor_strides (list): Stored; also the source of the base sizes
                when ``anchor_base_sizes`` is ``None``.
            anchor_base_sizes (list | None): One base size per generator.
            target_means (tuple): Stored on the instance.
            target_stds (tuple): Stored on the instance.
            with_cls (bool): Stored on the instance.
            sampling (bool): Stored; must be false when ``loss_cls`` is a
                ``FocalLoss``.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
        """
        super(CascadeAnchorHeadRbbox, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.feat_channels = feat_channels
        self.anchor_scales = anchor_scales
        self.anchor_ratios = anchor_ratios
        self.anchor_strides = anchor_strides
        self.anchor_base_sizes = list(
            anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
        self.target_means = target_means
        self.target_stds = target_stds
        self.with_cls = with_cls
        self.sampling = sampling
        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
        self.cls_focal_loss = loss_cls['type'] in ['FocalLoss']
        # Computed once; the original repeated this block verbatim at the
        # end of the constructor.
        if self.use_sigmoid_cls:
            self.cls_out_channels = num_classes - 1
        else:
            self.cls_out_channels = num_classes
        self.use_iou_reg = loss_bbox['type'] in ['IoULoss']
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)
        if self.cls_focal_loss:
            assert not sampling

        # One generator per base size.
        self.anchor_generators = [
            AnchorGenerator(anchor_base, anchor_scales, anchor_ratios)
            for anchor_base in self.anchor_base_sizes
        ]

        self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
예제 #8
0
    def __init__(
        self,
        num_classes,
        in_channels,
        feat_channels=256,
        stacked_convs=4,
        strides=(4, 8, 16, 32, 64),
        scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128, 512)),
        pos_scale=0.2,
        num_grids=[40, 36, 24, 16, 12],
        cls_down_index=0,
        loss_mask=None,
        loss_cls=None,
        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
        train_cfg=None,
        test_cfg=None,
        init_cfg=[
            dict(type='Normal', layer='Conv2d', std=0.01),
            dict(
                type='Normal',
                std=0.01,
                bias_prob=0.01,
                override=dict(name='conv_mask_list')),
            dict(
                type='Normal',
                std=0.01,
                bias_prob=0.01,
                override=dict(name='conv_cls'))
        ],
    ):
        """Store the head settings, build the losses and create the layers.

        Args:
            num_classes (int): Number of classes; also used as
                ``cls_out_channels``.
            in_channels (int): Stored on the instance.
            feat_channels (int): Stored on the instance.
            stacked_convs (int): Stored on the instance.
            strides (tuple): One entry per level; its length defines
                ``num_levels``.
            scale_ranges (tuple): Must have ``num_levels`` entries.
            pos_scale (float): Stored on the instance.
            num_grids (list): Must have ``num_levels`` entries.
            cls_down_index (int): Stored on the instance.
            loss_mask (dict): Config handed to ``build_loss``.
            loss_cls (dict): Config handed to ``build_loss``.
            norm_cfg (dict): Stored on the instance.
            train_cfg: Stored on the instance.
            test_cfg: Stored on the instance.
            init_cfg (dict | list): Forwarded to the parent class and also
                stored on the instance.
        """
        super(SOLOHead, self).__init__(init_cfg)

        # Channel layout.
        self.num_classes = num_classes
        self.cls_out_channels = self.num_classes
        self.in_channels = in_channels
        self.feat_channels = feat_channels
        self.stacked_convs = stacked_convs

        # Per-level settings: every sequence needs one entry per level.
        self.strides = strides
        self.num_grids = num_grids
        level_count = len(strides)
        self.num_levels = level_count
        assert level_count == len(scale_ranges) == len(num_grids)
        self.scale_ranges = scale_ranges
        self.pos_scale = pos_scale
        self.cls_down_index = cls_down_index

        self.loss_cls = build_loss(loss_cls)
        self.loss_mask = build_loss(loss_mask)

        self.norm_cfg = norm_cfg
        self.init_cfg = init_cfg
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        # Layers are created last: they read the attributes set above.
        self._init_layers()
예제 #9
0
    def __init__(self,
                 roi_feat_size=7,
                 in_channels=256,
                 num_convs=4,
                 num_fcs=2,
                 reg_num=2,
                 conv_out_channels=256,
                 fc_out_channels=1024,
                 offset_coordinate='rectangle',
                 offset_coder=dict(
                    type='DeltaXYOffsetCoder',
                    target_means=[0.0, 0.0],
                    target_stds=[0.5, 0.5]),
                 reg_decoded_offset=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 loss_offset=dict(type='MSELoss', loss_weight=1.0)):
        """Build the conv stack, the FC stack and the offset regressor.

        Args:
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``;
                its area sizes the first FC layer.
            in_channels (int): Input channels of the first conv layer.
            num_convs (int): Number of 3x3 conv layers (padding 1).
            num_fcs (int): Number of FC layers.
            reg_num (int): Output width of ``fc_offset``.
            conv_out_channels (int): Output channels of every conv layer.
            fc_out_channels (int): Output width of every FC layer.
            offset_coordinate (str): Stored on the instance.
            offset_coder (dict): Config handed to ``build_bbox_coder``.
            reg_decoded_offset (bool): Stored on the instance.
            conv_cfg: Stored on the instance; not used when building the
                layers here.
            norm_cfg: Stored on the instance; not used when building the
                layers here.
            loss_offset (dict): Config handed to ``build_loss``.
        """
        super(OffsetHead, self).__init__()
        self.in_channels = in_channels
        self.conv_out_channels = conv_out_channels
        self.fc_out_channels = fc_out_channels
        self.offset_coordinate = offset_coordinate
        self.reg_decoded_offset = reg_decoded_offset
        self.reg_num = reg_num
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        self.offset_coder = build_bbox_coder(offset_coder)
        # Built exactly once; the original built the same loss a second
        # time at the end of the constructor.
        self.loss_offset = build_loss(loss_offset)

        self.convs = nn.ModuleList()
        for i in range(num_convs):
            # Only the first conv sees the raw input channels.
            in_channels = (self.in_channels if i == 0 else self.conv_out_channels)
            self.convs.append(
                Conv2d(
                    in_channels,
                    self.conv_out_channels,
                    3,
                    padding=1))

        roi_feat_size = _pair(roi_feat_size)
        roi_feat_area = roi_feat_size[0] * roi_feat_size[1]
        self.fcs = nn.ModuleList()
        for i in range(num_fcs):
            # The first FC consumes the flattened conv feature map.
            in_channels = (
                self.conv_out_channels *
                roi_feat_area if i == 0 else self.fc_out_channels)
            self.fcs.append(nn.Linear(in_channels, self.fc_out_channels))

        self.fc_offset = nn.Linear(self.fc_out_channels, self.reg_num)
        self.relu = nn.ReLU()
예제 #10
0
    def __init__(self,
                 with_avg_pool=False,
                 with_cls=True,
                 with_reg=True,
                 roi_feat_size=7,
                 in_channels=256,
                 num_classes=80,
                 bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                                 target_means=[0., 0., 0., 0.],
                                 target_stds=[0.1, 0.1, 0.2, 0.2]),
                 reg_class_agnostic=False,
                 reg_decoded_bbox=False,
                 loss_cls=dict(type='CrossEntropyLoss',
                               use_sigmoid=False,
                               loss_weight=1.0),
                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                                loss_weight=1.0)):
        """Create the coder, the losses and the FC layers of the head.

        The regression branch is a linear layer followed by batch
        normalisation.

        Args:
            with_avg_pool (bool): When true an ``nn.AvgPool2d`` over the RoI
                window is created and the FC input width is ``in_channels``;
                otherwise the width is ``in_channels * roi_feat_area``.
            with_cls (bool): Create ``fc_cls`` (``num_classes + 1`` outputs).
            with_reg (bool): Create ``fc_reg``.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Channel count of the incoming RoI features.
            num_classes (int): Number of classes, background excluded.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            reg_class_agnostic (bool): When true the regression branch has
                4 outputs, otherwise ``4 * num_classes``.
            reg_decoded_bbox (bool): Stored on the instance; not used here.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
        """
        super(BBoxHeadBN, self).__init__()
        assert with_cls or with_reg
        self.with_avg_pool = with_avg_pool
        self.with_cls = with_cls
        self.with_reg = with_reg
        self.roi_feat_size = _pair(roi_feat_size)
        self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.reg_decoded_bbox = reg_decoded_bbox
        self.fp16_enabled = False

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        in_channels = self.in_channels
        if self.with_avg_pool:
            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
        else:
            in_channels *= self.roi_feat_area
        if self.with_cls:
            # need to add background class
            self.fc_cls = nn.Linear(in_channels, num_classes + 1)
        if self.with_reg:
            out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes
            # Both layers are sized from the values computed above: the
            # linear layer takes the same input width as ``fc_cls`` and the
            # batch norm matches the linear output width. The original used
            # ``self.in_channels`` and a fixed ``BatchNorm1d(4)``, which is
            # only consistent when ``with_avg_pool`` and
            # ``reg_class_agnostic`` are both true.
            # NOTE(review): assumes ``forward`` feeds ``fc_reg`` the same
            # tensor as ``fc_cls`` -- confirm.
            self.fc_reg = nn.Sequential(
                nn.Linear(in_channels, out_dim_reg),
                nn.BatchNorm1d(out_dim_reg))
        self.debug_imgs = None
    def __init__(self,
                 num_classes,
                 in_channels,
                 feat_channels=256,
                 bbox_type='hbb',
                 reg_dim=None,
                 stacked_convs=4,
                 strides=(4, 8, 16, 32, 64),
                 dcn_on_last_conv=False,
                 conv_bias='auto',
                 background_label=None,
                 loss_cls=dict(type='FocalLoss',
                               use_sigmoid=True,
                               gamma=2.0,
                               alpha=0.25,
                               loss_weight=1.0),
                 loss_bbox=dict(type='IoULoss', loss_weight=1.0),
                 conv_cfg=None,
                 norm_cfg=None,
                 train_cfg=None,
                 test_cfg=None):
        """Store the head settings, build the losses and create the layers.

        Args:
            num_classes (int): Number of classes; also used as
                ``cls_out_channels``.
            in_channels (int): Stored on the instance.
            feat_channels (int): Stored on the instance.
            bbox_type (str): Box representation, passed to ``get_bbox_dim``
                when ``reg_dim`` is ``None``.
            reg_dim (int | None): Regression width; derived from
                ``bbox_type`` when ``None``.
            stacked_convs (int): Stored on the instance.
            strides (tuple): Stored on the instance.
            dcn_on_last_conv (bool): Stored on the instance.
            conv_bias (bool | str): Either a bool or ``'auto'``.
            background_label (int | None): Must be ``0`` or ``num_classes``;
                defaults to ``num_classes`` when ``None``.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
            conv_cfg: Stored on the instance.
            norm_cfg: Stored on the instance.
            train_cfg: Stored on the instance.
            test_cfg: Stored on the instance.
        """
        super(OBBAnchorFreeHead, self).__init__()
        self.num_classes = num_classes
        self.cls_out_channels = num_classes
        self.in_channels = in_channels
        self.feat_channels = feat_channels

        self.bbox_type = bbox_type
        if reg_dim is None:
            self.reg_dim = get_bbox_dim(self.bbox_type)
        else:
            self.reg_dim = reg_dim

        self.stacked_convs = stacked_convs
        self.strides = strides
        self.dcn_on_last_conv = dcn_on_last_conv
        assert conv_bias == 'auto' or isinstance(conv_bias, bool)
        self.conv_bias = conv_bias

        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.fp16_enabled = False

        # The background label is either 0 or num_classes.
        if background_label is None:
            self.background_label = num_classes
        else:
            self.background_label = background_label
        assert (self.background_label == 0
                or self.background_label == num_classes)

        # Layers are created last: they read the attributes set above.
        self._init_layers()
예제 #12
0
    def __init__(self,
                 num_fcs=2,
                 fc_out_channels=1024,
                 gggs_config=None,
                 *args,
                 **kwargs):
        """Extend the parent head with the grouped classifier and its data.

        Args:
            num_fcs (int): Forwarded to the parent constructor.
            fc_out_channels (int): Forwarded to the parent constructor and
                to ``CLASS_HEAD``.
            gggs_config: Object whose attributes ``num_bins``, ``loss_bin``,
                ``label2binlabel``, ``pred_slice``, ``fg_split`` and
                ``others_sample_ratio`` are read here. It must be provided
                even though the default is ``None``.
            *args: Forwarded to the parent constructor.
            **kwargs: Forwarded to the parent constructor.
        """
        super(GGGSBBoxHeadWith0, self).__init__(
            *args,
            num_fcs=num_fcs,
            fc_out_channels=fc_out_channels,
            **kwargs)

        self.fc_cls = CLASS_HEAD(gggs_config=gggs_config,
                                 cls_last_dim=self.cls_last_dim,
                                 fc_out_channels=fc_out_channels)

        # One independently built loss per bin.
        self.loss_bins = [
            build_loss(gggs_config.loss_bin)
            for _ in range(gggs_config.num_bins)
        ]

        # Lookup tensors are loaded from disk and moved to the GPU.
        self.label2binlabel = torch.load(gggs_config.label2binlabel).cuda()
        self.pred_slice = torch.load(gggs_config.pred_slice).cuda()

        # TODO: update this ugly implementation. Save fg_split to a list and
        #  load groups by gs_config.num_bins
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point ``fg_split`` at trusted files.
        with open(gggs_config.fg_split, 'rb') as split_file:
            fg_split = pickle.load(split_file)

        split_keys = ('(10000,~)', '(2000,10000)', '(500,2000)', '(0,500)')
        self.fg_splits = [
            torch.from_numpy(fg_split[key]).cuda() for key in split_keys
        ]

        self.others_sample_ratio = gggs_config.others_sample_ratio
예제 #13
0
    def __init__(self, models):
        """Wrap ``models`` and build the fusion module, assigner and loss.

        Args:
            models: Stored as ``self.models`` without modification.
        """
        super(EnsembleModel, self).__init__()
        self.models = models
        # TODO: Fix half()
        self.fusion = Fusion().half()

        self.num_classes = 1

        # Fixed training components; none of them is configurable here.
        self.assigner = build_assigner(
            dict(type='MaxIoUAssigner',
                 pos_iou_thr=0.5,
                 neg_iou_thr=0.5,
                 min_pos_iou=0.5,
                 match_low_quality=False,
                 ignore_iof_thr=-1))
        self.loss_cls = build_loss(
            dict(type='CrossEntropyLoss',
                 use_sigmoid=True,
                 loss_weight=1.0))
        self.sampler = build_sampler(dict(type='PseudoSampler'))
예제 #14
0
    def __init__(self,
                 num_chars=92,
                 visual_dim=64,
                 fusion_dim=1024,
                 node_input=32,
                 node_embed=256,
                 edge_input=5,
                 edge_embed=256,
                 num_gnn=2,
                 num_classes=26,
                 loss=dict(type='SDMGRLoss'),
                 bidirectional=False,
                 train_cfg=None,
                 test_cfg=None):
        """Build the fusion block, text encoder, GNN layers and classifiers.

        Args:
            num_chars (int): Size of the character embedding table.
            visual_dim (int): First input width of the fusion block.
            fusion_dim (int): Third argument of the fusion ``Block``.
            node_input (int): Character embedding width and LSTM input size.
            node_embed (int): Node feature width. The LSTM hidden size is
                half of it when ``bidirectional`` is true.
            edge_input (int): Input width of the edge embedding layer.
            edge_embed (int): Edge feature width.
            num_gnn (int): Number of ``GNNLayer`` modules.
            num_classes (int): Output width of the node classifier.
            loss (dict): Config handed to ``build_loss``.
            bidirectional (bool): Whether the LSTM is bidirectional.
            train_cfg: Accepted but not used by this constructor.
            test_cfg: Accepted but not used by this constructor.
        """
        super().__init__()

        self.fusion = Block([visual_dim, node_embed], node_embed, fusion_dim)
        # Index 0 is the padding index of the character embedding.
        self.node_embed = nn.Embedding(num_chars, node_input, 0)

        if bidirectional:
            # Halve the hidden size for the bidirectional LSTM.
            rnn_hidden = node_embed // 2
        else:
            rnn_hidden = node_embed
        self.rnn = nn.LSTM(
            input_size=node_input,
            hidden_size=rnn_hidden,
            num_layers=1,
            batch_first=True,
            bidirectional=bidirectional)

        self.edge_embed = nn.Linear(edge_input, edge_embed)
        gnn_stack = [GNNLayer(node_embed, edge_embed) for _ in range(num_gnn)]
        self.gnn_layers = nn.ModuleList(gnn_stack)

        # Node classifier and the 2-way edge classifier.
        self.node_cls = nn.Linear(node_embed, num_classes)
        self.edge_cls = nn.Linear(edge_embed, 2)
        self.loss = build_loss(loss)
예제 #15
0
    def __init__(self,
                 tau=0.2,
                 min_iof=0.7,
                 loss_sim=dict(type='CrossEntropyLoss',
                               use_sigmoid=False,
                               loss_weight=1.0),
                 init_cfg=None,
                 *args,
                 **kwargs):
        """Extend the parent head with relation-embedding branches.

        Args:
            tau (float): Stored on the instance.
            min_iof (float): Stored on the instance.
            loss_sim (dict): Config handed to ``build_loss``.
            init_cfg (dict | list | None): Forwarded to the parent class.
                When ``None`` a ``Xavier`` init entry for ``rel_fcs`` is
                appended to the parent's ``self.init_cfg``.
            *args: Forwarded to the parent constructor.
            **kwargs: Forwarded to the parent constructor.
        """
        super(CORE2FCBBoxHead, self).__init__(
            *args, init_cfg=init_cfg, **kwargs)
        self.tau = tau
        self.min_iof = min_iof
        self.loss_sim = build_loss(loss_sim)

        # One relation-embedding branch per shared FC layer; every branch
        # maps the shared feature width to a 128-wide embedding.
        width = self.shared_out_channels
        self.rel_fcs = nn.ModuleList(
            nn.Sequential(
                nn.Linear(width, width),
                nn.ReLU(inplace=True),
                nn.Linear(width, 128))
            for _ in range(self.num_shared_fcs))

        if init_cfg is None:
            rel_init = dict(
                type='Xavier',
                layer='Linear',
                override=[
                    dict(name='rel_fcs'),
                ])
            self.init_cfg += [rel_init]
    def __init__(
        self,
        num_classes,
        in_channels,
        stacked_convs=4,
        conv_cfg=None,
        norm_cfg=dict(type="GN", num_groups=32, requires_grad=True),
        loss_dfl=dict(type="DistributionFocalLoss", loss_weight=0.25),
        reg_max=16,
        **kwargs,
    ):
        """Store the layer settings, run the parent constructor, add DFL.

        Args:
            num_classes (int): Forwarded to the parent constructor.
            in_channels (int): Forwarded to the parent constructor.
            stacked_convs (int): Stored on the instance.
            conv_cfg: Stored on the instance.
            norm_cfg (dict): Stored on the instance.
            loss_dfl (dict): Config handed to ``build_loss``.
            reg_max (int): Stored on the instance and passed to
                ``Integral``.
            **kwargs: Forwarded to the parent constructor.
        """
        # These four attributes are assigned before the parent constructor
        # runs, exactly as in the original ordering.
        self.stacked_convs = stacked_convs
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.reg_max = reg_max
        super(ModifiedGFLHead, self).__init__(
            num_classes, in_channels, **kwargs)

        self.sampling = False
        if self.train_cfg:
            self.assigner = build_assigner(self.train_cfg.assigner)
            # SSD sampling=False so use PseudoSampler
            self.sampler = build_sampler(
                dict(type="PseudoSampler"), context=self)

        self.integral = Integral(self.reg_max)
        self.loss_dfl = build_loss(loss_dfl)
예제 #17
0
    def __init__(
            self,
            in_channels,
            out_channels,
            text_repr_type='poly',  # 'poly' or 'quad'
            downsample_ratio=0.25,
            loss=dict(type='PANLoss'),
            train_cfg=None,
            test_cfg=None):
        """Build the loss module and the 1x1 output convolution.

        Args:
            in_channels (list[int]): Channel counts whose sum is the input
                width of the output convolution.
            out_channels (int): Output channels of the output convolution.
            text_repr_type (str): ``'poly'`` or ``'quad'``.
            downsample_ratio (float): Value in ``[0, 1]``; stored only.
            loss (dict): Config handed to ``build_loss``. Its ``'type'``
                selects the decoding type: ``'PANLoss'`` gives ``'pan'``
                and ``'PSELoss'`` gives ``'pse'``.
            train_cfg: Stored on the instance.
            test_cfg: Stored on the instance.

        Raises:
            NotImplementedError: If ``loss['type']`` is neither
                ``'PANLoss'`` nor ``'PSELoss'``.
        """
        super().__init__()

        assert check_argument.is_type_list(in_channels, int)
        assert isinstance(out_channels, int)
        assert text_repr_type in ['poly', 'quad']
        assert 0 <= downsample_ratio <= 1

        self.loss_module = build_loss(loss)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.text_repr_type = text_repr_type
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.downsample_ratio = downsample_ratio

        # Named ``loss_type`` so the ``type`` builtin is not shadowed.
        loss_type = loss['type']
        if loss_type == 'PANLoss':
            self.decoding_type = 'pan'
        elif loss_type == 'PSELoss':
            self.decoding_type = 'pse'
        else:
            raise NotImplementedError(f'unsupported loss type {loss_type}.')

        self.out_conv = nn.Conv2d(in_channels=np.sum(np.array(in_channels)),
                                  out_channels=out_channels,
                                  kernel_size=1)
        self.init_weights()
예제 #18
0
    def __init__(self,
                 in_channels,
                 decoding_type='textsnake',
                 text_repr_type='poly',
                 loss=dict(type='TextSnakeLoss'),
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=dict(type='Normal',
                               override=dict(name='out_conv'),
                               mean=0,
                               std=0.01)):
        """Build the loss module and the 1x1 output convolution.

        Args:
            in_channels (int): Input channels of the output convolution.
            decoding_type (str): Stored on the instance.
            text_repr_type (str): Stored on the instance.
            loss (dict): Config handed to ``build_loss``.
            train_cfg: Stored on the instance.
            test_cfg: Stored on the instance.
            init_cfg (dict): Forwarded to the parent class.
        """
        super().__init__(init_cfg=init_cfg)

        assert isinstance(in_channels, int)

        # The output width and the downsample ratio are fixed for this head.
        self.in_channels = in_channels
        self.out_channels = 5
        self.downsample_ratio = 1.0

        self.decoding_type = decoding_type
        self.text_repr_type = text_repr_type
        self.loss_module = build_loss(loss)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        conv_kwargs = dict(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=1,
            stride=1,
            padding=0)
        self.out_conv = nn.Conv2d(**conv_kwargs)
예제 #19
0
    def __init__(
        self,
        with_avg_pool=False,
        num_shared_fcs=2,
        roi_feat_size=7,
        in_channels=256,
        fc_out_channels=1024,
        num_classes=15,
        reg_class_agnostic=False,
        ratio_thr=0.8,
        bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                        target_means=[0., 0., 0., 0.],
                        target_stds=[0.1, 0.1, 0.2, 0.2]),
        fix_coder=dict(type='GVFixCoder'),
        ratio_coder=dict(type='GVRatioCoder'),
        loss_cls=dict(type='CrossEntropyLoss',
                      use_sigmoid=False,
                      loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=1.0),
        loss_fix=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=1.0),
        loss_ratio=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=16.0),
    ):
        """Store the head settings, build coders and losses, create layers.

        Args:
            with_avg_pool (bool): Stored on the instance.
            num_shared_fcs (int): Stored on the instance.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Stored on the instance.
            fc_out_channels (int): Stored on the instance.
            num_classes (int): Stored on the instance.
            reg_class_agnostic (bool): Stored on the instance.
            ratio_thr (float): Stored on the instance.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            fix_coder (dict): Config handed to ``build_bbox_coder``.
            ratio_coder (dict): Config handed to ``build_bbox_coder``.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
            loss_fix (dict): Config handed to ``build_loss``.
            loss_ratio (dict): Config handed to ``build_loss``.
        """
        super(GVBBoxHead, self).__init__()

        self.with_avg_pool = with_avg_pool
        self.num_shared_fcs = num_shared_fcs
        feat_size = _pair(roi_feat_size)
        self.roi_feat_size = feat_size
        self.roi_feat_area = feat_size[0] * feat_size[1]
        self.in_channels = in_channels
        self.fc_out_channels = fc_out_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.ratio_thr = ratio_thr
        self.fp16_enabled = False

        # The input and output box representations are fixed for this head.
        self.start_bbox_type = 'hbb'
        self.end_bbox_type = 'poly'

        # Coders.
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.fix_coder = build_bbox_coder(fix_coder)
        self.ratio_coder = build_bbox_coder(ratio_coder)

        # Losses.
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)
        self.loss_fix = build_loss(loss_fix)
        self.loss_ratio = build_loss(loss_ratio)

        # Layers are created last: they read the attributes set above.
        self._init_layers()
예제 #20
0
    def __init__(self,
                 num_fcs,
                 in_channels,
                 fc_channels,
                 out_channels,
                 norm_cfg=None,
                 act_cfg=None,
                 num_classes=None,
                 loss=None,
                 loss_pairwise=None,
                 topk=(1, ),
                 init_cfg=dict(
                     type='Normal', layer='Linear', mean=0, std=0.01, bias=0)):
        """Validate the loss/top-k settings, build the losses and the layers.

        Args:
            num_fcs, in_channels, fc_channels, out_channels, norm_cfg,
            act_cfg: Stored unchanged on the instance.
            num_classes (int, optional): Must be an ``int`` when ``loss`` is
                given; triggers a warning when given without ``loss``.
            loss (dict, optional): Config for ``self.loss_cls``.
            loss_pairwise (dict, optional): Config for ``self.loss_triplet``.
            topk (int | tuple): Positive value(s); an ``int`` is wrapped into
                a one-element tuple before being passed to ``Accuracy``.
            init_cfg (dict): Forwarded to the parent constructor.

        Raises:
            ValueError: If neither ``loss`` nor ``loss_pairwise`` is given.
            TypeError: If ``loss`` is given but ``num_classes`` is not an int.
        """
        super(LinearReIDHead, self).__init__(init_cfg)

        # Normalise ``topk`` to a tuple and make sure every entry is positive.
        assert isinstance(topk, (int, tuple))
        topk = (topk, ) if isinstance(topk, int) else topk
        for k in topk:
            assert k > 0, 'Top-k should be larger than 0'
        self.topk = topk

        if loss:
            # A classification loss needs a concrete class count.
            if not isinstance(num_classes, int):
                raise TypeError('The num_classes must be a current number, '
                                'if there is cross entropy loss.')
            self.loss_cls = build_loss(loss)
        else:
            if isinstance(num_classes, int):
                warnings.warn('Since cross entropy is not set, '
                              'the num_classes will be ignored.')
            if not loss_pairwise:
                raise ValueError('Please choose at least one loss in '
                                 'triplet loss and cross entropy loss.')
            self.loss_cls = None

        if loss_pairwise:
            self.loss_triplet = build_loss(loss_pairwise)
        else:
            self.loss_triplet = None

        self.num_fcs = num_fcs
        self.in_channels = in_channels
        self.fc_channels = fc_channels
        self.out_channels = out_channels
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.num_classes = num_classes
        self.accuracy = Accuracy(topk=self.topk)
        self.fp16_enabled = False

        self._init_layers()
예제 #21
0
파일: fce_head.py 프로젝트: xyzhu8/mmocr
    def __init__(self,
                 in_channels,
                 scales,
                 fourier_degree=5,
                 num_sample=50,
                 num_reconstr_points=50,
                 decoding_type='fcenet',
                 loss=dict(type='FCELoss'),
                 score_thr=0.3,
                 nms_thr=0.1,
                 alpha=1.0,
                 beta=1.0,
                 text_repr_type='poly',
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=dict(type='Normal',
                               mean=0,
                               std=0.01,
                               override=[
                                   dict(name='out_conv_cls'),
                                   dict(name='out_conv_reg')
                               ])):
        """Store the head settings, build the loss and the two output convs.

        Args:
            in_channels (int): Input channels of both output convolutions.
            scales: Stored as ``self.scales``.
            fourier_degree (int): Stored on the instance, injected into the
                loss config, and used to size the regression output:
                ``(2 * fourier_degree + 1) * 2`` channels.
            num_sample (int): Stored as ``self.sample_num`` and injected into
                the loss config.
            num_reconstr_points, decoding_type, score_thr, nms_thr, alpha,
            beta, text_repr_type, train_cfg, test_cfg: Stored unchanged.
            loss (dict): Loss config; it is copied, never modified in place.
            init_cfg (dict): Forwarded to the parent constructor.
        """
        super().__init__(init_cfg=init_cfg)
        assert isinstance(in_channels, int)

        self.downsample_ratio = 1.0
        self.in_channels = in_channels
        self.scales = scales
        self.fourier_degree = fourier_degree
        self.sample_num = num_sample
        self.num_reconstr_points = num_reconstr_points
        # Build an extended COPY of the loss config. Writing the two keys into
        # ``loss`` itself would mutate the shared default-argument dict (and
        # any config object the caller passed in) across instantiations.
        loss = dict(loss,
                    fourier_degree=fourier_degree,
                    num_sample=num_sample)
        self.decoding_type = decoding_type
        self.loss_module = build_loss(loss)
        self.score_thr = score_thr
        self.nms_thr = nms_thr
        self.alpha = alpha
        self.beta = beta
        self.text_repr_type = text_repr_type
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.out_channels_cls = 4
        self.out_channels_reg = (2 * self.fourier_degree + 1) * 2

        self.out_conv_cls = nn.Conv2d(self.in_channels,
                                      self.out_channels_cls,
                                      kernel_size=3,
                                      stride=1,
                                      padding=1)
        self.out_conv_reg = nn.Conv2d(self.in_channels,
                                      self.out_channels_reg,
                                      kernel_size=3,
                                      stride=1,
                                      padding=1)
예제 #22
0
 def __init__(self,
              num_classes,
              in_channels,
              regress_ranges=((-1, 48), (48, 96), (96, 192), (192, 384),
                              (384, INF)),
              center_sampling=True,
              center_sample_radius=1.5,
              norm_on_bbox=True,
              centerness_on_reg=True,
              centerness_alpha=2.5,
              loss_cls=dict(type='FocalLoss',
                            use_sigmoid=True,
                            gamma=2.0,
                            alpha=0.25,
                            loss_weight=1.0),
              loss_bbox=dict(type='SmoothL1Loss',
                             beta=1.0 / 9.0,
                             loss_weight=1.0),
              loss_dir=dict(type='CrossEntropyLoss',
                            use_sigmoid=False,
                            loss_weight=1.0),
              loss_attr=dict(type='CrossEntropyLoss',
                             use_sigmoid=False,
                             loss_weight=1.0),
              loss_centerness=dict(type='CrossEntropyLoss',
                                   use_sigmoid=True,
                                   loss_weight=1.0),
              norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
              centerness_branch=(64, ),
              init_cfg=None,
              **kwargs):
     """Store head options, run the parent constructor, build the centerness loss.

     Args:
         num_classes, in_channels: Passed positionally to the parent
             constructor.
         regress_ranges, center_sampling, center_sample_radius,
         norm_on_bbox, centerness_on_reg, centerness_alpha,
         centerness_branch: Stored unchanged on the instance.
         loss_cls, loss_bbox, loss_dir, loss_attr, norm_cfg: Forwarded to
             the parent constructor as keyword arguments.
         loss_centerness (dict): Config handed to ``build_loss``; the result
             is stored as ``self.loss_centerness``.
         init_cfg (dict, optional): Forwarded to the parent constructor. When
             ``None``, ``self.init_cfg`` is replaced afterwards by the
             'Normal' config defined at the end of this method.
         **kwargs: Forwarded to the parent constructor.
     """
     # These attributes are assigned BEFORE the parent constructor runs.
     # NOTE(review): presumably the parent __init__ reads them (e.g. while
     # building layers) -- confirm before reordering.
     self.regress_ranges = regress_ranges
     self.center_sampling = center_sampling
     self.center_sample_radius = center_sample_radius
     self.norm_on_bbox = norm_on_bbox
     self.centerness_on_reg = centerness_on_reg
     self.centerness_alpha = centerness_alpha
     self.centerness_branch = centerness_branch
     super().__init__(num_classes,
                      in_channels,
                      loss_cls=loss_cls,
                      loss_bbox=loss_bbox,
                      loss_dir=loss_dir,
                      loss_attr=loss_attr,
                      norm_cfg=norm_cfg,
                      init_cfg=init_cfg,
                      **kwargs)
     self.loss_centerness = build_loss(loss_centerness)
     # Only when the caller supplied no init_cfg: install a default that
     # targets Conv2d layers and overrides the layer named 'conv_cls'.
     if init_cfg is None:
         self.init_cfg = dict(type='Normal',
                              layer='Conv2d',
                              std=0.01,
                              override=dict(type='Normal',
                                            name='conv_cls',
                                            std=0.01,
                                            bias_prob=0.01))
예제 #23
0
    def __init__(self,
                 preprocessor=None,
                 backbone=None,
                 encoder=None,
                 decoder=None,
                 loss=None,
                 label_convertor=None,
                 train_cfg=None,
                 test_cfg=None,
                 max_seq_len=40,
                 pretrained=None,
                 init_cfg=None):
        """Build the recognizer's components from their config dicts.

        Args:
            preprocessor (dict, optional): Config for ``build_preprocessor``;
                ``self.preprocessor`` stays ``None`` when omitted.
            backbone (dict): Required. Config for ``build_backbone``.
            encoder (dict, optional): Config for ``build_encoder``;
                ``self.encoder`` stays ``None`` when omitted.
            decoder (dict): Required. Updated in place with ``num_classes``,
                ``start_idx`` and ``padding_idx`` taken from the label
                convertor plus ``max_seq_len``, then passed to
                ``build_decoder``.
            loss (dict): Required. Updated in place with ``ignore_index`` set
                to the convertor's ``padding_idx``, then passed to
                ``build_loss``.
            label_convertor (dict): Required. Updated in place with
                ``max_seq_len``, then passed to ``build_convertor``.
            train_cfg, test_cfg: Stored unchanged.
            max_seq_len (int): Stored and injected into the convertor and
                decoder configs.
            pretrained (str, optional): Deprecated. When given, a warning is
                issued and ``self.init_cfg`` is replaced by a 'Pretrained'
                config pointing at this checkpoint.
            init_cfg (dict, optional): Forwarded to the parent constructor.
        """
        super().__init__(init_cfg=init_cfg)

        # Label convertor (str2tensor, tensor2str)
        assert label_convertor is not None
        label_convertor.update(max_seq_len=max_seq_len)
        self.label_convertor = build_convertor(label_convertor)

        # Preprocessor module, e.g., TPS
        self.preprocessor = None
        if preprocessor is not None:
            self.preprocessor = build_preprocessor(preprocessor)

        # Backbone
        assert backbone is not None
        self.backbone = build_backbone(backbone)

        # Encoder module
        self.encoder = None
        if encoder is not None:
            self.encoder = build_encoder(encoder)

        # Decoder module: its vocabulary-related settings come from the
        # label convertor built above.
        assert decoder is not None
        decoder.update(num_classes=self.label_convertor.num_classes())
        decoder.update(start_idx=self.label_convertor.start_idx)
        decoder.update(padding_idx=self.label_convertor.padding_idx)
        decoder.update(max_seq_len=max_seq_len)
        self.decoder = build_decoder(decoder)

        # Loss
        assert loss is not None
        loss.update(ignore_index=self.label_convertor.padding_idx)
        self.loss = build_loss(loss)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.max_seq_len = max_seq_len

        if pretrained is not None:
            # Implicit string concatenation instead of a backslash inside the
            # literal: the backslash form embedded the next source line's
            # indentation into the emitted message.
            warnings.warn('DeprecationWarning: pretrained is a deprecated '
                          'key, please consider using init_cfg')
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
예제 #24
0
    def __init__(self, gs_config=None, *args, **kwargs):
        """Set up the grouped classification head and its per-bin losses.

        Args:
            gs_config: Required in practice despite the ``None`` default; its
                ``loss_bin`` attribute is the config passed to ``build_loss``
                once per bin.
            *args, **kwargs: Forwarded to the parent constructor.

        Raises:
            ValueError: If ``gs_config`` is ``None``.
        """
        # Fail fast with a clear message instead of an AttributeError on
        # ``None.loss_bin`` after the whole parent head has been built.
        if gs_config is None:
            raise ValueError(
                'gs_config must be provided; its `loss_bin` entry is needed '
                'to build the per-bin losses.')

        super(GSBBoxHeadWithV2, self).__init__(*args, **kwargs)

        # Replaces the classifier created by the parent with one that has
        # ``num_classes + 5`` outputs.
        # NOTE(review): the 5 presumably matches the five bins below (one
        # extra slot per bin) -- confirm against the dataset's class count.
        self.fc_cls = nn.Linear(self.cls_last_dim,
                                self.num_classes + 5)

        # One lookup tensor per bin; each has 12 entries.
        # NOTE(review): presumably maps an original label to its index
        # inside that bin -- verify against the loss code.
        self.label2binlabel = [
            torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
            torch.tensor([0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),
            torch.tensor([4, 4, 4, 0, 1, 2, 4, 4, 3, 4, 4, 4]),
            torch.tensor([2, 2, 0, 2, 2, 2, 2, 2, 2, 1, 2, 2]),
            torch.tensor([2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 2, 2]),
        ]
        # One two-element entry per bin; the second values sum to 16.
        # NOTE(review): looks like [start, length] into the fc_cls output
        # -- confirm.
        self.pred_slice = [
            [0, 2],
            [2, 3],
            [5, 5],
            [10, 3],
            [13, 3],
        ]
        self.fg_splits = [
            torch.tensor([0, 1]),
            torch.tensor([3, 4, 5, 8]),
            torch.tensor([2, 9]),
            torch.tensor([6, 7]),
        ]
        self.others_sample_ratio = 3

        # One loss per bin; the count follows ``pred_slice`` (five entries)
        # rather than a separate hard-coded literal.
        # NOTE(review): kept as a plain list as in the original, so the
        # losses are not registered as sub-modules.
        self.loss_bins = [
            build_loss(gs_config.loss_bin) for _ in self.pred_slice
        ]
예제 #25
0
    def __init__(self,
                 num_convs=4,
                 num_fcs=2,
                 roi_feat_size=14,
                 in_channels=256,
                 conv_out_channels=256,
                 fc_out_channels=1024,
                 num_classes=80,
                 loss_iou=dict(type='MSELoss', loss_weight=0.5),
                 init_cfg=[
                     dict(type='Kaiming', override=dict(name='convs')),
                     dict(type='Caffe2Xavier', override=dict(name='fcs')),
                     dict(
                         type='Normal',
                         std=0.01,
                         override=dict(name='fc_mask_iou'))
                 ]):
        """Create the conv stack, the FC stack and the IoU output layer.

        Args:
            num_convs (int): Number of 3x3 convs; only the last has stride 2.
            num_fcs (int): Number of fully connected layers.
            roi_feat_size: Normalised with ``_pair``; each side is halved
                (integer division) to size the first FC layer's input.
            in_channels (int): Stored; the first conv takes one more channel.
            conv_out_channels (int): Output channels of every conv.
            fc_out_channels (int): Output size of every FC layer.
            num_classes (int): Output size of ``fc_mask_iou``.
            loss_iou (dict): Config handed to ``build_loss``.
            init_cfg: Forwarded to the parent constructor.
        """
        super(MaskIoUHead, self).__init__(init_cfg)
        self.in_channels = in_channels
        self.conv_out_channels = conv_out_channels
        self.fc_out_channels = fc_out_channels
        self.num_classes = num_classes
        self.fp16_enabled = False

        # Conv stack.
        self.convs = nn.ModuleList()
        last_idx = num_convs - 1
        for idx in range(num_convs):
            # The first conv sees the mask feature concatenated with the
            # mask prediction, hence the one extra input channel.
            conv_in = (self.in_channels + 1
                       if idx == 0 else self.conv_out_channels)
            conv_stride = 2 if idx == last_idx else 1
            layer = Conv2d(conv_in,
                           self.conv_out_channels,
                           3,
                           stride=conv_stride,
                           padding=1)
            self.convs.append(layer)

        # FC stack: the first layer consumes the flattened conv output.
        size_hw = _pair(roi_feat_size)
        pooled_area = (size_hw[0] // 2) * (size_hw[1] // 2)
        self.fcs = nn.ModuleList()
        fc_in = self.conv_out_channels * pooled_area
        for _ in range(num_fcs):
            self.fcs.append(Linear(fc_in, self.fc_out_channels))
            fc_in = self.fc_out_channels

        self.fc_mask_iou = Linear(self.fc_out_channels, self.num_classes)
        self.relu = nn.ReLU()
        self.max_pool = MaxPool2d(2, 2)
        self.loss_iou = build_loss(loss_iou)
예제 #26
0
    def __init__(self,
                 preprocessor=None,
                 backbone=None,
                 encoder=None,
                 decoder=None,
                 loss=None,
                 label_convertor=None,
                 train_cfg=None,
                 test_cfg=None,
                 max_seq_len=40,
                 pretrained=None):
        """Assemble the recognizer from component configs and init weights.

        Args:
            preprocessor (dict, optional): Config for ``build_preprocessor``.
            backbone (dict): Required. Config for ``build_backbone``.
            encoder (dict, optional): Config for ``build_encoder``.
            decoder (dict): Required. Extended in place with values from the
                label convertor and ``max_seq_len`` before being built.
            loss (dict): Required. Extended in place with ``ignore_index``.
            label_convertor (dict): Required. Extended in place with
                ``max_seq_len`` before being built.
            train_cfg, test_cfg: Stored unchanged.
            max_seq_len (int): Stored and injected into the configs above.
            pretrained: Passed to ``self.init_weights``.
        """
        super().__init__()

        # Label convertor (str2tensor, tensor2str)
        assert label_convertor is not None
        label_convertor.update(max_seq_len=max_seq_len)
        convertor = build_convertor(label_convertor)
        self.label_convertor = convertor

        # Optional preprocessor, e.g., TPS
        if preprocessor is not None:
            self.preprocessor = build_preprocessor(preprocessor)
        else:
            self.preprocessor = None

        # Mandatory backbone
        assert backbone is not None
        self.backbone = build_backbone(backbone)

        # Optional encoder
        if encoder is not None:
            self.encoder = build_encoder(encoder)
        else:
            self.encoder = None

        # Mandatory decoder, configured from the label convertor
        assert decoder is not None
        decoder.update(num_classes=convertor.num_classes())
        decoder.update(start_idx=convertor.start_idx)
        decoder.update(padding_idx=convertor.padding_idx)
        decoder.update(max_seq_len=max_seq_len)
        self.decoder = build_decoder(decoder)

        # Mandatory loss; the padding index is ignored
        assert loss is not None
        loss.update(ignore_index=convertor.padding_idx)
        self.loss = build_loss(loss)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.max_seq_len = max_seq_len
        self.init_weights(pretrained=pretrained)
예제 #27
0
 def __init__(self,
              num_classes,
              in_channels,
              seg_feat_channels=256,
              stacked_convs=4,
              strides=(4, 8, 16, 32, 64),
              base_edge_list=(16, 32, 64, 128, 256),
              scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128,
                                                                      512)),
              sigma=0.2,
              num_grids=None,
              ins_out_channels=64,
              background_label=None,
              loss_mask=None,
              loss_cls=None,
              conv_cfg=None,
              norm_cfg=None,
              train_cfg=None,
              test_cfg=None,
              use_dcn_in_tower=False,
              type_dcn=None):
     """Record the head configuration, build the class loss and the layers.

     Args:
         num_classes (int): Stored; also used as ``cate_out_channels`` and
             as the default background label.
         in_channels, seg_feat_channels, stacked_convs, strides,
         base_edge_list, scale_ranges, sigma, ins_out_channels, conv_cfg,
         norm_cfg, train_cfg, test_cfg, use_dcn_in_tower, type_dcn:
             Stored unchanged on the instance.
         num_grids: Stored as ``self.seg_num_grids``.
         background_label (int, optional): Must be 0 or ``num_classes``;
             defaults to ``num_classes`` when ``None``.
         loss_mask (dict): Only its ``'loss_weight'`` entry is read.
         loss_cls (dict): Config handed to ``build_loss``.
     """
     super(SOLOv2Head, self).__init__()

     # Class / channel bookkeeping.
     self.num_classes = num_classes
     self.cate_out_channels = num_classes
     self.in_channels = in_channels
     self.seg_feat_channels = seg_feat_channels
     self.ins_out_channels = ins_out_channels
     self.kernel_out_channels = ins_out_channels * 1 * 1

     # Grid / scale configuration.
     self.seg_num_grids = num_grids
     self.stacked_convs = stacked_convs
     self.strides = strides
     self.sigma = sigma
     self.base_edge_list = base_edge_list
     self.scale_ranges = scale_ranges

     # background_label should be either 0 or num_classes
     if background_label is None:
         self.background_label = num_classes
     else:
         self.background_label = background_label
     assert (self.background_label == 0
             or self.background_label == num_classes)

     self.loss_cls = build_loss(loss_cls)
     self.ins_loss_weight = loss_mask['loss_weight']

     self.conv_cfg = conv_cfg
     self.norm_cfg = norm_cfg
     self.train_cfg = train_cfg
     self.test_cfg = test_cfg
     self.use_dcn_in_tower = use_dcn_in_tower
     self.type_dcn = type_dcn

     # Layers are created last, once all settings are in place.
     self._init_layers()
예제 #28
0
파일: db_head.py 프로젝트: xyzhu8/mmocr
    def __init__(self,
                 in_channels,
                 with_bias=False,
                 decoding_type='db',
                 text_repr_type='poly',
                 downsample_ratio=1.0,
                 loss=dict(type='DBLoss'),
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=[
                     dict(type='Kaiming', layer='Conv'),
                     dict(type='Constant',
                          layer='BatchNorm',
                          val=1.,
                          bias=1e-4)
                 ]):
        """Initialization.

        Args:
            in_channels (int): The number of input channels of the db head.
            with_bias (bool): Used as the ``bias`` flag of the first
                convolution of the binarize branch.
            decoding_type (str): The type of decoder for dbnet.
            text_repr_type (str): Boundary encoding type 'poly' or 'quad'.
            downsample_ratio (float): The downsample ratio of ground truths.
            loss (dict): The type of loss for dbnet.
            train_cfg: Stored as ``self.train_cfg``.
            test_cfg: Stored as ``self.test_cfg``.
            init_cfg (list[dict]): Forwarded to the parent constructor.
        """
        super().__init__(init_cfg=init_cfg)

        assert isinstance(in_channels, int)

        self.in_channels = in_channels
        self.text_repr_type = text_repr_type
        self.loss_module = build_loss(loss)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.downsample_ratio = downsample_ratio
        self.decoding_type = decoding_type

        # Every intermediate layer works on a quarter of the input channels.
        quarter = in_channels // 4
        binarize_layers = [
            nn.Conv2d(in_channels, quarter, 3, bias=with_bias, padding=1),
            nn.BatchNorm2d(quarter),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(quarter, quarter, 2, 2),
            nn.BatchNorm2d(quarter),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(quarter, 1, 2, 2),
            nn.Sigmoid(),
        ]
        self.binarize = Sequential(*binarize_layers)

        # The threshold branch is built by a sibling helper.
        self.threshold = self._init_thr(in_channels)
예제 #29
0
    def __init__(self,
                 num_classes,
                 num_fcs=3,
                 in_channels=256,
                 fc_channels=256,
                 class_agnostic=False,
                 coarse_pred_each_layer=True,
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=None,
                 act_cfg=dict(type='ReLU'),
                 loss_point=dict(type='CrossEntropyLoss',
                                 use_mask=True,
                                 loss_weight=1.0),
                 init_cfg=dict(type='Normal',
                               std=0.001,
                               override=dict(name='fc_logits'))):
        """Build the stack of 1x1 ``ConvModule`` layers and the logits conv.

        Args:
            num_classes (int): Added to the input width of the first layer
                and, when ``coarse_pred_each_layer`` is set, of every later
                layer; also the output width unless ``class_agnostic``.
            num_fcs (int): Number of ``ConvModule`` layers.
            in_channels (int): Base input channels of the first layer.
            fc_channels (int): Output channels of every ``ConvModule``.
            class_agnostic (bool): If true, ``fc_logits`` has one output
                channel.
            coarse_pred_each_layer (bool): If true, each layer after the
                first takes ``fc_channels + num_classes`` input channels.
            conv_cfg, norm_cfg, act_cfg: Passed to every ``ConvModule``.
            loss_point (dict): Config handed to ``build_loss``.
            init_cfg (dict): Forwarded to the parent constructor.
        """
        super().__init__(init_cfg)
        self.num_fcs = num_fcs
        self.in_channels = in_channels
        self.fc_channels = fc_channels
        self.num_classes = num_classes
        self.class_agnostic = class_agnostic
        self.coarse_pred_each_layer = coarse_pred_each_layer
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.loss_point = build_loss(loss_point)

        # Extra input channels appended after each layer (0 when disabled).
        extra_channels = num_classes if self.coarse_pred_each_layer else 0

        self.fcs = nn.ModuleList()
        layer_in = in_channels + num_classes
        for _ in range(num_fcs):
            self.fcs.append(
                ConvModule(layer_in,
                           fc_channels,
                           kernel_size=1,
                           stride=1,
                           padding=0,
                           conv_cfg=conv_cfg,
                           norm_cfg=norm_cfg,
                           act_cfg=act_cfg))
            layer_in = fc_channels + extra_channels

        if self.class_agnostic:
            logits_channels = 1
        else:
            logits_channels = self.num_classes
        self.fc_logits = nn.Conv1d(layer_in,
                                   logits_channels,
                                   kernel_size=1,
                                   stride=1,
                                   padding=0)
예제 #30
0
    def __init__(self,
                 preprocessor=None,
                 backbone=None,
                 neck=None,
                 head=None,
                 loss=None,
                 label_convertor=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None,
                 init_cfg=None):
        """Build the recognizer's components from their config dicts.

        Args:
            preprocessor (dict, optional): Config for ``build_preprocessor``;
                ``self.preprocessor`` stays ``None`` when omitted.
            backbone (dict): Required. Config for ``build_backbone``.
            neck (dict): Required. Config for ``build_neck``.
            head (dict): Required. Updated in place with ``num_classes`` from
                the label convertor, then passed to ``build_head``.
            loss (dict): Required. Config for ``build_loss``.
            label_convertor (dict): Required. Config for ``build_convertor``.
            train_cfg, test_cfg: Stored unchanged.
            pretrained (str, optional): Deprecated. When given, a warning is
                issued and ``self.init_cfg`` is replaced by a 'Pretrained'
                config pointing at this checkpoint.
            init_cfg (dict, optional): Forwarded to the parent constructor.
        """
        super().__init__(init_cfg=init_cfg)

        # Label_convertor
        assert label_convertor is not None
        self.label_convertor = build_convertor(label_convertor)

        # Preprocessor module, e.g., TPS
        self.preprocessor = None
        if preprocessor is not None:
            self.preprocessor = build_preprocessor(preprocessor)

        # Backbone
        assert backbone is not None
        self.backbone = build_backbone(backbone)

        # Neck
        assert neck is not None
        self.neck = build_neck(neck)

        # Head: its class count comes from the label convertor built above.
        assert head is not None
        head.update(num_classes=self.label_convertor.num_classes())
        self.head = build_head(head)

        # Loss
        assert loss is not None
        self.loss = build_loss(loss)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        if pretrained is not None:
            # Implicit string concatenation instead of a backslash inside the
            # literal: the backslash form embedded the next source line's
            # indentation into the emitted message.
            warnings.warn('DeprecationWarning: pretrained is a deprecated '
                          'key, please consider using init_cfg')
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)