def __init__(self,
             with_avg_pool=False,
             with_cls=True,
             with_reg=True,
             roi_feat_size=7,
             in_channels=256,
             num_classes=80,
             bbox_coder=dict(
                 type='DeltaXYWHBBoxCoder',
                 clip_border=True,
                 target_means=[0., 0., 0., 0.],
                 target_stds=[0.1, 0.1, 0.2, 0.2]),
             reg_class_agnostic=False,
             reg_decoded_bbox=False,
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0, loss_weight=1.0),
             init_cfg=None):
    """Create the optional pooling layer and the cls / reg linear layers.

    Args:
        with_avg_pool (bool): If True an ``nn.AvgPool2d(roi_feat_size)``
            layer is created and the linear layers take ``in_channels``
            inputs; otherwise they take ``in_channels * roi_feat_area``.
        with_cls (bool): Create ``fc_cls`` with ``num_classes + 1`` outputs.
        with_reg (bool): Create ``fc_reg``.
        roi_feat_size (int | tuple): Passed through ``_pair``.
        in_channels (int): Channel count stored on the instance.
        num_classes (int): Number of classes (background is added for cls).
        bbox_coder (dict): Config handed to ``build_bbox_coder``.
        reg_class_agnostic (bool): If True ``fc_reg`` has 4 outputs,
            otherwise ``4 * num_classes``.
        reg_decoded_bbox (bool): Stored on the instance.
        loss_cls (dict): Config handed to ``build_loss``.
        loss_bbox (dict): Config handed to ``build_loss``.
        init_cfg (dict | list | None): Forwarded to the parent constructor.
            When None, a default ``Normal`` init entry is installed for
            each layer that was created.
    """
    super(BBoxHead, self).__init__(init_cfg)
    assert with_cls or with_reg

    feat_size = _pair(roi_feat_size)

    # Plain configuration attributes.
    self.with_avg_pool = with_avg_pool
    self.with_cls = with_cls
    self.with_reg = with_reg
    self.roi_feat_size = feat_size
    self.roi_feat_area = feat_size[0] * feat_size[1]
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.reg_decoded_bbox = reg_decoded_bbox
    self.fp16_enabled = False

    # Components built from their configs.
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    # Width of the input seen by the linear layers.
    if with_avg_pool:
        self.avg_pool = nn.AvgPool2d(feat_size)
        fc_in_dim = in_channels
    else:
        fc_in_dim = in_channels * self.roi_feat_area

    if with_cls:
        # One extra output for the background class.
        self.fc_cls = nn.Linear(fc_in_dim, num_classes + 1)
    if with_reg:
        reg_out_dim = 4 if reg_class_agnostic else 4 * num_classes
        self.fc_reg = nn.Linear(fc_in_dim, reg_out_dim)
    self.debug_imgs = None

    if init_cfg is None:
        # No explicit init config: install per-layer defaults.
        default_init = []
        if with_cls:
            default_init.append(
                dict(type='Normal', std=0.01,
                     override=dict(name='fc_cls')))
        if with_reg:
            default_init.append(
                dict(type='Normal', std=0.001,
                     override=dict(name='fc_reg')))
        self.init_cfg = default_init
def __init__(
    self,
    vertex_head,
    polyrnn_head,
    loss_vertex=dict(
        type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1),
    loss_polygon=dict(
        type='CrossEntropyLoss', use_mask=False, loss_weight=1.0),
    loss_offset=dict(type='L1Loss', loss_weight=1.0),
    loss_type=0,
    params=dict(dt_threshold=2, radius=1),
):
    """Build the vertex head, the poly-RNN head and the three losses.

    Args:
        vertex_head (dict): Config handed to ``build_head``.
        polyrnn_head (dict): Config handed to ``build_head`` after
            ``loss_type`` has been injected into a copy of it.
        loss_vertex (dict): Config handed to ``build_loss``; also kept as
            ``loss_vertex_cfg``.
        loss_polygon (dict): Config handed to ``build_loss``; also kept as
            ``loss_polygon_cfg``.
        loss_offset (dict): Config handed to ``build_loss``.
        loss_type (int): Stored on the instance and forwarded to the
            poly-RNN head config.
        params (dict): Extra parameters stored (as a copy) on the instance.
    """
    super(PolygonHead, self).__init__()
    self.vertex_head = build_head(vertex_head)

    # Inject ``loss_type`` into a shallow copy so that the dict owned by
    # the caller is not modified as a side effect of construction.
    polyrnn_head = polyrnn_head.copy()
    polyrnn_head['loss_type'] = loss_type
    self.polyrnn_head = build_head(polyrnn_head)

    self.loss_vertex_cfg = loss_vertex
    self.loss_polygon_cfg = loss_polygon
    self.loss_vertex = build_loss(loss_vertex)
    self.loss_polygon = build_loss(loss_polygon)
    self.loss_offset = build_loss(loss_offset)
    self.loss_type = loss_type
    # Copy so that instances never share (and cannot mutate) the
    # function-level default dict.
    self.params = params.copy()
def __init__(self,
             with_avg_pool=False,
             with_cls=True,
             with_reg=True,
             start_bbox_type='hbb',
             end_bbox_type='hbb',
             reg_dim=None,
             roi_feat_size=7,
             in_channels=256,
             num_classes=15,
             bbox_coder=dict(
                 type='DeltaXYWHBBoxCoder',
                 target_means=[0., 0., 0., 0.],
                 target_stds=[0.1, 0.1, 0.2, 0.2]),
             reg_class_agnostic=False,
             reg_decoded_bbox=False,
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
    """Create the optional pooling layer and the cls / reg linear layers.

    Args:
        with_avg_pool (bool): If True an ``nn.AvgPool2d(roi_feat_size)``
            layer is created and the linear layers take ``in_channels``
            inputs; otherwise ``in_channels * roi_feat_area``.
        with_cls (bool): Create ``fc_cls`` with ``num_classes + 1`` outputs.
        with_reg (bool): Create ``fc_reg``.
        start_bbox_type (str): One of ``'hbb'``, ``'obb'``, ``'poly'``.
        end_bbox_type (str): One of ``'hbb'``, ``'obb'``, ``'poly'``.
        reg_dim (int | None): Regression width per box; when None it is
            taken from ``get_bbox_dim(end_bbox_type)``.
        roi_feat_size (int | tuple): Passed through ``_pair``.
        in_channels (int): Channel count stored on the instance.
        num_classes (int): Number of classes (background is added for cls).
        bbox_coder (dict): Config handed to ``build_bbox_coder``.
        reg_class_agnostic (bool): If True ``fc_reg`` has ``reg_dim``
            outputs, otherwise ``reg_dim * num_classes``.
        reg_decoded_bbox (bool): Stored on the instance.
        loss_cls (dict): Config handed to ``build_loss``.
        loss_bbox (dict): Config handed to ``build_loss``.
    """
    super(OBBoxHead, self).__init__()
    assert with_cls or with_reg

    feat_size = _pair(roi_feat_size)

    self.with_avg_pool = with_avg_pool
    self.with_cls = with_cls
    self.with_reg = with_reg
    self.roi_feat_size = feat_size
    self.roi_feat_area = feat_size[0] * feat_size[1]
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.reg_decoded_bbox = reg_decoded_bbox
    self.fp16_enabled = False

    self.start_bbox_type = start_bbox_type
    self.end_bbox_type = end_bbox_type
    assert start_bbox_type in ('hbb', 'obb', 'poly')
    assert end_bbox_type in ('hbb', 'obb', 'poly')
    if reg_dim is None:
        self.reg_dim = get_bbox_dim(end_bbox_type)
    else:
        self.reg_dim = reg_dim

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    # Width of the input seen by the linear layers.
    if with_avg_pool:
        self.avg_pool = nn.AvgPool2d(feat_size)
        fc_in_dim = in_channels
    else:
        fc_in_dim = in_channels * self.roi_feat_area

    if with_cls:
        # One extra output for the background class.
        self.fc_cls = nn.Linear(fc_in_dim, num_classes + 1)
    if with_reg:
        if reg_class_agnostic:
            reg_out_dim = self.reg_dim
        else:
            reg_out_dim = self.reg_dim * num_classes
        self.fc_reg = nn.Linear(fc_in_dim, reg_out_dim)
    self.debug_imgs = None
def __init__(self,
             with_avg_pool=False,
             with_cls=True,
             with_reg=True,
             roi_feat_size=7,
             in_channels=256,
             num_classes=80,
             score_type='normal',
             head_config=[True, False, False],
             init_type='normal',
             bbox_coder=dict(
                 type='DeltaXYWHBBoxCoder',
                 clip_border=True,
                 target_means=[0., 0., 0., 0.],
                 target_stds=[0.1, 0.1, 0.2, 0.2]),
             reg_class_agnostic=False,
             reg_decoded_bbox=False,
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
    """Create the optional pooling layer and the classification layer.

    Args:
        with_avg_pool (bool): If True an ``nn.AvgPool2d(roi_feat_size)``
            layer is created and ``fc_cls`` takes ``in_channels`` inputs;
            otherwise ``in_channels * roi_feat_area``.
        with_cls (bool): Create ``fc_cls`` with ``num_classes + 1`` outputs.
        with_reg (bool): Only checked by the ``with_cls or with_reg``
            assertion; ``self.with_reg`` is always set to False.
        roi_feat_size (int | tuple): Passed through ``_pair``.
        in_channels (int): Channel count stored on the instance.
        num_classes (int): Number of classes (background is added for cls).
        score_type (str): Stored on the instance.
        head_config (list[bool]): Stored (as a copy) on the instance.
        init_type (str): Stored on the instance.
        bbox_coder (dict): Config handed to ``build_bbox_coder``.
        reg_class_agnostic (bool): Stored on the instance.
        reg_decoded_bbox (bool): Stored on the instance.
        loss_cls (dict): Config handed to ``build_loss``.
        loss_bbox (dict): Config handed to ``build_loss``.
    """
    super(AttentionLogoBBoxHead, self).__init__()
    assert with_cls or with_reg
    self.with_avg_pool = with_avg_pool
    self.with_cls = with_cls
    # NOTE(review): regression is unconditionally disabled here, so the
    # ``with_reg`` argument has no effect beyond the assertion above.
    self.with_reg = False
    self.roi_feat_size = _pair(roi_feat_size)
    self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.reg_decoded_bbox = reg_decoded_bbox
    self.fp16_enabled = False

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    # How the score is regressed.
    self.score_type = score_type
    # Copy so that instances never share the function-level default list.
    self.head_config = list(head_config)
    self.init_type = init_type

    in_channels = self.in_channels
    if self.with_avg_pool:
        self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
    else:
        in_channels *= self.roi_feat_area
    if self.with_cls:
        # One extra output for the background class.
        self.fc_cls = nn.Linear(in_channels, num_classes + 1)
    # No regression layer is built: ``self.with_reg`` is always False.
    self.debug_imgs = None
def __init__(self,
             with_avg_pool=False,
             with_cls=True,
             with_reg=True,
             roi_feat_size=7,
             in_channels=256,
             num_classes=80,
             bbox_coder=dict(
                 type='DeltaXYWHBBoxCoder',
                 clip_border=True,
                 target_means=[0., 0., 0., 0.],
                 target_stds=[0.1, 0.1, 0.2, 0.2]),
             reg_class_agnostic=False,
             reg_decoded_bbox=False,
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0, loss_weight=1.0),
             data_path='/data/zhaozhiyuan/tb_variation/VOCdevkit_all'):
    """Create the classification layer and load the file -> style table.

    Args:
        with_avg_pool (bool): If True an ``nn.AvgPool2d(roi_feat_size)``
            layer is created and ``fc_cls`` takes ``in_channels`` inputs;
            otherwise ``in_channels * roi_feat_area``.
        with_cls (bool): Create ``fc_cls`` with ``num_classes + 1`` outputs.
        with_reg (bool): Only checked by the ``with_cls or with_reg``
            assertion; ``self.with_reg`` is always set to False.
        roi_feat_size (int | tuple): Passed through ``_pair``.
        in_channels (int): Channel count stored on the instance.
        num_classes (int): Number of classes (background is added for cls).
        bbox_coder (dict): Config handed to ``build_bbox_coder``.
        reg_class_agnostic (bool): Stored on the instance.
        reg_decoded_bbox (bool): Stored on the instance.
        loss_cls (dict): Config handed to ``build_loss``.
        loss_bbox (dict): Config handed to ``build_loss``.
        data_path (str): Dataset root; annotations are read from
            ``<data_path>/VOC2007/Annotations``. Defaults to the path that
            used to be hard-coded.
    """
    super(LogoDCBBoxHead, self).__init__()
    assert with_cls or with_reg
    self.with_avg_pool = with_avg_pool
    self.with_cls = with_cls
    # NOTE(review): regression is unconditionally disabled here, so the
    # ``with_reg`` argument has no effect beyond the assertion above.
    self.with_reg = False
    self.roi_feat_size = _pair(roi_feat_size)
    self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.reg_decoded_bbox = reg_decoded_bbox
    self.fp16_enabled = False

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    in_channels = self.in_channels
    if self.with_avg_pool:
        self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
    else:
        in_channels *= self.roi_feat_area
    if self.with_cls:
        # One extra output for the background class.
        self.fc_cls = nn.Linear(in_channels, num_classes + 1)
    # No regression layer is built: ``self.with_reg`` is always False.
    self.debug_imgs = None

    # Build the mapping "annotation file stem -> style" from the ``style``
    # tag of the first ``object`` element of every annotation file.
    self.file_to_style = {}
    self.data_path = data_path
    self.anno_path = os.path.join(self.data_path, 'VOC2007', 'Annotations')
    for anno in os.listdir(self.anno_path):
        anno_file = ET.parse(os.path.join(self.anno_path, anno))
        first_object = anno_file.find('object')
        self.file_to_style[anno.split('.')[0]] = \
            first_object.find('style').text
def __init__(self,
             num_query=1,
             transformer=None,
             positional_encoding=dict(
                 type='SinePositionalEncoding',
                 num_feats=128,
                 normalize=True),
             bbox_head=None,
             cls_head=None,
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0,
             ),
             loss_bbox=dict(type='L1Loss', loss_weight=5.0),
             loss_iou=dict(type='GIoULoss', loss_weight=2.0),
             train_cfg=None,
             test_cfg=None,
             init_cfg=None,
             frozen_modules=None,
             **kwargs):
    """Build the transformer, sub-heads, losses and the query embedding.

    Args:
        num_query (int): Number of rows of the query embedding.
        transformer (dict): Config handed to ``build_transformer``.
        positional_encoding (dict): Config handed to
            ``build_positional_encoding``.
        bbox_head (dict): Config handed to ``build_head``; must not be
            None.
        cls_head (dict | None): When None only ``loss_bbox`` and
            ``loss_iou`` are built; otherwise the head is built together
            with ``loss_cls``.
        loss_cls (dict): Config handed to ``build_loss`` (cls branch only).
        loss_bbox (dict): Config handed to ``build_loss`` (no-cls branch).
        loss_iou (dict): Config handed to ``build_loss`` (no-cls branch).
        train_cfg: Stored on the instance.
        test_cfg: Stored on the instance.
        init_cfg: Forwarded to the parent constructor.
        frozen_modules (list[str] | None): Attribute names whose
            parameters get ``requires_grad = False``.
        **kwargs: Accepted and not used by this constructor.
    """
    super(StarkHead, self).__init__(init_cfg=init_cfg)
    self.transformer = build_transformer(transformer)
    self.positional_encoding = build_positional_encoding(
        positional_encoding)

    assert bbox_head is not None
    self.bbox_head = build_head(bbox_head)

    if cls_head is not None:
        # the stage-2 training
        self.cls_head = build_head(cls_head)
        self.loss_cls = build_loss(loss_cls)
    else:
        # the stage-1 training
        self.loss_bbox = build_loss(loss_bbox)
        self.loss_iou = build_loss(loss_iou)
        self.cls_head = None

    self.embed_dims = self.transformer.embed_dims
    self.num_query = num_query
    self.query_embedding = nn.Embedding(self.num_query, self.embed_dims)

    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.fp16_enabled = False

    if frozen_modules is None:
        return
    assert isinstance(frozen_modules, list)
    for module_name in frozen_modules:
        frozen = getattr(self, module_name)
        # TODO: Study the influence of freezing BN running_mean and
        # running_variance of `frozen_modules` in the 2nd stage train.
        # The official code doesn't freeze these.
        for weight in frozen.parameters():
            weight.requires_grad = False
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_scales=[8, 16, 32],
             anchor_ratios=[0.5, 1.0, 2.0],
             anchor_strides=[4, 8, 16, 32, 64],
             anchor_base_sizes=None,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0),
             with_cls=True,
             sampling=True,
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)):
    """Store the anchor settings, build the losses and anchor generators.

    Args:
        num_classes (int): Number of classes stored on the instance.
        in_channels (int): Stored on the instance.
        feat_channels (int): Stored on the instance.
        anchor_scales (list): Scales given to every ``AnchorGenerator``.
        anchor_ratios (list): Ratios given to every ``AnchorGenerator``.
        anchor_strides (list): Stored on the instance; also the default
            for ``anchor_base_sizes``.
        anchor_base_sizes (list | None): One ``AnchorGenerator`` is built
            per entry; defaults to a copy of ``anchor_strides``.
        target_means (tuple): Stored on the instance.
        target_stds (tuple): Stored on the instance.
        with_cls (bool): Stored on the instance.
        sampling (bool): Stored on the instance; must be False when the
            classification loss type is ``'FocalLoss'``.
        loss_cls (dict): Config handed to ``build_loss``. Its
            ``use_sigmoid`` flag selects ``num_classes - 1`` (True) or
            ``num_classes`` (False) classification outputs.
        loss_bbox (dict): Config handed to ``build_loss``.
    """
    super(CascadeAnchorHeadRbbox, self).__init__()
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = anchor_strides
    self.anchor_base_sizes = list(
        anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
    self.target_means = target_means
    self.target_stds = target_stds
    self.with_cls = with_cls
    self.sampling = sampling

    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    self.cls_focal_loss = loss_cls['type'] in ['FocalLoss']
    # Computed exactly once (the original repeated this block verbatim at
    # the end of the constructor).
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes - 1
    else:
        self.cls_out_channels = num_classes

    self.use_iou_reg = loss_bbox['type'] in ['IoULoss']
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    if self.cls_focal_loss:
        assert not sampling

    self.anchor_generators = [
        AnchorGenerator(anchor_base, anchor_scales, anchor_ratios)
        for anchor_base in self.anchor_base_sizes
    ]
    self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
def __init__(
    self,
    num_classes,
    in_channels,
    feat_channels=256,
    stacked_convs=4,
    strides=(4, 8, 16, 32, 64),
    scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128, 512)),
    pos_scale=0.2,
    num_grids=[40, 36, 24, 16, 12],
    cls_down_index=0,
    loss_mask=None,
    loss_cls=None,
    norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
    train_cfg=None,
    test_cfg=None,
    init_cfg=[
        dict(type='Normal', layer='Conv2d', std=0.01),
        dict(
            type='Normal',
            std=0.01,
            bias_prob=0.01,
            override=dict(name='conv_mask_list')),
        dict(
            type='Normal',
            std=0.01,
            bias_prob=0.01,
            override=dict(name='conv_cls'))
    ],
):
    """Store the head settings, build both losses and create the layers.

    Args:
        num_classes (int): Stored as ``num_classes`` and
            ``cls_out_channels``.
        in_channels (int): Stored on the instance.
        feat_channels (int): Stored on the instance.
        stacked_convs (int): Stored on the instance.
        strides (tuple): Stored on the instance; its length defines
            ``num_levels``.
        scale_ranges (tuple): Must have ``num_levels`` entries.
        pos_scale (float): Stored on the instance.
        num_grids (list): Must have ``num_levels`` entries.
        cls_down_index (int): Stored on the instance.
        loss_mask (dict): Config handed to ``build_loss``.
        loss_cls (dict): Config handed to ``build_loss``.
        norm_cfg (dict): Stored on the instance.
        train_cfg: Stored on the instance.
        test_cfg: Stored on the instance.
        init_cfg (list | dict): Forwarded to the parent constructor and
            then stored on the instance again.
    """
    super(SOLOHead, self).__init__(init_cfg)

    # Channel / class bookkeeping.
    self.num_classes = num_classes
    self.cls_out_channels = self.num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.stacked_convs = stacked_convs

    # Per-level settings; every sequence must describe the same number
    # of FPN feats.
    self.strides = strides
    self.num_grids = num_grids
    self.num_levels = len(strides)
    assert self.num_levels == len(scale_ranges) == len(num_grids)
    self.scale_ranges = scale_ranges
    self.pos_scale = pos_scale
    self.cls_down_index = cls_down_index

    self.loss_cls = build_loss(loss_cls)
    self.loss_mask = build_loss(loss_mask)

    self.norm_cfg = norm_cfg
    self.init_cfg = init_cfg
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg

    self._init_layers()
def __init__(self,
             roi_feat_size=7,
             in_channels=256,
             num_convs=4,
             num_fcs=2,
             reg_num=2,
             conv_out_channels=256,
             fc_out_channels=1024,
             offset_coordinate='rectangle',
             offset_coder=dict(
                 type='DeltaXYOffsetCoder',
                 target_means=[0.0, 0.0],
                 target_stds=[0.5, 0.5]),
             reg_decoded_offset=False,
             conv_cfg=None,
             norm_cfg=None,
             loss_offset=dict(type='MSELoss', loss_weight=1.0)):
    """Build the conv stack, the FC stack and the offset regressor.

    Args:
        roi_feat_size (int | tuple): Passed through ``_pair``; its area
            scales the input width of the first FC layer.
        in_channels (int): Input channels of the first conv.
        num_convs (int): Number of 3x3 (padding 1) conv layers.
        num_fcs (int): Number of FC layers.
        reg_num (int): Output width of ``fc_offset``.
        conv_out_channels (int): Output channels of every conv.
        fc_out_channels (int): Output width of every FC layer.
        offset_coordinate (str): Stored on the instance.
        offset_coder (dict): Config handed to ``build_bbox_coder``.
        reg_decoded_offset (bool): Stored on the instance.
        conv_cfg: Stored on the instance.
        norm_cfg: Stored on the instance.
        loss_offset (dict): Config handed to ``build_loss``.
    """
    super(OffsetHead, self).__init__()
    self.in_channels = in_channels
    self.conv_out_channels = conv_out_channels
    self.fc_out_channels = fc_out_channels
    self.offset_coordinate = offset_coordinate
    self.reg_decoded_offset = reg_decoded_offset
    self.reg_num = reg_num
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg

    self.offset_coder = build_bbox_coder(offset_coder)
    # Built once; the original constructed the same loss a second time
    # at the end of the constructor.
    self.loss_offset = build_loss(loss_offset)

    self.convs = nn.ModuleList()
    for i in range(num_convs):
        # Only the first conv sees the raw input channels.
        in_channels = (
            self.in_channels if i == 0 else self.conv_out_channels)
        self.convs.append(
            Conv2d(in_channels, self.conv_out_channels, 3, padding=1))

    roi_feat_size = _pair(roi_feat_size)
    roi_feat_area = roi_feat_size[0] * roi_feat_size[1]
    self.fcs = nn.ModuleList()
    for i in range(num_fcs):
        # The first FC layer is sized for conv channels times RoI area.
        in_channels = (
            self.conv_out_channels *
            roi_feat_area if i == 0 else self.fc_out_channels)
        self.fcs.append(nn.Linear(in_channels, self.fc_out_channels))

    self.fc_offset = nn.Linear(self.fc_out_channels, self.reg_num)
    self.relu = nn.ReLU()
def __init__(self,
             with_avg_pool=False,
             with_cls=True,
             with_reg=True,
             roi_feat_size=7,
             in_channels=256,
             num_classes=80,
             bbox_coder=dict(
                 type='DeltaXYWHBBoxCoder',
                 target_means=[0., 0., 0., 0.],
                 target_stds=[0.1, 0.1, 0.2, 0.2]),
             reg_class_agnostic=False,
             reg_decoded_bbox=False,
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
    """Create the pooling layer, ``fc_cls`` and a batch-normed ``fc_reg``.

    Args:
        with_avg_pool (bool): If True an ``nn.AvgPool2d(roi_feat_size)``
            layer is created and the linear layers take ``in_channels``
            inputs; otherwise ``in_channels * roi_feat_area``.
        with_cls (bool): Create ``fc_cls`` with ``num_classes + 1`` outputs.
        with_reg (bool): Create ``fc_reg`` (linear layer followed by
            ``nn.BatchNorm1d``).
        roi_feat_size (int | tuple): Passed through ``_pair``.
        in_channels (int): Channel count stored on the instance.
        num_classes (int): Number of classes (background is added for cls).
        bbox_coder (dict): Config handed to ``build_bbox_coder``.
        reg_class_agnostic (bool): If True the regression branch has 4
            outputs, otherwise ``4 * num_classes``.
        reg_decoded_bbox (bool): Stored on the instance.
        loss_cls (dict): Config handed to ``build_loss``.
        loss_bbox (dict): Config handed to ``build_loss``.
    """
    super(BBoxHeadBN, self).__init__()
    assert with_cls or with_reg
    self.with_avg_pool = with_avg_pool
    self.with_cls = with_cls
    self.with_reg = with_reg
    self.roi_feat_size = _pair(roi_feat_size)
    self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.reg_decoded_bbox = reg_decoded_bbox
    self.fp16_enabled = False

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    in_channels = self.in_channels
    if self.with_avg_pool:
        self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
    else:
        in_channels *= self.roi_feat_area
    if self.with_cls:
        # One extra output for the background class.
        self.fc_cls = nn.Linear(in_channels, num_classes + 1)
    if self.with_reg:
        out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes
        # Size the regression branch like ``fc_cls``: its input is the
        # (possibly flattened) ``in_channels`` computed above, and the
        # batch norm covers every regression output. With
        # ``with_avg_pool=True`` and ``reg_class_agnostic=True`` this is
        # identical to the former hard-coded ``self.in_channels`` / ``4``.
        self.fc_reg = nn.Sequential(
            nn.Linear(in_channels, out_dim_reg),
            nn.BatchNorm1d(out_dim_reg))
    self.debug_imgs = None
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             bbox_type='hbb',
             reg_dim=None,
             stacked_convs=4,
             strides=(4, 8, 16, 32, 64),
             dcn_on_last_conv=False,
             conv_bias='auto',
             background_label=None,
             loss_cls=dict(
                 type='FocalLoss',
                 use_sigmoid=True,
                 gamma=2.0,
                 alpha=0.25,
                 loss_weight=1.0),
             loss_bbox=dict(type='IoULoss', loss_weight=1.0),
             conv_cfg=None,
             norm_cfg=None,
             train_cfg=None,
             test_cfg=None):
    """Store the head settings, build both losses and create the layers.

    Args:
        num_classes (int): Stored as ``num_classes`` and
            ``cls_out_channels``.
        in_channels (int): Stored on the instance.
        feat_channels (int): Stored on the instance.
        bbox_type (str): Stored on the instance; used to derive
            ``reg_dim`` when that argument is None.
        reg_dim (int | None): Regression width; defaults to
            ``get_bbox_dim(bbox_type)``.
        stacked_convs (int): Stored on the instance.
        strides (tuple): Stored on the instance.
        dcn_on_last_conv (bool): Stored on the instance.
        conv_bias (bool | str): Must be a bool or ``'auto'``.
        background_label (int | None): Must end up equal to 0 or
            ``num_classes``; None selects ``num_classes``.
        loss_cls (dict): Config handed to ``build_loss``.
        loss_bbox (dict): Config handed to ``build_loss``.
        conv_cfg: Stored on the instance.
        norm_cfg: Stored on the instance.
        train_cfg: Stored on the instance.
        test_cfg: Stored on the instance.
    """
    super(OBBAnchorFreeHead, self).__init__()

    self.num_classes = num_classes
    self.cls_out_channels = num_classes
    self.in_channels = in_channels
    self.feat_channels = feat_channels

    self.bbox_type = bbox_type
    if reg_dim is None:
        self.reg_dim = get_bbox_dim(self.bbox_type)
    else:
        self.reg_dim = reg_dim

    self.stacked_convs = stacked_convs
    self.strides = strides
    self.dcn_on_last_conv = dcn_on_last_conv
    assert conv_bias == 'auto' or isinstance(conv_bias, bool)
    self.conv_bias = conv_bias

    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.fp16_enabled = False

    if background_label is None:
        self.background_label = num_classes
    else:
        self.background_label = background_label
    # background_label should be either 0 or num_classes
    assert self.background_label in (0, num_classes)

    self._init_layers()
def __init__(self,
             num_fcs=2,
             fc_out_channels=1024,
             gggs_config=None,
             *args,
             **kwargs):
    """Replace ``fc_cls`` and load the grouping tables from disk.

    Args:
        num_fcs (int): Forwarded to the parent constructor.
        fc_out_channels (int): Forwarded to the parent constructor and to
            ``CLASS_HEAD``.
        gggs_config: Object providing ``num_bins``, ``loss_bin``,
            ``label2binlabel``, ``pred_slice``, ``fg_split`` and
            ``others_sample_ratio``. The default of None is not usable:
            these attributes are read unconditionally.
        *args: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """
    super(GGGSBBoxHeadWith0, self).__init__(
        *args,
        num_fcs=num_fcs,
        fc_out_channels=fc_out_channels,
        **kwargs)

    self.fc_cls = CLASS_HEAD(
        gggs_config=gggs_config,
        cls_last_dim=self.cls_last_dim,
        fc_out_channels=fc_out_channels)

    # One independently built loss per bin, kept in a plain list.
    self.loss_bins = [
        build_loss(gggs_config.loss_bin)
        for _ in range(gggs_config.num_bins)
    ]

    # Tables are loaded from the configured paths and moved to the GPU.
    self.label2binlabel = torch.load(gggs_config.label2binlabel).cuda()
    self.pred_slice = torch.load(gggs_config.pred_slice).cuda()

    # TODO: update this ugly implementation. Save fg_split to a list and
    # load groups by gs_config.num_bins
    # NOTE(review): ``pickle.load`` executes arbitrary code from the file;
    # only point ``fg_split`` at trusted files.
    with open(gggs_config.fg_split, 'rb') as fin:
        fg_split = pickle.load(fin)

    split_keys = ('(10000,~)', '(2000,10000)', '(500,2000)', '(0,500)')
    self.fg_splits = [
        torch.from_numpy(fg_split[key]).cuda() for key in split_keys
    ]

    self.others_sample_ratio = gggs_config.others_sample_ratio
def __init__(self, models):
    """Store the member models and build fusion, assigner, loss, sampler.

    Args:
        models: Member models; stored on the instance unchanged.
    """
    super(EnsembleModel, self).__init__()
    self.models = models

    # TODO: Fix half()
    self.fusion = Fusion().half()
    self.num_classes = 1

    assigner_cfg = {
        'type': 'MaxIoUAssigner',
        'pos_iou_thr': 0.5,
        'neg_iou_thr': 0.5,
        'min_pos_iou': 0.5,
        'match_low_quality': False,
        'ignore_iof_thr': -1,
    }
    self.assigner = build_assigner(assigner_cfg)

    loss_cls_cfg = {
        'type': 'CrossEntropyLoss',
        'use_sigmoid': True,
        'loss_weight': 1.0,
    }
    self.loss_cls = build_loss(loss_cls_cfg)

    self.sampler = build_sampler({'type': 'PseudoSampler'})
def __init__(self,
             num_chars=92,
             visual_dim=64,
             fusion_dim=1024,
             node_input=32,
             node_embed=256,
             edge_input=5,
             edge_embed=256,
             num_gnn=2,
             num_classes=26,
             loss=dict(type='SDMGRLoss'),
             bidirectional=False,
             train_cfg=None,
             test_cfg=None):
    """Build the fusion block, embeddings, LSTM, GNN layers and classifiers.

    Args:
        num_chars (int): Number of rows of the node embedding table.
        visual_dim (int): First input width given to the fusion ``Block``.
        fusion_dim (int): Third argument given to the fusion ``Block``.
        node_input (int): Embedding width and LSTM input size.
        node_embed (int): Node feature width; the LSTM hidden size is
            half of it when ``bidirectional`` is True.
        edge_input (int): Input width of the edge embedding layer.
        edge_embed (int): Output width of the edge embedding layer.
        num_gnn (int): Number of ``GNNLayer`` modules.
        num_classes (int): Output width of the node classifier.
        loss (dict): Config handed to ``build_loss``.
        bidirectional (bool): Whether the LSTM is bidirectional.
        train_cfg: Accepted; not used by this constructor.
        test_cfg: Accepted; not used by this constructor.
    """
    super().__init__()

    # A bidirectional LSTM concatenates both directions, so each one
    # gets half of the node embedding width.
    if bidirectional:
        lstm_hidden = node_embed // 2
    else:
        lstm_hidden = node_embed

    self.fusion = Block([visual_dim, node_embed], node_embed, fusion_dim)
    self.node_embed = nn.Embedding(num_chars, node_input, padding_idx=0)
    self.rnn = nn.LSTM(
        input_size=node_input,
        hidden_size=lstm_hidden,
        num_layers=1,
        batch_first=True,
        bidirectional=bidirectional)
    self.edge_embed = nn.Linear(edge_input, edge_embed)

    gnn_stack = []
    for _ in range(num_gnn):
        gnn_stack.append(GNNLayer(node_embed, edge_embed))
    self.gnn_layers = nn.ModuleList(gnn_stack)

    self.node_cls = nn.Linear(node_embed, num_classes)
    self.edge_cls = nn.Linear(edge_embed, 2)
    self.loss = build_loss(loss)
def __init__(self,
             tau=0.2,
             min_iof=0.7,
             loss_sim=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             init_cfg=None,
             *args,
             **kwargs):
    """Extend the parent head with a similarity loss and relation FCs.

    Args:
        tau (float): Stored on the instance.
        min_iof (float): Stored on the instance.
        loss_sim (dict): Config handed to ``build_loss``.
        init_cfg: Forwarded to the parent constructor. When None, a
            ``Xavier`` entry overriding ``rel_fcs`` is appended to
            ``self.init_cfg``.
        *args: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """
    super(CORE2FCBBoxHead, self).__init__(
        *args, init_cfg=init_cfg, **kwargs)
    self.tau = tau
    self.min_iof = min_iof
    self.loss_sim = build_loss(loss_sim)

    # add relation embedding: one Linear -> ReLU -> Linear(128) branch
    # per shared FC layer
    width = self.shared_out_channels
    self.rel_fcs = nn.ModuleList(
        nn.Sequential(
            nn.Linear(width, width),
            nn.ReLU(inplace=True),
            nn.Linear(width, 128))
        for _ in range(self.num_shared_fcs))

    if init_cfg is None:
        rel_init = dict(
            type='Xavier',
            layer='Linear',
            override=[
                dict(name='rel_fcs'),
            ])
        self.init_cfg += [rel_init]
def __init__(
    self,
    num_classes,
    in_channels,
    stacked_convs=4,
    conv_cfg=None,
    norm_cfg=dict(type="GN", num_groups=32, requires_grad=True),
    loss_dfl=dict(type="DistributionFocalLoss", loss_weight=0.25),
    reg_max=16,
    **kwargs,
):
    """Store the tower settings, run the parent constructor, add DFL parts.

    Args:
        num_classes (int): Forwarded to the parent constructor.
        in_channels (int): Forwarded to the parent constructor.
        stacked_convs (int): Stored on the instance.
        conv_cfg: Stored on the instance.
        norm_cfg (dict): Stored on the instance.
        loss_dfl (dict): Config handed to ``build_loss``.
        reg_max (int): Stored on the instance and given to ``Integral``.
        **kwargs: Forwarded to the parent constructor.
    """
    # These attributes are assigned before the parent constructor runs;
    # presumably the parent reads them while building layers — verify
    # against the base class.
    self.stacked_convs = stacked_convs
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.reg_max = reg_max
    super(ModifiedGFLHead, self).__init__(
        num_classes, in_channels, **kwargs)

    self.sampling = False
    if self.train_cfg:
        self.assigner = build_assigner(self.train_cfg.assigner)
        # SSD sampling=False so use PseudoSampler
        self.sampler = build_sampler(
            {"type": "PseudoSampler"}, context=self)

    self.integral = Integral(self.reg_max)
    self.loss_dfl = build_loss(loss_dfl)
def __init__(
        self,
        in_channels,
        out_channels,
        text_repr_type='poly',  # 'poly' or 'quad'
        downsample_ratio=0.25,
        loss=dict(type='PANLoss'),
        train_cfg=None,
        test_cfg=None):
    """Validate the arguments, build the loss and the 1x1 output conv.

    Args:
        in_channels (list[int]): Channel counts; their sum is the input
            width of ``out_conv``.
        out_channels (int): Output channels of ``out_conv``.
        text_repr_type (str): ``'poly'`` or ``'quad'``.
        downsample_ratio (float): Value in ``[0, 1]`` stored on the
            instance.
        loss (dict): Config handed to ``build_loss``. Its ``type`` must be
            ``'PANLoss'`` (decoding type ``'pan'``) or ``'PSELoss'``
            (decoding type ``'pse'``).
        train_cfg: Stored on the instance.
        test_cfg: Stored on the instance.

    Raises:
        NotImplementedError: If ``loss['type']`` is neither ``'PANLoss'``
            nor ``'PSELoss'``.
    """
    super().__init__()

    assert check_argument.is_type_list(in_channels, int)
    assert isinstance(out_channels, int)
    assert text_repr_type in ['poly', 'quad']
    assert 0 <= downsample_ratio <= 1

    self.loss_module = build_loss(loss)

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.text_repr_type = text_repr_type
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.downsample_ratio = downsample_ratio

    # ``loss_type`` rather than ``type`` so the builtin is not shadowed.
    loss_type = loss['type']
    if loss_type == 'PANLoss':
        self.decoding_type = 'pan'
    elif loss_type == 'PSELoss':
        self.decoding_type = 'pse'
    else:
        raise NotImplementedError(f'unsupported loss type {loss_type}.')

    # The builtin ``sum`` yields a plain ``int`` channel count instead of
    # a numpy scalar.
    self.out_conv = nn.Conv2d(
        in_channels=sum(in_channels),
        out_channels=out_channels,
        kernel_size=1)
    self.init_weights()
def __init__(self,
             in_channels,
             decoding_type='textsnake',
             text_repr_type='poly',
             loss=dict(type='TextSnakeLoss'),
             train_cfg=None,
             test_cfg=None,
             init_cfg=dict(
                 type='Normal',
                 override=dict(name='out_conv'),
                 mean=0,
                 std=0.01)):
    """Store the head settings, build the loss and the 1x1 output conv.

    Args:
        in_channels (int): Input channels of ``out_conv``.
        decoding_type (str): Stored on the instance.
        text_repr_type (str): Stored on the instance.
        loss (dict): Config handed to ``build_loss``.
        train_cfg: Stored on the instance.
        test_cfg: Stored on the instance.
        init_cfg (dict): Forwarded to the parent constructor.
    """
    super().__init__(init_cfg=init_cfg)
    assert isinstance(in_channels, int)

    # Fixed settings of this head: five output channels, no downsampling.
    self.out_channels = 5
    self.downsample_ratio = 1.0

    self.in_channels = in_channels
    self.decoding_type = decoding_type
    self.text_repr_type = text_repr_type
    self.loss_module = build_loss(loss)
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg

    self.out_conv = nn.Conv2d(
        in_channels=self.in_channels,
        out_channels=self.out_channels,
        kernel_size=1,
        stride=1,
        padding=0)
def __init__(
    self,
    with_avg_pool=False,
    num_shared_fcs=2,
    roi_feat_size=7,
    in_channels=256,
    fc_out_channels=1024,
    num_classes=15,
    reg_class_agnostic=False,
    ratio_thr=0.8,
    bbox_coder=dict(
        type='DeltaXYWHBBoxCoder',
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2]),
    fix_coder=dict(type='GVFixCoder'),
    ratio_coder=dict(type='GVRatioCoder'),
    loss_cls=dict(
        type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
    loss_bbox=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=1.0),
    loss_fix=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=1.0),
    loss_ratio=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=16.0),
):
    """Store the head settings, build coders and losses, create layers.

    Args:
        with_avg_pool (bool): Stored on the instance.
        num_shared_fcs (int): Stored on the instance.
        roi_feat_size (int | tuple): Passed through ``_pair``.
        in_channels (int): Stored on the instance.
        fc_out_channels (int): Stored on the instance.
        num_classes (int): Stored on the instance.
        reg_class_agnostic (bool): Stored on the instance.
        ratio_thr (float): Stored on the instance.
        bbox_coder (dict): Config handed to ``build_bbox_coder``.
        fix_coder (dict): Config handed to ``build_bbox_coder``.
        ratio_coder (dict): Config handed to ``build_bbox_coder``.
        loss_cls (dict): Config handed to ``build_loss``.
        loss_bbox (dict): Config handed to ``build_loss``.
        loss_fix (dict): Config handed to ``build_loss``.
        loss_ratio (dict): Config handed to ``build_loss``.
    """
    super(GVBBoxHead, self).__init__()

    feat_size = _pair(roi_feat_size)
    self.with_avg_pool = with_avg_pool
    self.num_shared_fcs = num_shared_fcs
    self.roi_feat_size = feat_size
    self.roi_feat_area = feat_size[0] * feat_size[1]
    self.in_channels = in_channels
    self.fc_out_channels = fc_out_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.ratio_thr = ratio_thr
    self.fp16_enabled = False

    # Box types are fixed for this head.
    self.start_bbox_type = 'hbb'
    self.end_bbox_type = 'poly'

    # Coders.
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.fix_coder = build_bbox_coder(fix_coder)
    self.ratio_coder = build_bbox_coder(ratio_coder)

    # Losses.
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.loss_fix = build_loss(loss_fix)
    self.loss_ratio = build_loss(loss_ratio)

    self._init_layers()
def __init__(self,
             num_fcs,
             in_channels,
             fc_channels,
             out_channels,
             norm_cfg=None,
             act_cfg=None,
             num_classes=None,
             loss=None,
             loss_pairwise=None,
             topk=(1, ),
             init_cfg=dict(
                 type='Normal', layer='Linear', mean=0, std=0.01, bias=0)):
    """Validate the loss setup, build the losses and create the layers.

    Args:
        num_fcs (int): Stored on the instance.
        in_channels (int): Stored on the instance.
        fc_channels (int): Stored on the instance.
        out_channels (int): Stored on the instance.
        norm_cfg: Stored on the instance.
        act_cfg: Stored on the instance.
        num_classes (int | None): Must be an int when ``loss`` is given.
        loss (dict | None): Config for ``loss_cls``; None disables it.
        loss_pairwise (dict | None): Config for ``loss_triplet``; None
            disables it.
        topk (int | tuple[int]): Positive value(s) given to ``Accuracy``.
        init_cfg (dict): Forwarded to the parent constructor.

    Raises:
        ValueError: If neither ``loss`` nor ``loss_pairwise`` is given.
        TypeError: If ``loss`` is given and ``num_classes`` is not an int.
    """
    super(LinearReIDHead, self).__init__(init_cfg)

    assert isinstance(topk, (int, tuple))
    if isinstance(topk, int):
        topk = (topk, )
    assert all(k > 0 for k in topk), 'Top-k should be larger than 0'
    self.topk = topk

    classes_is_int = isinstance(num_classes, int)
    if loss:
        if not classes_is_int:
            raise TypeError('The num_classes must be a current number, '
                            'if there is cross entropy loss.')
    else:
        if classes_is_int:
            warnings.warn('Since cross entropy is not set, '
                          'the num_classes will be ignored.')
        if not loss_pairwise:
            raise ValueError('Please choose at least one loss in '
                             'triplet loss and cross entropy loss.')

    if loss:
        self.loss_cls = build_loss(loss)
    else:
        self.loss_cls = None
    if loss_pairwise:
        self.loss_triplet = build_loss(loss_pairwise)
    else:
        self.loss_triplet = None

    self.num_fcs = num_fcs
    self.in_channels = in_channels
    self.fc_channels = fc_channels
    self.out_channels = out_channels
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.num_classes = num_classes
    self.accuracy = Accuracy(topk=self.topk)
    self.fp16_enabled = False

    self._init_layers()
def __init__(self,
             in_channels,
             scales,
             fourier_degree=5,
             num_sample=50,
             num_reconstr_points=50,
             decoding_type='fcenet',
             loss=dict(type='FCELoss'),
             score_thr=0.3,
             nms_thr=0.1,
             alpha=1.0,
             beta=1.0,
             text_repr_type='poly',
             train_cfg=None,
             test_cfg=None,
             init_cfg=dict(
                 type='Normal',
                 mean=0,
                 std=0.01,
                 override=[
                     dict(name='out_conv_cls'),
                     dict(name='out_conv_reg')
                 ])):
    """Store the head settings, build the loss and both output convs.

    Args:
        in_channels (int): Input channels of both output convs.
        scales: Stored on the instance.
        fourier_degree (int): Stored on the instance, injected into the
            loss config, and used to size the regression conv
            (``(2 * fourier_degree + 1) * 2`` output channels).
        num_sample (int): Stored as ``sample_num`` and injected into the
            loss config.
        num_reconstr_points (int): Stored on the instance.
        decoding_type (str): Stored on the instance.
        loss (dict): Loss config; a copy extended with ``fourier_degree``
            and ``num_sample`` is handed to ``build_loss``.
        score_thr (float): Stored on the instance.
        nms_thr (float): Stored on the instance.
        alpha (float): Stored on the instance.
        beta (float): Stored on the instance.
        text_repr_type (str): Stored on the instance.
        train_cfg: Stored on the instance.
        test_cfg: Stored on the instance.
        init_cfg (dict): Forwarded to the parent constructor.
    """
    super().__init__(init_cfg=init_cfg)
    assert isinstance(in_channels, int)

    self.downsample_ratio = 1.0
    self.in_channels = in_channels
    self.scales = scales
    self.fourier_degree = fourier_degree
    self.sample_num = num_sample
    self.num_reconstr_points = num_reconstr_points

    # Extend a shallow copy: writing into ``loss`` directly would modify
    # the shared default dict (or the config owned by the caller).
    loss = loss.copy()
    loss['fourier_degree'] = fourier_degree
    loss['num_sample'] = num_sample
    self.decoding_type = decoding_type
    self.loss_module = build_loss(loss)

    self.score_thr = score_thr
    self.nms_thr = nms_thr
    self.alpha = alpha
    self.beta = beta
    self.text_repr_type = text_repr_type
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg

    self.out_channels_cls = 4
    self.out_channels_reg = (2 * self.fourier_degree + 1) * 2

    self.out_conv_cls = nn.Conv2d(
        self.in_channels,
        self.out_channels_cls,
        kernel_size=3,
        stride=1,
        padding=1)
    self.out_conv_reg = nn.Conv2d(
        self.in_channels,
        self.out_channels_reg,
        kernel_size=3,
        stride=1,
        padding=1)
def __init__(self,
             num_classes,
             in_channels,
             regress_ranges=((-1, 48), (48, 96), (96, 192), (192, 384),
                             (384, INF)),
             center_sampling=True,
             center_sample_radius=1.5,
             norm_on_bbox=True,
             centerness_on_reg=True,
             centerness_alpha=2.5,
             loss_cls=dict(
                 type='FocalLoss',
                 use_sigmoid=True,
                 gamma=2.0,
                 alpha=0.25,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
             loss_dir=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             loss_attr=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             loss_centerness=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 loss_weight=1.0),
             norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
             centerness_branch=(64, ),
             init_cfg=None,
             **kwargs):
    """Store the sampling settings, run the parent, add the centerness loss.

    Args:
        num_classes (int): Forwarded to the parent constructor.
        in_channels (int): Forwarded to the parent constructor.
        regress_ranges (tuple): Stored on the instance.
        center_sampling (bool): Stored on the instance.
        center_sample_radius (float): Stored on the instance.
        norm_on_bbox (bool): Stored on the instance.
        centerness_on_reg (bool): Stored on the instance.
        centerness_alpha (float): Stored on the instance.
        loss_cls (dict): Forwarded to the parent constructor.
        loss_bbox (dict): Forwarded to the parent constructor.
        loss_dir (dict): Forwarded to the parent constructor.
        loss_attr (dict): Forwarded to the parent constructor.
        loss_centerness (dict): Config handed to ``build_loss``.
        norm_cfg (dict): Forwarded to the parent constructor.
        centerness_branch (tuple): Stored on the instance.
        init_cfg: Forwarded to the parent constructor. When None, a
            default ``Normal`` config (with a ``conv_cls`` override) is
            installed afterwards.
        **kwargs: Forwarded to the parent constructor.
    """
    # Assigned before the parent constructor runs; presumably the parent
    # reads them while building layers — verify against the base class.
    self.regress_ranges = regress_ranges
    self.center_sampling = center_sampling
    self.center_sample_radius = center_sample_radius
    self.norm_on_bbox = norm_on_bbox
    self.centerness_on_reg = centerness_on_reg
    self.centerness_alpha = centerness_alpha
    self.centerness_branch = centerness_branch

    super().__init__(
        num_classes,
        in_channels,
        loss_cls=loss_cls,
        loss_bbox=loss_bbox,
        loss_dir=loss_dir,
        loss_attr=loss_attr,
        norm_cfg=norm_cfg,
        init_cfg=init_cfg,
        **kwargs)

    self.loss_centerness = build_loss(loss_centerness)

    if init_cfg is None:
        cls_override = dict(
            type='Normal', name='conv_cls', std=0.01, bias_prob=0.01)
        self.init_cfg = dict(
            type='Normal',
            layer='Conv2d',
            std=0.01,
            override=cls_override)
def __init__(self,
             preprocessor=None,
             backbone=None,
             encoder=None,
             decoder=None,
             loss=None,
             label_convertor=None,
             train_cfg=None,
             test_cfg=None,
             max_seq_len=40,
             pretrained=None,
             init_cfg=None):
    """Build the convertor, optional preprocessor/encoder, backbone,
    decoder and loss.

    Note:
        ``label_convertor``, ``decoder`` and ``loss`` are updated in
        place with values derived from ``max_seq_len`` and the built
        label convertor.

    Args:
        preprocessor (dict | None): Config handed to
            ``build_preprocessor``; None leaves ``self.preprocessor`` None.
        backbone (dict): Config handed to ``build_backbone``; required.
        encoder (dict | None): Config handed to ``build_encoder``; None
            leaves ``self.encoder`` None.
        decoder (dict): Config handed to ``build_decoder``; required.
        loss (dict): Config handed to ``build_loss``; required.
        label_convertor (dict): Config handed to ``build_convertor``;
            required.
        train_cfg: Stored on the instance.
        test_cfg: Stored on the instance.
        max_seq_len (int): Stored on the instance and injected into the
            convertor and decoder configs.
        pretrained (str | None): Deprecated; when given, a warning is
            issued and ``self.init_cfg`` is replaced by a ``Pretrained``
            config pointing at it.
        init_cfg: Forwarded to the parent constructor.
    """
    super().__init__(init_cfg=init_cfg)

    # Label convertor (str2tensor, tensor2str)
    assert label_convertor is not None
    label_convertor.update(max_seq_len=max_seq_len)
    self.label_convertor = build_convertor(label_convertor)

    # Preprocessor module, e.g., TPS
    self.preprocessor = None
    if preprocessor is not None:
        self.preprocessor = build_preprocessor(preprocessor)

    # Backbone
    assert backbone is not None
    self.backbone = build_backbone(backbone)

    # Encoder module
    self.encoder = None
    if encoder is not None:
        self.encoder = build_encoder(encoder)

    # Decoder module
    assert decoder is not None
    decoder.update(num_classes=self.label_convertor.num_classes())
    decoder.update(start_idx=self.label_convertor.start_idx)
    decoder.update(padding_idx=self.label_convertor.padding_idx)
    decoder.update(max_seq_len=max_seq_len)
    self.decoder = build_decoder(decoder)

    # Loss
    assert loss is not None
    loss.update(ignore_index=self.label_convertor.padding_idx)
    self.loss = build_loss(loss)

    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.max_seq_len = max_seq_len

    if pretrained is not None:
        # Adjacent literals are concatenated by the parser; the former
        # backslash continuation inside the literal leaked stray
        # characters into the message.
        warnings.warn('DeprecationWarning: pretrained is a deprecated '
                      'key, please consider using init_cfg')
        self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
def __init__(self, gs_config=None, *args, **kwargs):
    """Replace ``fc_cls`` and install the hard-coded 5-bin grouping tables.

    Args:
        gs_config: Object providing ``loss_bin``. The default of None is
            not usable: ``gs_config.loss_bin`` is read unconditionally.
        *args: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """
    super(GSBBoxHeadWithV2, self).__init__(*args, **kwargs)

    # Five more outputs than ``num_classes``.
    self.fc_cls = nn.Linear(self.cls_last_dim, self.num_classes + 5)

    # One row of per-label bin indices for each of the five bins.
    bin_label_rows = (
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [4, 4, 4, 0, 1, 2, 4, 4, 3, 4, 4, 4],
        [2, 2, 0, 2, 2, 2, 2, 2, 2, 1, 2, 2],
        [2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 2, 2],
    )
    self.label2binlabel = [torch.tensor(row) for row in bin_label_rows]

    # One pair of ints per bin; the second entries sum to
    # ``num_classes + 5`` when ``num_classes`` is 11 — presumably
    # (start, length) slices of the ``fc_cls`` output; verify against
    # the forward/loss code.
    self.pred_slice = [
        [0, 2],
        [2, 3],
        [5, 5],
        [10, 3],
        [13, 3],
    ]

    fg_index_groups = ([0, 1], [3, 4, 5, 8], [2, 9], [6, 7])
    self.fg_splits = [torch.tensor(group) for group in fg_index_groups]

    self.others_sample_ratio = 3

    # One independently built loss per bin, kept in a plain list.
    self.loss_bins = [build_loss(gs_config.loss_bin) for _ in range(5)]
def __init__(self,
             num_convs=4,
             num_fcs=2,
             roi_feat_size=14,
             in_channels=256,
             conv_out_channels=256,
             fc_out_channels=1024,
             num_classes=80,
             loss_iou=dict(type='MSELoss', loss_weight=0.5),
             init_cfg=[
                 dict(type='Kaiming', override=dict(name='convs')),
                 dict(type='Caffe2Xavier', override=dict(name='fcs')),
                 dict(
                     type='Normal',
                     std=0.01,
                     override=dict(name='fc_mask_iou'))
             ]):
    """Build the conv stack, the FC stack and the mask-IoU predictor.

    Args:
        num_convs (int): Number of 3x3 convs; the last one has stride 2.
        num_fcs (int): Number of FC layers.
        roi_feat_size (int | tuple): Passed through ``_pair``; each side
            is halved (integer division) to size the first FC layer.
        in_channels (int): The first conv takes ``in_channels + 1`` inputs.
        conv_out_channels (int): Output channels of every conv.
        fc_out_channels (int): Output width of every FC layer.
        num_classes (int): Output width of ``fc_mask_iou``.
        loss_iou (dict): Config handed to ``build_loss``.
        init_cfg (list | dict): Forwarded to the parent constructor.
    """
    super(MaskIoUHead, self).__init__(init_cfg)
    self.in_channels = in_channels
    self.conv_out_channels = conv_out_channels
    self.fc_out_channels = fc_out_channels
    self.num_classes = num_classes
    self.fp16_enabled = False

    self.convs = nn.ModuleList()
    last_conv = num_convs - 1
    for idx in range(num_convs):
        # concatenation of mask feature and mask prediction feeds the
        # first conv, hence the extra input channel
        conv_in = (
            self.in_channels + 1 if idx == 0 else self.conv_out_channels)
        conv_stride = 2 if idx == last_conv else 1
        self.convs.append(
            Conv2d(
                conv_in,
                self.conv_out_channels,
                3,
                stride=conv_stride,
                padding=1))

    feat_h, feat_w = _pair(roi_feat_size)
    pooled_area = (feat_h // 2) * (feat_w // 2)

    self.fcs = nn.ModuleList()
    for idx in range(num_fcs):
        if idx == 0:
            fc_in = self.conv_out_channels * pooled_area
        else:
            fc_in = self.fc_out_channels
        self.fcs.append(Linear(fc_in, self.fc_out_channels))

    self.fc_mask_iou = Linear(self.fc_out_channels, self.num_classes)
    self.relu = nn.ReLU()
    self.max_pool = MaxPool2d(2, 2)
    self.loss_iou = build_loss(loss_iou)
def __init__(self,
             preprocessor=None,
             backbone=None,
             encoder=None,
             decoder=None,
             loss=None,
             label_convertor=None,
             train_cfg=None,
             test_cfg=None,
             max_seq_len=40,
             pretrained=None):
    """Build the convertor, optional preprocessor/encoder, backbone,
    decoder and loss, then initialise the weights.

    Note:
        ``label_convertor``, ``decoder`` and ``loss`` are updated in
        place with values derived from ``max_seq_len`` and the built
        label convertor.

    Args:
        preprocessor (dict | None): Config handed to
            ``build_preprocessor``; None leaves ``self.preprocessor`` None.
        backbone (dict): Config handed to ``build_backbone``; required.
        encoder (dict | None): Config handed to ``build_encoder``; None
            leaves ``self.encoder`` None.
        decoder (dict): Config handed to ``build_decoder``; required.
        loss (dict): Config handed to ``build_loss``; required.
        label_convertor (dict): Config handed to ``build_convertor``;
            required.
        train_cfg: Stored on the instance.
        test_cfg: Stored on the instance.
        max_seq_len (int): Stored on the instance and injected into the
            convertor and decoder configs.
        pretrained: Passed to ``self.init_weights``.
    """
    super().__init__()

    # Label convertor (str2tensor, tensor2str)
    assert label_convertor is not None
    label_convertor.update(max_seq_len=max_seq_len)
    self.label_convertor = build_convertor(label_convertor)
    convertor = self.label_convertor

    # Preprocessor module, e.g., TPS
    if preprocessor is None:
        self.preprocessor = None
    else:
        self.preprocessor = build_preprocessor(preprocessor)

    # Backbone
    assert backbone is not None
    self.backbone = build_backbone(backbone)

    # Encoder module
    if encoder is None:
        self.encoder = None
    else:
        self.encoder = build_encoder(encoder)

    # Decoder module: vocabulary and sequence settings come from the
    # label convertor.
    assert decoder is not None
    decoder.update(num_classes=convertor.num_classes())
    decoder.update(start_idx=convertor.start_idx)
    decoder.update(padding_idx=convertor.padding_idx)
    decoder.update(max_seq_len=max_seq_len)
    self.decoder = build_decoder(decoder)

    # Loss
    assert loss is not None
    loss.update(ignore_index=convertor.padding_idx)
    self.loss = build_loss(loss)

    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.max_seq_len = max_seq_len
    self.init_weights(pretrained=pretrained)
def __init__(self,
             num_classes,
             in_channels,
             seg_feat_channels=256,
             stacked_convs=4,
             strides=(4, 8, 16, 32, 64),
             base_edge_list=(16, 32, 64, 128, 256),
             scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256),
                           (128, 512)),
             sigma=0.2,
             num_grids=None,
             ins_out_channels=64,
             background_label=None,
             loss_mask=None,
             loss_cls=None,
             conv_cfg=None,
             norm_cfg=None,
             train_cfg=None,
             test_cfg=None,
             use_dcn_in_tower=False,
             type_dcn=None):
    """Store the head configuration and build its layers.

    Args:
        num_classes (int): Number of classes; also used as
            ``cate_out_channels``.
        in_channels (int): Stored as ``self.in_channels``.
        seg_feat_channels (int): Stored as ``self.seg_feat_channels``.
        stacked_convs (int): Stored as ``self.stacked_convs``.
        strides (tuple): Stored as ``self.strides``.
        base_edge_list (tuple): Stored as ``self.base_edge_list``.
        scale_ranges (tuple): Stored as ``self.scale_ranges``.
        sigma (float): Stored as ``self.sigma``.
        num_grids: Stored as ``self.seg_num_grids``.
        ins_out_channels (int): Stored as ``self.ins_out_channels`` and
            used to derive ``self.kernel_out_channels``.
        background_label (int, optional): Must be ``0`` or
            ``num_classes``; defaults to ``num_classes`` when ``None``.
        loss_mask (dict): Must provide a ``'loss_weight'`` entry. The
            ``None`` default is not usable because the value is
            subscripted unconditionally.
        loss_cls (dict): Config handed to ``build_loss``.
        conv_cfg: Stored as ``self.conv_cfg``.
        norm_cfg: Stored as ``self.norm_cfg``.
        train_cfg: Stored as ``self.train_cfg``.
        test_cfg: Stored as ``self.test_cfg``.
        use_dcn_in_tower (bool): Stored as ``self.use_dcn_in_tower``.
        type_dcn: Stored as ``self.type_dcn``.

    Raises:
        AssertionError: If the resolved background label is neither ``0``
            nor ``num_classes``.
    """
    super(SOLOv2Head, self).__init__()
    self.num_classes = num_classes
    self.seg_num_grids = num_grids
    self.cate_out_channels = self.num_classes
    self.ins_out_channels = ins_out_channels
    self.in_channels = in_channels
    self.seg_feat_channels = seg_feat_channels
    # The original assigned stacked_convs twice; once is sufficient.
    self.stacked_convs = stacked_convs
    self.strides = strides
    self.sigma = sigma
    # NOTE(review): the "* 1 * 1" presumably denotes a 1x1 kernel - confirm.
    self.kernel_out_channels = self.ins_out_channels * 1 * 1
    self.base_edge_list = base_edge_list
    self.scale_ranges = scale_ranges

    self.background_label = (
        num_classes if background_label is None else background_label)
    # background_label should be either 0 or num_classes
    assert (self.background_label == 0
            or self.background_label == num_classes)

    self.loss_cls = build_loss(loss_cls)
    # Only the weight of the mask loss is kept; no loss module is built
    # from loss_mask here.
    self.ins_loss_weight = loss_mask['loss_weight']

    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.use_dcn_in_tower = use_dcn_in_tower
    self.type_dcn = type_dcn
    self._init_layers()
def __init__(self,
             in_channels,
             with_bias=False,
             decoding_type='db',
             text_repr_type='poly',
             downsample_ratio=1.0,
             loss=dict(type='DBLoss'),
             train_cfg=None,
             test_cfg=None,
             init_cfg=[
                 dict(type='Kaiming', layer='Conv'),
                 dict(
                     type='Constant', layer='BatchNorm', val=1., bias=1e-4)
             ]):
    """Initialization.

    Args:
        in_channels (int): The number of input channels of the db head.
            Must be an ``int`` (asserted).
        with_bias (bool): Whether the first 3x3 conv of the binarize
            branch uses a bias term.
        decoding_type (str): The type of decoder for dbnet.
        text_repr_type (str): Boundary encoding type 'poly' or 'quad'.
        downsample_ratio (float): The downsample ratio of ground truths.
        loss (dict): The type of loss for dbnet.
        train_cfg: Stored on the instance unchanged.
        test_cfg: Stored on the instance unchanged.
        init_cfg (dict | list[dict]): Forwarded to the parent constructor.
    """
    super().__init__(init_cfg=init_cfg)

    assert isinstance(in_channels, int)

    self.in_channels = in_channels
    self.text_repr_type = text_repr_type
    self.loss_module = build_loss(loss)
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.downsample_ratio = downsample_ratio
    self.decoding_type = decoding_type

    # Every intermediate layer of the binarize branch works on a quarter
    # of the input channels.
    mid_channels = in_channels // 4
    binarize_layers = [
        nn.Conv2d(in_channels, mid_channels, 3, bias=with_bias, padding=1),
        nn.BatchNorm2d(mid_channels),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(mid_channels, mid_channels, 2, 2),
        nn.BatchNorm2d(mid_channels),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(mid_channels, 1, 2, 2),
        nn.Sigmoid(),
    ]
    self.binarize = Sequential(*binarize_layers)

    self.threshold = self._init_thr(in_channels)
def __init__(self,
             num_classes,
             num_fcs=3,
             in_channels=256,
             fc_channels=256,
             class_agnostic=False,
             coarse_pred_each_layer=True,
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=None,
             act_cfg=dict(type='ReLU'),
             loss_point=dict(
                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0),
             init_cfg=dict(
                 type='Normal', std=0.001,
                 override=dict(name='fc_logits'))):
    """Build the stack of 1x1 conv "FC" layers and the logits layer.

    Args:
        num_classes (int): Number of classes. Added to the input width of
            the first layer, and of later layers when
            ``coarse_pred_each_layer`` is set.
        num_fcs (int): Number of ``ConvModule`` layers in ``self.fcs``.
        in_channels (int): Feature channels fed to the first layer
            (before ``num_classes`` is added).
        fc_channels (int): Output channels of every layer in ``self.fcs``.
        class_agnostic (bool): If set, ``fc_logits`` has a single output
            channel instead of ``num_classes``.
        coarse_pred_each_layer (bool): If set, every layer after the
            first (and ``fc_logits``, when ``num_fcs > 0``) expects
            ``num_classes`` extra input channels.
        conv_cfg (dict): Conv config passed to each ``ConvModule``.
        norm_cfg (dict, optional): Norm config passed to each
            ``ConvModule``.
        act_cfg (dict): Activation config passed to each ``ConvModule``.
        loss_point (dict): Config handed to ``build_loss``.
        init_cfg (dict | list[dict]): Forwarded to the parent constructor.
    """
    super().__init__(init_cfg)

    self.num_fcs = num_fcs
    self.in_channels = in_channels
    self.fc_channels = fc_channels
    self.num_classes = num_classes
    self.class_agnostic = class_agnostic
    self.coarse_pred_each_layer = coarse_pred_each_layer
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.loss_point = build_loss(loss_point)

    # Extra input channels expected by every layer after the first one.
    coarse_extra = num_classes if self.coarse_pred_each_layer else 0

    self.fcs = nn.ModuleList()
    layer_in = in_channels + num_classes
    for _ in range(num_fcs):
        self.fcs.append(
            ConvModule(
                layer_in,
                fc_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg))
        layer_in = fc_channels + coarse_extra

    logits_channels = 1 if self.class_agnostic else self.num_classes
    self.fc_logits = nn.Conv1d(
        layer_in, logits_channels, kernel_size=1, stride=1, padding=0)
def __init__(self,
             preprocessor=None,
             backbone=None,
             neck=None,
             head=None,
             loss=None,
             label_convertor=None,
             train_cfg=None,
             test_cfg=None,
             pretrained=None,
             init_cfg=None):
    """Assemble the recognizer from its component configs.

    ``label_convertor``, ``backbone``, ``neck``, ``head`` and ``loss`` are
    required (asserted to be not ``None``); ``preprocessor`` is optional.
    ``head`` is updated in place with ``num_classes`` taken from the label
    convertor before it is built.

    Args:
        preprocessor (dict, optional): Preprocessor config, e.g. TPS.
        backbone (dict): Backbone config.
        neck (dict): Neck config.
        head (dict): Head config.
        loss (dict): Loss config.
        label_convertor (dict): Label convertor config.
        train_cfg: Stored on the instance unchanged.
        test_cfg: Stored on the instance unchanged.
        pretrained: Deprecated. When given, a warning is emitted and
            ``self.init_cfg`` is replaced by a ``'Pretrained'`` config
            pointing at this checkpoint.
        init_cfg (dict | list[dict], optional): Forwarded to the parent
            constructor.
    """
    super().__init__(init_cfg=init_cfg)

    # Label_convertor
    assert label_convertor is not None
    self.label_convertor = build_convertor(label_convertor)

    # Preprocessor module, e.g., TPS
    self.preprocessor = None
    if preprocessor is not None:
        self.preprocessor = build_preprocessor(preprocessor)

    # Backbone
    assert backbone is not None
    self.backbone = build_backbone(backbone)

    # Neck
    assert neck is not None
    self.neck = build_neck(neck)

    # Head
    assert head is not None
    head.update(num_classes=self.label_convertor.num_classes())
    self.head = build_head(head)

    # Loss
    assert loss is not None
    self.loss = build_loss(loss)

    self.train_cfg = train_cfg
    self.test_cfg = test_cfg

    if pretrained is not None:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which leaked a stray backslash/whitespace into the
        # message.
        warnings.warn('DeprecationWarning: pretrained is a deprecated '
                      'key, please consider using init_cfg')
        self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)