def __init__(self, backbone='mobilenet', output_stride=8, num_classes=1, sync_bn=True, freeze_bn=False):
    """Shadow-detection network: backbone + ASPP + DSC context module + decoder.

    Args:
        backbone: backbone name forwarded to build_backbone.
        output_stride: backbone output stride (forced to 8 for 'drn').
        num_classes: number of output channels produced by the decoder.
        sync_bn: if true, use SynchronizedBatchNorm2d instead of nn.BatchNorm2d.
        freeze_bn: if true, freeze all batch-norm layers after construction.
    """
    super(ShadowNet2, self).__init__()
    if backbone == 'drn':
        output_stride = 8  # DRN only supports stride 8
    # idiomatic truthiness test instead of `sync_bn == True`
    BatchNorm = SynchronizedBatchNorm2d if sync_bn else nn.BatchNorm2d
    self.backbone = build_backbone(backbone, output_stride, BatchNorm)
    self.aspp = build_aspp(backbone, output_stride, BatchNorm)
    # 1x1 reductions around the DSC (direction-aware spatial context) module
    self.reduce1 = LayerConv(320, 256, 1, 1, 0, False)
    self.dsc = DSC_Module(256, 256)
    self.reduce2 = LayerConv(512, 256, 1, 1, 0, False)
    self.decoder = build_decoder(num_classes, backbone, BatchNorm)
    if freeze_bn:
        self.freeze_bn()
def __init__(self, backbone='mobilenet', output_stride=8, num_classes=1, sync_bn=True, freeze_bn=False):
    """Baseline model: backbone followed by a plain conv classification head.

    Args:
        backbone: backbone name forwarded to build_backbone.
        output_stride: backbone output stride (forced to 8 for 'drn').
        num_classes: number of output channels of the final 1x1 conv.
        sync_bn: if true, use SynchronizedBatchNorm2d instead of nn.BatchNorm2d.
        freeze_bn: if true, freeze all batch-norm layers after construction.
    """
    super(basic, self).__init__()
    if backbone == 'drn':
        output_stride = 8  # DRN only supports stride 8
    # idiomatic truthiness test instead of `sync_bn == True`
    BatchNorm = SynchronizedBatchNorm2d if sync_bn else nn.BatchNorm2d
    self.backbone = build_backbone(backbone, output_stride, BatchNorm)
    # 320-ch backbone features -> num_classes logits
    self.last_conv = nn.Sequential(
        nn.Conv2d(320, 256, kernel_size=3, stride=1, padding=1, bias=False),
        BatchNorm(256),
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
        BatchNorm(256),
        nn.ReLU(),
        nn.Conv2d(256, num_classes, kernel_size=1, stride=1))
    if freeze_bn:
        self.freeze_bn()
def __init__(self, backbone='resnet101', output_stride=16, num_classes=21, bn='bn', freeze_bn=False):
    """DeepLab with a selectable normalization layer.

    Args:
        backbone: backbone name forwarded to build_backbone.
        output_stride: backbone output stride (forced to 8 for 'drn').
        num_classes: number of decoder output channels.
        bn: normalization choice — 'sync_bn', 'bn', or 'gn'.
        freeze_bn: if true, freeze all batch-norm layers after construction.
    """
    super(DeepLab, self).__init__()
    if backbone == 'drn':
        output_stride = 8
    self.best_iou = 0
    # dispatch table replaces the if/elif chain
    norm_layers = {
        'sync_bn': SynchronizedBatchNorm2d,
        'bn': nn.BatchNorm2d,
        'gn': nn.GroupNorm,
    }
    if bn not in norm_layers:
        raise NotImplementedError(
            'batch norm choice {} is not implemented'.format(bn))
    BatchNorm = norm_layers[bn]
    self.backbone = build_backbone(backbone, output_stride, BatchNorm)
    self.aspp = build_aspp(backbone, output_stride, BatchNorm)
    self.decoder = build_decoder(num_classes, backbone, BatchNorm)
    if freeze_bn:
        self.freeze_bn()
def __init__(self, backbone='xception', output_stride=16, num_classes=9, freeze_bn=False):
    """Minimal DeepLab: backbone, ASPP, and decoder."""
    super(DeepLab, self).__init__()
    self.backbone = build_backbone(backbone, output_stride)
    self.aspp = build_aspp(backbone, output_stride)
    self.decoder = build_decoder(num_classes, backbone)
    # NOTE(review): this stores the flag as an attribute; sibling variants in
    # this codebase call self.freeze_bn() instead — confirm no freeze_bn()
    # method is being shadowed here.
    self.freeze_bn = freeze_bn
def __init__(self, backbone='resnet', n_in_channels=1, output_stride=16, num_classes=1, n_bottleneck_channels=1, sync_bn=True, freeze_bn=False, pretrained_backbone=False):
    """DeepLab variant with a bottleneck decoder and tanh/sigmoid activations.

    Args:
        backbone: backbone name forwarded to build_backbone.
        n_in_channels: number of input image channels.
        output_stride: backbone output stride (forced to 8 for 'drn').
        num_classes: number of decoder output channels.
        n_bottleneck_channels: bottleneck width forwarded to build_decoder.
        sync_bn: if true, use SynchronizedBatchNorm2d instead of nn.BatchNorm2d.
        freeze_bn: flag stored on the instance (see NOTE below).
        pretrained_backbone: load pretrained backbone weights.
    """
    super(DeepLabBottleNeck, self).__init__()
    if backbone == 'drn':
        output_stride = 8  # DRN only supports stride 8
    # idiomatic truthiness test instead of `sync_bn == True`
    BatchNorm = SynchronizedBatchNorm2d if sync_bn else nn.BatchNorm2d
    self.backbone = build_backbone(backbone, n_in_channels, output_stride, BatchNorm, pretrained_backbone)
    self.aspp = build_aspp(backbone, output_stride, BatchNorm)
    self.decoder = build_decoder(num_classes, backbone, BatchNorm, n_bottleneck_channels)
    # output activations; which one is applied is decided in forward()
    self.activate_tanh = nn.Tanh()
    self.activate_sigmoid = nn.Sigmoid()
    # NOTE(review): attribute assignment — sibling variants call
    # self.freeze_bn() when the flag is set; confirm which is intended.
    self.freeze_bn = freeze_bn
def __init__(self, backbone='resnet101', output_stride=16, num_classes=21, bn='bn', freeze_bn=False, modal_num=3):
    """Multi-modal DeepLab: one ASPP+decoder branch per modality plus an
    attention decoder that fuses the branches.

    Args:
        backbone: backbone name forwarded to build_backbone.
        output_stride: backbone output stride (forced to 8 for 'drn').
        num_classes: number of decoder output channels.
        bn: normalization choice — 'sync_bn', 'bn', or 'gn'.
        freeze_bn: if true, freeze all batch-norm layers after construction.
        modal_num: number of input modalities (one branch each).
    """
    super(DeepLab, self).__init__()
    if backbone == 'drn':
        output_stride = 8  # DRN only supports stride 8
    self.best_iou = 0
    if bn == 'sync_bn':
        BatchNorm = SynchronizedBatchNorm2d
    elif bn == 'bn':
        BatchNorm = nn.BatchNorm2d
    elif bn == 'gn':
        BatchNorm = nn.GroupNorm
    else:
        raise NotImplementedError('batch norm choice {} is not implemented'.format(bn))
    self.backbone = build_backbone(backbone, output_stride, BatchNorm)
    # One ASPP + decoder branch per modality. Build into local lists (the
    # original assigned plain lists to module attributes before wrapping);
    # keep the interleaved construction order so parameter initialization
    # matches the original loop. Loop variable was an unused `item`.
    self.modal_num = modal_num
    aspps, decoders = [], []
    for _ in range(modal_num):
        aspps.append(build_aspp(backbone, output_stride, BatchNorm))
        decoders.append(build_decoder(num_classes, backbone, BatchNorm))
    self.aspps = nn.ModuleList(aspps)
    self.decoders = nn.ModuleList(decoders)
    # attention branch fusing the per-modality outputs
    self.attention_decoder = build_attention_decoder(num_classes, modal_num, backbone, BatchNorm)
    if freeze_bn:
        self.freeze_bn()
def __init__(self, num_cls, backbone='resnet', out_stride=8):
    """OCRNet segmentation head over a resnet or hrnet backbone."""
    super(OCRNet, self).__init__()
    self.backbone = build_backbone(backbone, out_stride)
    # channel widths depend on the backbone family
    if backbone == 'resnet':
        aux_ch, top_ch = 1024, 2048
    elif backbone == 'hrnet':
        aux_ch, top_ch = 768, 1024
    else:
        raise NotImplementedError
    # 3x3 projection of the top-level feature map to 512 channels
    self.conv_3x3 = nn.Sequential(
        nn.Conv2d(top_ch, 512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512)
    )
    # auxiliary (DSN) head on the lower-level features
    self.dsn_head = nn.Sequential(
        nn.Conv2d(aux_ch, 512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.Dropout2d(0.05),
        nn.Conv2d(512, num_cls, kernel_size=1, stride=1, padding=0, bias=True)
    )
    # object-contextual attention projections
    self.conv1 = conv1d(512, 256)
    self.conv2 = conv1d(512, 256)
    self.conv3 = conv1d(512, 256)
    self.conv4 = conv1d(256, 512)
    self.conv5 = conv2d(512 + 512, 512, 1)
    self.conv6 = conv2d(512, num_cls, 1)
def build_model(cfg):
    """Assemble a MarkerlessMoCap model from its cfg-driven components."""
    # components are constructed in the same order as before: backbone,
    # volume generator (aggregator), then volumetric regressor
    return MarkerlessMoCap(
        build_backbone(cfg),
        build_volume_generator(cfg),
        build_volumetric_regressor(cfg),
        cfg.DEVICE,
    )
def __init__(self, NoLabels, pretrained):
    """Deeplab_fuse: multi-scale resnet backbone + ASPP with fuse/refine heads.

    Args:
        NoLabels: number of output classes of the final prediction conv.
        pretrained: whether to load pretrained backbone weights.
    """
    super(Deeplab_fuse, self).__init__()
    self.backbone = build_backbone('resnet_ms', in_channel=3, pretrained=pretrained)
    self.aspp = build_aspp(output_stride=16)
    # side branch: 2048-ch backbone features -> 512 channels
    self.branch = nn.Sequential(
        nn.Conv2d(2048, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, 512, kernel_size=1),
        nn.BatchNorm2d(512),
        nn.ReLU())
    # fusion: 1024 -> 2048 channels via two 1x1 convs
    self.fuse = nn.Sequential(nn.Conv2d(1024, 1024, kernel_size=1),
                              nn.BatchNorm2d(1024),
                              nn.ReLU(),
                              nn.Conv2d(1024, 2048, kernel_size=1),
                              nn.BatchNorm2d(2048),
                              nn.ReLU())
    # refinement over a 256+256 channel concatenation -> 128 channels
    self.refine = nn.Sequential(
        #nn.Conv2d(48+256, 256, kernel_size=3, stride=1, padding=1, bias=False),
        nn.Conv2d(256 + 256, 256, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(128),
        nn.ReLU())
    # prediction head over a 128+64 channel concatenation -> NoLabels logits
    self.predict = nn.Sequential(
        #nn.Conv2d(48+256, 256, kernel_size=3, stride=1, padding=1, bias=False),
        nn.Conv2d(128 + 64, 128, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Conv2d(128, NoLabels, kernel_size=1))
    # ImageNet RGB normalization constants, shaped (1, 3, 1, 1) for broadcast
    #self.register_buffer('mean', torch.FloatTensor([0.485, 0.456, 0.406, -0.329]).view(1,4,1,1))
    #self.register_buffer('std', torch.FloatTensor([0.229, 0.224, 0.225, 0.051]).view(1,4,1,1))
    self.register_buffer(
        'mean', torch.FloatTensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
    self.register_buffer(
        'std', torch.FloatTensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))
    self._init_weight()
def __init__(self, model_config: dict):
    """Backbone/neck/head model assembled from a configuration dict.

    Each of the 'backbone', 'neck' and 'head' sections must carry a 'type'
    key naming the component; the remaining keys are passed as kwargs.
    """
    super().__init__()
    cfg = Dict(model_config)
    backbone_name = cfg.backbone.pop('type')
    neck_name = cfg.neck.pop('type')
    head_name = cfg.head.pop('type')
    # chain the channel counts: backbone -> neck -> head
    self.backbone = build_backbone(backbone_name, **cfg.backbone)
    self.neck = build_neck(neck_name, in_channels=self.backbone.out_channels, **cfg.neck)
    self.head = build_head(head_name, in_channels=self.neck.out_channels, **cfg.head)
    self.name = f'{backbone_name}_{neck_name}_{head_name}'
def main(args):
    """Evaluate a DETRHOI model on the validation split and dump detections.

    Loads a checkpoint from args.param_path, runs inference over the val
    data loader, and pickles the resulting detections to args.save_path.
    """
    print("git:\n {}\n".format(utils.get_sha()))
    print(args)
    # V-COCO object category ids kept for evaluation (non-contiguous COCO ids)
    valid_obj_ids = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17,
                     18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
                     35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49,
                     50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
                     64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
                     82, 84, 85, 86, 87, 88, 89, 90)
    # V-COCO verb (interaction) class names
    verb_classes = [
        'hold_obj', 'stand', 'sit_instr', 'ride_instr', 'walk', 'look_obj',
        'hit_instr', 'hit_obj', 'eat_obj', 'eat_instr', 'jump_instr',
        'lay_instr', 'talk_on_phone_instr', 'carry_obj', 'throw_obj',
        'catch_obj', 'cut_instr', 'cut_obj', 'run', 'work_on_computer_instr',
        'ski_instr', 'surf_instr', 'skateboard_instr', 'smile', 'drink_instr',
        'kick_obj', 'point_instr', 'read_obj', 'snowboard_instr'
    ]
    device = torch.device(args.device)
    # sequential sampler: evaluation order must be deterministic
    dataset_val = build_dataset(image_set='val', args=args)
    sampler_val = torch.utils.data.SequentialSampler(dataset_val)
    data_loader_val = DataLoader(dataset_val, args.batch_size,
                                 sampler=sampler_val, drop_last=False,
                                 collate_fn=utils.collate_fn,
                                 num_workers=args.num_workers)
    # evaluation-only settings: backbone is not trained, no mask head
    args.lr_backbone = 0
    args.masks = False
    backbone = build_backbone(args)
    transformer = build_transformer(args)
    # +1 object class for the "no object" category
    model = DETRHOI(backbone, transformer, len(valid_obj_ids) + 1,
                    len(verb_classes), args.num_queries)
    post_processor = PostProcessHOI(args.num_queries,
                                    args.subject_category_id,
                                    dataset_val.correct_mat)
    model.to(device)
    post_processor.to(device)
    # load weights on CPU first, then move with the module
    checkpoint = torch.load(args.param_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    detections = generate(model, post_processor, data_loader_val, device,
                          verb_classes, args.missing_category_id)
    # protocol=2 keeps the pickle readable by older Python consumers
    with open(args.save_path, 'wb') as f:
        pickle.dump(detections, f, protocol=2)
def __init__(self, args, pixel_mean, pixel_std):
    """Two-stage detector wiring: backbone, proposal generator, ROI heads,
    plus per-channel pixel-normalization buffers."""
    super().__init__()
    self.backbone, self.loaded_layers = build_backbone(args)
    self.proposal_generator = build_proposal_generator(args)
    self.roi_heads = build_roi_heads(args)
    # normalization constants shaped (C, 1, 1) so they broadcast over images
    self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1))
    self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1))
    self.args = args
def build(args):
    """Build the DETR model, its criterion, and postprocessors from args.

    Returns:
        (model, criterion, postprocessors) ready for training/evaluation.
    """
    # class counts per dataset; panoptic overrides the default
    if args.dataset_file == 'coco':
        num_classes = 91
    else:
        num_classes = 20
    if args.dataset_file == "coco_panoptic":
        num_classes = 250
    device = torch.device(args.device)

    backbone = build_backbone(args)
    transformer = build_transformer(args)
    model = DETR(
        args,
        backbone,
        transformer,
        num_classes=num_classes,
        num_queries=args.num_queries,
        aux_loss=args.aux_loss,
    )
    if args.masks:
        model = DETRsegm(model, freeze_detr=(args.frozen_weights is not None))

    matcher = build_matcher(args)
    # loss weighting; mask terms only when segmentation is enabled
    weight_dict = {'loss_ce': 1, 'loss_bbox': args.bbox_loss_coef}
    weight_dict['loss_giou'] = args.giou_loss_coef
    if args.masks:
        weight_dict["loss_mask"] = args.mask_loss_coef
        weight_dict["loss_dice"] = args.dice_loss_coef
    # TODO this is a hack (kept from upstream): replicate weights per decoder layer
    if args.aux_loss:
        weight_dict.update({f'{k}_{i}': v
                            for i in range(args.dec_layers - 1)
                            for k, v in weight_dict.items()})

    losses = ['labels', 'boxes', 'cardinality']
    if args.masks:
        losses.append("masks")
    criterion = SetCriterion(num_classes, matcher=matcher,
                             weight_dict=weight_dict,
                             eos_coef=args.eos_coef, losses=losses)
    criterion.to(device)

    postprocessors = {'bbox': PostProcess()}
    if args.masks:
        postprocessors['segm'] = PostProcessSegm()
        if args.dataset_file == "coco_panoptic":
            # COCO "thing" ids are <= 90; everything above is "stuff"
            is_thing_map = {i: i <= 90 for i in range(201)}
            postprocessors["panoptic"] = PostProcessPanoptic(is_thing_map,
                                                             threshold=0.85)
    return model, criterion, postprocessors
def __init__(self, backbone='mobilenet', output_stride=8, num_classes=1, sync_bn=True, freeze_bn=False):
    """Shadow network with an uncertainty-guided auxiliary head.

    Args:
        backbone: backbone name forwarded to build_backbone.
        output_stride: backbone output stride (forced to 8 for 'drn').
        num_classes: number of output channels for each head.
        sync_bn: if true, use SynchronizedBatchNorm2d instead of nn.BatchNorm2d.
        freeze_bn: if true, freeze all batch-norm layers after construction.
    """
    super(ShadowNetUncertaintyGuide, self).__init__()
    if backbone == 'drn':
        output_stride = 8  # DRN only supports stride 8
    # idiomatic truthiness test instead of `sync_bn == True`
    BatchNorm = SynchronizedBatchNorm2d if sync_bn else nn.BatchNorm2d
    self.backbone = build_backbone(backbone, output_stride, BatchNorm)

    def _prediction_head():
        # 320-ch backbone features -> num_classes logits; the two heads were
        # identical copy-pasted Sequentials, factored into this helper
        return nn.Sequential(
            nn.Conv2d(320, 256, kernel_size=3, stride=1, padding=1, bias=False),
            BatchNorm(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
            BatchNorm(256),
            nn.ReLU(),
            nn.Conv2d(256, num_classes, kernel_size=1, stride=1))

    self.temp_predict = _prediction_head()       # coarse shadow prediction
    self.temp_uncertainty = _prediction_head()   # per-pixel uncertainty map
    self.aspp = build_aspp(backbone, output_stride, BatchNorm)
    # 1x1 reductions around the DSC context module
    self.reduce1 = LayerConv(320, 256, 1, 1, 0, False)
    self.dsc = DSC_Module(256, 256)
    self.reduce2 = LayerConv(512, 256, 1, 1, 0, False)
    self.decoder = build_decoder(num_classes, backbone, BatchNorm)
    if freeze_bn:
        self.freeze_bn()
def __init__(self, backbone='resnet18', in_channels=3, output_stride=16, num_classes=1, aux_classes=3, sync_bn=True, freeze_bn=False, pretrained=False, fusion_type='fusion', is_concat=False, **kwargs):
    """Pairwise DeepLab: a shared branch plus a configurable fusion module.

    Args:
        backbone: backbone name forwarded to build_backbone.
        in_channels: number of input image channels.
        output_stride: backbone output stride (forced to 8 for 'drn').
        num_classes: number of decoder output channels.
        aux_classes: number of classes for the fusion module.
        sync_bn: if true, use SynchronizedBatchNorm2d instead of nn.BatchNorm2d.
        freeze_bn: if true, freeze all batch-norm layers after construction.
        pretrained: load pretrained backbone weights.
        fusion_type: 'fusion' or 'attention_fusion'.
        is_concat: concatenation flag forwarded to the fusion builder.

    Raises:
        NotImplementedError: if fusion_type is not a recognized choice.
    """
    super(PairwiseDeepLab, self).__init__()
    if backbone == 'drn':
        output_stride = 8  # DRN only supports stride 8
    # idiomatic truthiness test instead of `sync_bn == True`
    BatchNorm = SynchronizedBatchNorm2d if sync_bn else nn.BatchNorm2d
    self.backbone = build_backbone(backbone, in_channels, output_stride, BatchNorm, pretrained)
    # single shared ASPP + decoder branch
    self.aspp = build_aspp(backbone, output_stride, BatchNorm)
    self.decoder = build_decoder(num_classes, backbone, BatchNorm)
    # fusion module selected by fusion_type
    self.fusion_type = fusion_type
    if self.fusion_type == 'attention_fusion':
        print('fusion_type is attention_fusion')
        self.fusion = build_attention_fusion(aux_classes, backbone, BatchNorm, is_concat=is_concat)
    elif self.fusion_type == 'fusion':
        print('init fusion_type')
        self.fusion = build_fusion(aux_classes, backbone, BatchNorm, is_concat=is_concat)
    else:
        raise NotImplementedError
    if freeze_bn:
        self.freeze_bn()
def __init__(self, NoLabels, pretrained=False):
    """Multi-scale DeepLab over a 4-channel input."""
    super(MS_Deeplab, self).__init__()
    self.backbone = build_backbone('resnet', in_channel=4, pretrained=pretrained)
    # ASPP-style classifier with parallel dilated branches
    self.classifier = _make_pred_layer(
        Classifier_Module, [6, 12, 18, 24], [6, 12, 18, 24], NoLabels)
    # normalization buffers for the 4-channel input, shaped (1, 4, 1, 1).
    # NOTE(review): channel 4 uses mean 0 / std 0.358 — presumably a
    # non-RGB auxiliary channel; confirm against the data pipeline.
    self.register_buffer(
        'mean', torch.FloatTensor([0.485, 0.456, 0.406, 0]).view(1, 4, 1, 1))
    self.register_buffer(
        'std', torch.FloatTensor([0.229, 0.224, 0.225, 0.358]).view(1, 4, 1, 1))
def __init__(self, backbone='resnet', output_stride=16, num_classes=21, freeze_bn=False):
    """DeepLab using plain nn.BatchNorm2d throughout."""
    super(DeepLab, self).__init__()
    norm_layer = nn.BatchNorm2d
    self.backbone = build_backbone(backbone, output_stride)
    self.aspp = build_aspp(backbone, output_stride, norm_layer)
    self.decoder = build_decoder(num_classes, backbone)
    # NOTE(review): stores the flag as an attribute; sibling variants call
    # self.freeze_bn() — confirm no method of that name is being shadowed.
    self.freeze_bn = freeze_bn
def get_pose_net(cfg, is_train, **kwargs):
    """Create a PoseTransformer from cfg; transformer hyper-parameters come
    from cfg.MODEL.EXTRA, backbone weights are loaded only for training."""
    extra = cfg.MODEL.EXTRA
    transformer = build_transformer(
        hidden_dim=extra.HIDDEN_DIM,
        dropout=extra.DROPOUT,
        nheads=extra.NHEADS,
        dim_feedforward=extra.DIM_FEEDFORWARD,
        enc_layers=extra.ENC_LAYERS,
        dec_layers=extra.DEC_LAYERS,
        pre_norm=extra.PRE_NORM,
    )
    # pretrained weights only make sense during training
    use_pretrained = is_train and cfg.MODEL.INIT_WEIGHTS
    backbone = build_backbone(cfg, use_pretrained)
    return PoseTransformer(cfg, backbone, transformer, **kwargs)
def __init__(self, backbone='resnet', output_stride=16, num_classes=21, sync_bn=True, freeze_bn=False):
    """DeepLab: backbone + ASPP + decoder with selectable batch norm.

    Args:
        backbone: backbone name forwarded to build_backbone.
        output_stride: backbone output stride (forced to 8 for 'drn').
        num_classes: number of decoder output channels.
        sync_bn: if true, use SynchronizedBatchNorm2d instead of nn.BatchNorm2d.
        freeze_bn: flag stored on the instance (see NOTE below).
    """
    super(DeepLab, self).__init__()
    if backbone == 'drn':
        output_stride = 8  # DRN only supports stride 8
    # idiomatic truthiness test instead of `sync_bn == True`
    BatchNorm = SynchronizedBatchNorm2d if sync_bn else nn.BatchNorm2d
    self.backbone = build_backbone(backbone, output_stride, BatchNorm)
    self.aspp = build_aspp(backbone, output_stride, BatchNorm)
    self.decoder = build_decoder(num_classes, backbone, BatchNorm)
    # NOTE(review): attribute assignment shadows any freeze_bn() method on
    # the class; sibling variants call self.freeze_bn() when the flag is
    # set — confirm which behavior is intended.
    self.freeze_bn = freeze_bn
def __init__(self, backbone='resnet', n_in_channels=1, output_stride=16, num_classes=1, pretrained_backbone=False):
    """DeepLab over an n-channel input using plain nn.BatchNorm2d.

    Args:
        backbone: backbone name forwarded to build_backbone.
        n_in_channels: number of input image channels.
        output_stride: backbone output stride (forced to 8 for 'drn').
        num_classes: number of decoder output channels.
        pretrained_backbone: load pretrained backbone weights.
    """
    super(DeepLab, self).__init__()
    if backbone == 'drn':
        output_stride = 8  # DRN only supports stride 8
    BatchNorm = nn.BatchNorm2d
    self.backbone = build_backbone(backbone, n_in_channels, output_stride, BatchNorm, pretrained_backbone)
    self.aspp = build_aspp(backbone, output_stride, BatchNorm)
    self.decoder = build_decoder(num_classes, backbone, BatchNorm)
    # (removed a redundant bare `return` that ended the original __init__)
def __init__(self, backbone='mobilenet', output_stride=16, num_classes=19, sync_bn=True, freeze_bn=False, mc_dropout=False, input_channels=3, pretrained=True):
    """DeepLab variant with optional Monte-Carlo dropout in backbone/decoder."""
    super(DeepLab, self).__init__()
    # keep the original `== True` comparison semantics
    norm_layer = SynchronizedBatchNorm2d if sync_bn == True else nn.BatchNorm2d
    self.backbone = build_backbone(backbone, output_stride, norm_layer,
                                   mc_dropout, input_channels, pretrained)
    self.aspp = ASPP(backbone, output_stride, norm_layer)
    self.decoder = Decoder(num_classes, backbone, norm_layer, mc_dropout)
    # runtime switches consumed elsewhere in the class
    self.return_features = False
    self.noisy_features = False
    self.model_name = 'deeplab'
    if freeze_bn:
        self.freeze_bn()
def __init__(self, backbone='resnet18', in_channels=3, output_stride=8, num_classes=1, sync_bn=True, freeze_bn=False, pretrained=False, **kwargs):
    """DeepLab variant used for consistency training.

    Args:
        backbone: backbone name forwarded to build_backbone.
        in_channels: number of input image channels.
        output_stride: backbone output stride (forced to 8 for drn/resnet18/34).
        num_classes: number of decoder output channels.
        sync_bn: if true, use SynchronizedBatchNorm2d instead of nn.BatchNorm2d.
        freeze_bn: if true, freeze all batch-norm layers after construction.
        pretrained: load pretrained backbone weights.
    """
    super(ConsistentDeepLab, self).__init__()
    if backbone in ('drn', 'resnet18', 'resnet34'):
        output_stride = 8  # these backbones only support stride 8
    # idiomatic truthiness test instead of `sync_bn == True`
    BatchNorm = SynchronizedBatchNorm2d if sync_bn else nn.BatchNorm2d
    self.backbone = build_backbone(backbone, in_channels, output_stride, BatchNorm, pretrained)
    self.aspp = build_aspp(backbone, output_stride, BatchNorm)
    self.decoder = build_decoder(num_classes, backbone, BatchNorm)
    if freeze_bn:
        self.freeze_bn()
def get_pose_net(cfg, is_train, **kwargs):
    """Create a DeformablePoseTransformer from cfg; deformable-attention
    hyper-parameters come from cfg.MODEL.EXTRA."""
    extra = cfg.MODEL.EXTRA
    transformer = build_deformable_transformer(
        hidden_dim=extra.HIDDEN_DIM,
        dropout=extra.DROPOUT,
        nheads=extra.NHEADS,
        dim_feedforward=extra.DIM_FEEDFORWARD,
        enc_layers=extra.ENC_LAYERS,
        dec_layers=extra.DEC_LAYERS,
        num_feature_levels=extra.NUM_FEATURE_LEVELS,
        enc_n_points=extra.ENC_N_POINTS,
        dec_n_points=extra.DEC_N_POINTS,
    )
    # pretrained weights only make sense during training
    use_pretrained = is_train and cfg.MODEL.INIT_WEIGHTS
    backbone = build_backbone(cfg, use_pretrained)
    return DeformablePoseTransformer(cfg, backbone, transformer, **kwargs)
def __init__(self, model_config: dict):
    """PANnet text detector assembled from a configuration dict.

    :param model_config: model configuration; the backbone/neck/head
        sections each carry a 'type' key naming the component to build.
    """
    super().__init__()
    cfg = Dict(model_config)
    backbone_name = cfg.backbone.pop('type')
    neck_name = cfg.neck.pop('type')
    head_name = cfg.head.pop('type')
    # ImageNet statistics scaled to the 0-255 pixel range
    self.normalize = Normalize([0.485 * 255, 0.456 * 255, 0.406 * 255],
                               [0.229 * 255, 0.224 * 255, 0.225 * 255])
    # chain the channel counts: backbone -> neck -> head
    self.backbone = build_backbone(backbone_name, **cfg.backbone)
    self.neck = build_neck(neck_name, in_channels=self.backbone.out_channels, **cfg.neck)
    self.head = build_head(head_name, in_channels=self.neck.out_channels, **cfg.head)
    self.name = f'{backbone_name}_{neck_name}_{head_name}'
def __init__(
        self,
        num_classes,
        backbone='resnet50',
        pretrained=True,
        pooling='avg_pooling',
        pooling_size=1,
        head='BNHead',
        bn_where='after',
        batch_norm_bias=True,
        use_tqdm=True,
        is_inference=False):
    """Baseline classifier: backbone -> global pooling -> head."""
    super(Baseline, self).__init__()
    self.head_name = head
    self.num_classes = num_classes
    self.is_inference = is_inference
    # build_backbone also reports the backbone's output feature width
    backbone_module, feature_dim = build_backbone(
        backbone, pretrained=pretrained, progress=use_tqdm)
    self.backbone = backbone_module
    self.global_pooling = build_pooling(pooling, pooling_size)
    self.head = build_head(
        head,
        feature_dim,
        self.num_classes,
        bias_freeze=not batch_norm_bias,
        bn_where=bn_where,
        pooling_size=pooling_size)
def __init__(self, backbone='resnet', output_stride=16, num_classes=21):
    """DeepLab v3+: backbone, ASPP, and decoder; weights are initialized
    immediately after construction."""
    super().__init__()
    self.backbone = build_backbone(backbone, output_stride)
    self.aspp = build_ASPP(backbone, output_stride)
    self.decoder = build_decoder(backbone, num_classes)
    self._init_weight()
def __init__(self, NoLabels, pretrained=False):
    """Siam_Deeplab: siamese multi-scale resnet backbone + ASPP with several
    fusion stages and a refinement head.

    Args:
        NoLabels: number of output classes of the final prediction conv.
        pretrained: whether to load pretrained backbone weights.
    """
    super(Siam_Deeplab, self).__init__()
    self.backbone = build_backbone('resnet_ms', in_channel=3, pretrained=pretrained)
    self.aspp = build_aspp(output_stride=16)
    # 2048 -> 512 channel reduction via stacked 1x1 convs
    self.conv1_1 = nn.Sequential(nn.Conv2d(2048, 1024, kernel_size=1),
                                 nn.BatchNorm2d(1024),
                                 nn.ReLU(),
                                 nn.Conv2d(1024, 512, kernel_size=1),
                                 nn.BatchNorm2d(512),
                                 nn.ReLU())
    # side branch: 1024 -> 512 channels
    self.branch = nn.Sequential(
        nn.Conv2d(1024, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, 512, kernel_size=1),
        nn.BatchNorm2d(512),
        nn.ReLU())
    # first fusion stage: 1024 -> 256 channels
    self.fuse = nn.Sequential(
        nn.Conv2d(1024, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(512, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU())
    # second fusion stage over a 256+256 concatenation -> 64 channels
    self.fuse2 = nn.Sequential(
        nn.Conv2d(256 + 256, 128, kernel_size=3, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Conv2d(128, 64, kernel_size=3, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU())
    # third fusion stage over a 256+64 concatenation -> 256 channels
    self.fuse3 = nn.Sequential(
        nn.Conv2d(256 + 64, 256, kernel_size=1),
        #nn.Conv2d(256+256, 256, kernel_size=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=1),
        nn.BatchNorm2d(256),
        nn.ReLU())
    #self.template_refine= nn.Sequential(
    #        #nn.Conv2d(48+256, 256, kernel_size=3, stride=1, padding=1, bias=False),
    #        nn.Conv2d(1024, 256, kernel_size=7, stride=2, padding=3, bias=False),
    #        nn.BatchNorm2d(256),
    #        nn.ReLU(),
    #        #nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True),  # change
    #        nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1, bias=False),
    #        nn.BatchNorm2d(128),
    #        nn.ReLU())
    #self.template_fuse = nn.Sequential(
    #        nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1, bias=False),
    #        nn.BatchNorm2d(512),
    #        nn.ReLU(),
    #        #nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True),  # change
    #        nn.Conv2d(512, 64, kernel_size=3, stride=1, padding=1, bias=False),
    #        nn.BatchNorm2d(64),
    #        nn.ReLU())
    #        #nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True))
    # low-level feature projection: 256 -> 48 channels
    self.conv_low_1 = nn.Sequential(
        nn.Conv2d(256, 48,
                  kernel_size=1, bias=False),
        nn.BatchNorm2d(48),
        nn.ReLU())
    #self.conv_low_1 = nn.Sequential(
    #        nn.Conv2d(64, 48, kernel_size=1, bias=False),
    #        nn.BatchNorm2d(48),
    #        nn.ReLU())
    # refinement head over a 256+48 concatenation -> NoLabels logits
    self.refine = nn.Sequential(
        nn.Conv2d(256 + 48, 256, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Dropout(0.1),
        nn.Conv2d(256, NoLabels, kernel_size=1))
    """ self.predict = nn.Sequential( #nn.Conv2d(48+256, 256, kernel_size=3, stride=1, padding=1, bias=False), nn.Conv2d(128+64, 128, kernel_size=3, stride=1, padding=1, bias=False), nn.BatchNorm2d(128), nn.ReLU(), nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=False), nn.BatchNorm2d(128), nn.ReLU(), nn.Conv2d(128, NoLabels, kernel_size=1)) """
    # ImageNet RGB normalization constants, shaped (1, 3, 1, 1) for broadcast
    #self.register_buffer('mean', torch.FloatTensor([0.485, 0.456, 0.406, -0.329]).view(1,4,1,1))
    #self.register_buffer('std', torch.FloatTensor([0.229, 0.224, 0.225, 0.051]).view(1,4,1,1))
    self.register_buffer(
        'mean', torch.FloatTensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
    self.register_buffer(
        'std', torch.FloatTensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))
    self._init_weight()
for k, v in model.state_dict().items(): print(k + ": " + str(v.shape)) parser = argparse.ArgumentParser("DETR training and evaluation script", parents=[get_args_parser()]) args = parser.parse_args() # print(backbone.state_dict()) # for k, v in backbone.state_dict().items(): # print(k + ": " + str(v.shape)) # for name, _ in backbone.named_sublayers(): # print(name) # print(backbone.backbone.body) with dg.guard(): backbone = build_backbone(args) fake_image = dg.to_variable( np.zeros([4, 3, 512, 512], dtype=np.float32)) mask = dg.to_variable(np.zeros([4, 512, 512], dtype=np.bool)) fake_data = NestedTensor(fake_image, mask) for k, v in backbone.state_dict().items(): print(k + ': ' + str(v.shape)) out, pos = backbone(fake_data) for feature_map in out: print(feature_map.tensors.shape) # [4, 2048, 16, 16] print(feature_map.mask.shape) # [4, 16, 16] for pos_tensor in pos: