def get_model(model_name, input, num_classes, keep_prob, gpu=0, drop=False):
    # gpu = 0
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    # print('MODEL', model_name)
    if model_name == "encoder" or model_name == "encoder-decoder":
        network = build_encoder_decoder_skip(input, num_classes)
    elif model_name == "deepUnet" or model_name == "deepunet" or model_name == "deepUNet":
        network = build_deepUnet(input, num_classes)
    elif model_name == "fpn" or model_name == "FPN" or model_name == "siamFPN":
        network = FPN(input, num_classes)
        network = network.model()
    elif model_name == "rfpn" or model_name == "RFPN" or model_name == "siamRPN":
        network = RFPN(input, num_classes)
        network = network.model()
    elif model_name == "siamese" or model_name == "siamSia" or model_name == "siamsia":
        network = build_siamSia(input, num_classes)
    elif model_name == "UNet" or model_name == "unet" or model_name == "Unet":
        network = build_unet2(input, num_classes=num_classes)
    elif model_name == "aunet" or model_name == "Aunet" or model_name == "attentionNet":
        network = build_AUnet(input, num_classes=num_classes)
    elif model_name == "deep":
        network = build_deep(input, num_classes, gpu)
    else:
        raise ValueError(
            "Error: the model %s is not available. Try checking which models "
            "are available using the command python main.py --help" % model_name)
    return network
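# A hedged usage sketch for get_model. A TensorFlow 1.x placeholder is assumed
# as the `input` argument (the builder functions above are not shown here), and
# the model name "unet" and placeholder shape are illustrative; the accepted
# names are exactly the branches handled above.
import tensorflow as tf

net_input = tf.placeholder(tf.float32, shape=[None, None, None, 3])
network = get_model("unet", net_input, num_classes=2, keep_prob=0.5, gpu=0)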
def main():
    DATA_ROOT = 'data/wbq/'
    GDnet = GazeDirectionNet()
    GDnet.load_state_dict(
        torch.load('train3.pth', map_location=torch.device('cpu')))
    GDnet.eval()

    fpn_net = FPN()
    # fpn_net.load_state_dict(torch.load('fpn_net.pth', map_location=torch.device('cpu')))
    # fpn_net.eval()
    pretrained_dict = torch.load('fpn_net.pth', map_location=torch.device('cpu'))
    model_dict = fpn_net.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
    model_dict.update(pretrained_dict)
    fpn_net.load_state_dict(model_dict)

    eval_set = SVIPDataset(root_dir=DATA_ROOT, ann_file='wbq_annotation.json')
    data_loader = DataLoader(eval_set, batch_size=1, shuffle=False, num_workers=1)
    # full = cv2.imread(DATA_ROOT + 'dinner.png')

    for i, data in tqdm(enumerate(data_loader)):
        eye = data['eye']
        gaze_GT = data['gaze_positon']
        GD_input = [data['head_image'], data['head_position']]
        output_direction = GDnet(GD_input)
        direction = transform_direction(output_direction[0])

        gdf_1 = get_direction_field(eye[0], eye[1], direction, 1)
        gdf_2 = get_direction_field(eye[0], eye[1], direction, 2)
        gdf_5 = get_direction_field(eye[0], eye[1], direction, 5)
        fpn_input = torch.cat([data['image'][0], gdf_1, gdf_2, gdf_5]).unsqueeze(0)
        output_heatmap = fpn_net(fpn_input)[0][0]

        img = cv2.imread(DATA_ROOT + data['path'][0])
        draw_temp(img, eye, gaze_GT, output_heatmap.detach().numpy(), 'wbq-' + str(i))
        # draw_full(full, eye, output_heatmap.detach().numpy())
        # draw_input(img, eye, str(i))
        # img = cv2.imread(DATA_ROOT + data['path'][0])
        # draw_direction(img, eye, output_direction[0], str(i))
        img = cv2.imread(DATA_ROOT + data['path'][0])
        draw_heatmap(img, output_heatmap.detach().numpy(), 'wbq-' + str(i))
def __init__(self,
             num_classes,
             fpn_features=256,
             ratios=None,
             scales=None,
             backbone='resnet50'):
    super(RetinaNet, self).__init__()
    self.anchor_ratios = [0.5, 1, 2] if ratios is None else ratios
    self.anchor_scales = ([2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
                          if scales is None else scales)
    num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
    self.num_classes = num_classes

    if backbone == 'resnet50':
        self.backbone = resnet50()
    if backbone == 'resnet101':
        self.backbone = resnet101()
    if backbone == 'resnet50_cbam':
        self.backbone = resnet50_cbam()
    if backbone == 'resnet101_cbam':
        self.backbone = resnet101_cbam()

    self.fpn = FPN(features=fpn_features)
    self.classifier = Classifier(in_channels=fpn_features,
                                 num_anchors=num_anchors,
                                 num_classes=num_classes)
    self.regressor = Regressor(in_channels=fpn_features, num_anchors=num_anchors)
    self.anchors = Anchor()
class RetinaNet(nn.Module):
    """Base class for single-stage detectors.

    Single-stage detectors directly and densely predict bounding boxes on the
    output features of the backbone+neck.
    """

    def __init__(self, pretrained=None):
        super(RetinaNet, self).__init__()
        self.backbone = ResNet()
        self.neck = FPN()
        self.bbox_head = RetinaHead()
        self.init_weights()

    def init_weights(self):
        self.backbone.load_state_dict(
            torch.load('/Users/nick/.cache/torch/checkpoints/resnet50-19c8e357.pth'),
            strict=False)
        self.neck.init_weights()
        self.bbox_head.init_weights()

    def extract_feat(self, img):
        """Directly extract features from the backbone+neck."""
        x = self.backbone(img)
        x = self.neck(x)
        return x

    def forward_train(self, img, img_metas, gt_bboxes, gt_labels):
        x = self.extract_feat(img)
        outs = self.bbox_head(x)
        loss_inputs = outs + (gt_bboxes, gt_labels, img_metas)
        losses = self.bbox_head.loss(*loss_inputs)
        return losses

    def forward(self, img, img_meta, gt_bboxes, gt_labels):
        """Calls either forward_train or forward_test depending on whether
        return_loss=True.

        Note this setting will change the expected inputs. When
        `return_loss=True`, img and img_meta are single-nested (i.e. Tensor
        and List[dict]), and when `return_loss=False`, img and img_meta should
        be double-nested (i.e. List[Tensor], List[List[dict]]), with the outer
        list indicating test-time augmentations.
        """
        return self.forward_train(img, img_meta, gt_bboxes, gt_labels)
def __init__(self, training=True, fpn_channel=256, class_num=80, anchor_num=9):
    super(RetinaNet, self).__init__()
    self.resnet = resnet50(pretrained=training)
    self.fpn = FPN(fpn_channel=fpn_channel)
    self.classifier = Classifier(fpn_channel, class_num, anchor_num)
    self.localizer = Localizer(fpn_channel, anchor_num)
    self.initialization()
def __init__(self, in_dim=3, num_classes=20):
    super(RetinaNet, self).__init__()
    # define params
    self.fpn = FPN(version="resnet50")  # use a ResNet50 FPN to extract feature maps
    self.in_dim = in_dim
    self.num_classes = num_classes
    self.num_anchors = 9
    # cls & regression branches
    self.loc_head = self.make_head(self.num_anchors * 4)
    self.cls_head = self.make_head(self.num_anchors * self.num_classes)
def build():
    def head():
        def conv_uniform(in_channels, out_channels, k_size, stride=1,
                         dilated=1, is_bn=False):
            conv = nn.Conv2d(in_channels,
                             out_channels,
                             kernel_size=k_size,
                             stride=stride,
                             padding=dilated * (k_size - 1) // 2,
                             dilation=dilated,
                             bias=False if is_bn else True)
            nn.init.kaiming_uniform_(conv.weight, a=1)
            module = [conv]
            if is_bn:
                module.append(nn.BatchNorm2d(out_channels))
            if len(module) > 1:
                return nn.Sequential(*module)
            return conv

        return conv_uniform

    body = resnet50()
    in_channels_ = 256
    out_channels = 256 * 4
    in_channel_p6p7 = in_channels_ * 8
    fpn = FPN(in_channels_list=[
        0,
        in_channels_ * 2,
        in_channels_ * 4,
        in_channels_ * 8,
    ],
              out_channels=out_channels,
              conv_block=head(),
              top_blocks=LastLevelP6P7(in_channel_p6p7, out_channels))
    model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)]))
    model.out_channels = out_channels
    return model
def __init__(self, layers, block, num_classes):
    super(ResNet, self).__init__()
    self.training = True
    self.in_plane = 64
    self.conv1 = ConvBlock(3, 64, kernel=7, stride=2, pad=3, bias=False)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Needs to be converted to a for loop
    self.conv_2 = self._make_layers(block, layers[0], 64)
    self.conv_3 = self._make_layers(block, layers[1], 128, stride=2)
    self.conv_4 = self._make_layers(block, layers[2], 256, stride=2)
    self.conv_5 = self._make_layers(block, layers[3], 512, stride=2)

    # Feature Pyramid Network
    self.fpn = FPN([128, 256, 512])

    # Regression Model
    self.regressionModel = RegressionModel(256)
    self.classificationModel = ClassificationModel(256, num_classes=num_classes)

    # Anchors
    self.anchors = RPN()

    # Focal Loss
    self.focalLoss = FocalLoss()

    # Utils Function
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()

    self._init_weights()
def __init__(self, num_classes=2):
    super(Detector, self).__init__()
    self.fpn = FPN()
    self.num_classes = num_classes
    self.num_anchors = 9  # assumed default: 3 scales x 3 aspect ratios per location
    self.loc_head = self._make_head(self.num_anchors * 4)
    self.cls_head = self._make_head(self.num_anchors * self.num_classes)
def __init__(self, pretrained=None):
    super(RetinaNet, self).__init__()
    self.backbone = ResNet()
    self.neck = FPN()
    self.bbox_head = RetinaHead()
    self.init_weights()
def make_fpn_efficientnet(name: str = 'efficientnet_b0',
                          fpn_type: str = 'fpn',
                          out_size: Tuple[int, int] = (224, 224),
                          fpn_channels: int = 256,
                          num_classes: int = 1000,
                          pretrained: Optional[str] = 'imagenet',
                          in_channels: int = 3) -> nn.Module:
    """Loads the PyTorch implementation of EfficientNet from
    https://github.com/lukemelas/EfficientNet-PyTorch using torch.hub.

    Args:
        name (str, optional): Name of the EfficientNet backbone. Only those
            available in the lukemelas/EfficientNet-PyTorch repo are
            supported. Defaults to 'efficientnet_b0'.
        fpn_type (str, optional): Type of FPN. 'fpn' | 'panoptic' |
            'panet+fpn'. Defaults to 'fpn'.
        out_size (Tuple[int, int], optional): Size of segmentation output.
            Defaults to (224, 224).
        fpn_channels (int, optional): Number of hidden channels to use in the
            FPN. Defaults to 256.
        num_classes (int, optional): Number of classes for which to make
            predictions. Determines the channel width of the output.
            Defaults to 1000.
        pretrained (Optional[str], optional): One of None | 'imagenet' |
            'advprop'. See lukemelas/EfficientNet-PyTorch for details.
            Defaults to 'imagenet'.
        in_channels (int, optional): Channel width of the input. If greater
            than 3, a parallel backbone is added to incorporate the new
            channels, and the feature maps of the two backbones are added
            together to produce the final feature maps. Note that this is
            currently different from make_fpn_resnet. See
            lukemelas/EfficientNet-PyTorch for the in_channels < 3 case.
            Defaults to 3.

    Raises:
        NotImplementedError: On unknown fpn_type.

    Returns:
        nn.Module: the FPN model
    """
    effnet = _load_efficientnet(name=name,
                                num_classes=num_classes,
                                pretrained=pretrained)

    if in_channels > 3:
        new_channels = in_channels - 3
        new_effnet = _load_efficientnet(
            name=name,
            num_classes=num_classes,
            pretrained=pretrained,
            in_channels=new_channels,
        )
        backbone = nn.Sequential(
            SplitTensor((3, new_channels), dim=1),
            Parallel([
                EfficientNetFeatureMapsExtractor(effnet),
                EfficientNetFeatureMapsExtractor(new_effnet)
            ]), AddAcross())
    else:
        backbone = EfficientNetFeatureMapsExtractor(effnet)

    feat_shapes = _get_shapes(backbone, channels=in_channels, size=out_size)
    if fpn_type == 'fpn':
        fpn = nn.Sequential(
            FPN(feat_shapes,
                hidden_channels=fpn_channels,
                out_channels=num_classes), SelectOne(idx=0))
    elif fpn_type == 'panoptic':
        fpn = PanopticFPN(feat_shapes,
                          hidden_channels=fpn_channels,
                          out_channels=num_classes)
    elif fpn_type == 'panet+fpn':
        feat_shapes2 = [(n, fpn_channels, h, w) for (n, c, h, w) in feat_shapes]
        fpn = nn.Sequential(
            PANetFPN(feat_shapes,
                     hidden_channels=fpn_channels,
                     out_channels=fpn_channels),
            FPN(feat_shapes2,
                hidden_channels=fpn_channels,
                out_channels=num_classes), SelectOne(idx=0))
    else:
        raise NotImplementedError()

    # yapf: disable
    model = nn.Sequential(
        backbone,
        fpn,
        Interpolate(size=out_size, mode='bilinear', align_corners=False))
    # yapf: enable
    return model
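# A minimal usage sketch for make_fpn_efficientnet. The class count and input
# size are illustrative; pretrained=None skips the pretrained weights (the
# torch.hub backbone code itself may still be fetched on first use).
import torch

model = make_fpn_efficientnet(name='efficientnet_b0',
                              fpn_type='fpn',
                              out_size=(224, 224),
                              num_classes=2,
                              pretrained=None,
                              in_channels=3)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # expected: (1, 2, 224, 224) segmentation logits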
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, val=0)

    def forward(self, x):
        x = self.reg_head(x)
        return x


if __name__ == '__main__':
    image_h, image_w = 640, 640
    from fpn import FPN

    fpn_model = FPN(512, 1024, 2048, 256)
    C3, C4, C5 = (torch.randn(3, 512, 80, 80),
                  torch.randn(3, 1024, 40, 40),
                  torch.randn(3, 2048, 20, 20))
    features = fpn_model([C3, C4, C5])
    print("1111", features[0].shape)

    cls_model = ClsHead(256, 9, 80)
    reg_model = RegHead(256, 9)
    cls_output = cls_model(features[0])
    reg_output = reg_model(features[0])
    print("2222", cls_output.shape, reg_output.shape)
    shuffle=True,
    num_workers=4
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4
)

teacher_net = None
if args.depth == 50:
    teacher_net = resnet_bl.resnet50(num_classes=args.class_num)
    teacher_fpn = FPN(in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5)
    teacher_fpn_head = FPNHead()
    print("using resnet 50")

teacher_net.to(device)
teacher_fpn.to(device)
teacher_fpn_head.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(teacher_net.parameters(), lr=LR, weight_decay=5e-4, momentum=0.9)

if args.multigpu:
    teacher_net = torch.nn.DataParallel(teacher_net.cuda())

if __name__ == "__main__":
    best_acc = 0
def __init__(self,
             mode,
             rpn_anchor_ratios,
             rpn_anchor_scales,
             mask_shape,
             pool_size,
             image_shape,
             mini_mask_shape,
             backbone_strides,
             mean_pixel,
             roi_size=7,
             backbone='resnet50',
             stage5=True,
             norm='batch',
             use_bias=True,
             rpn_anchor_stride=1,
             image_per_gpu=1,
             gpu_count=1,
             detection_max_instances=100,
             train_rois_per_image=200,
             num_classes=1,
             use_mini_mask=True,
             use_pretrained_model=True,
             top_down_pyramid_size=256,
             post_nms_rois_training=2000,
             post_nms_rois_inference=1000,
             pre_nms_limit=6000,
             rpn_nms_threshold=0.7,
             use_rpn_rois=True,
             model_dir=None,
             optimizer_method='Adam',
             learning_rate=0.001,
             momentum=0.9,
             weight_decay=0.0001,
             image_min_dim=800,
             image_max_dim=1024,
             image_min_scale=0.0,
             image_resize_mode='square',
             max_gt_instances=100,
             rpn_train_anchors_per_image=256):
    assert mode in ['training', 'inference']
    assert optimizer_method in ['Adam', 'SGD']

    tf.reset_default_graph()
    self.graph = tf.Graph()

    self.mode = mode
    self.rpn_anchor_ratios = rpn_anchor_ratios
    self.rpn_anchor_scales = rpn_anchor_scales
    self.mask_shape = mask_shape
    self.pool_size = pool_size
    self.image_shape = np.array(image_shape)
    self.mini_mask_shape = mini_mask_shape
    self.backbone_strides = backbone_strides
    self.mean_pixel = mean_pixel
    self.roi_size = roi_size
    self.backbone = backbone
    self.stage5 = stage5
    self.norm = norm
    self.use_bias = use_bias
    self.rpn_anchor_stride = rpn_anchor_stride
    self.image_per_gpu = image_per_gpu
    self.gpu_count = gpu_count
    self.detection_max_instances = detection_max_instances
    self.train_rois_per_image = train_rois_per_image
    self.num_classes = num_classes
    self.use_mini_mask = use_mini_mask
    self.use_pretrained_model = use_pretrained_model
    self.top_down_pyramid_size = top_down_pyramid_size
    self.post_nms_rois_training = post_nms_rois_training
    self.post_nms_rois_inference = post_nms_rois_inference
    self.pre_nms_limit = pre_nms_limit
    self.rpn_nms_threshold = rpn_nms_threshold
    self.use_rpn_rois = use_rpn_rois
    self.model_dir = model_dir
    self.optimizer_method = optimizer_method
    self.learning_rate = learning_rate
    self.momentum = momentum
    self.weight_decay = weight_decay
    self.image_min_dim = image_min_dim
    self.image_max_dim = image_max_dim
    self.image_min_scale = image_min_scale
    self.image_resize_mode = image_resize_mode
    self.max_gt_instances = max_gt_instances
    self.rpn_train_anchors_per_image = rpn_train_anchors_per_image

    self.image_meta_size = 1 + 3 + 3 + 4 + 1 + self.num_classes
    self.reuse = False
    self._anchor_cache = {}
    self.batch_size = self.gpu_count * self.image_per_gpu
    self.backbone_shape = utils.compute_backbone_shapes(
        self.backbone, self.backbone_strides, self.image_shape)
    self.num_anchors_per_image = len(self.rpn_anchor_ratios) * (
        self.backbone_shape[0][0] * self.backbone_shape[0][0] +
        self.backbone_shape[1][0] * self.backbone_shape[1][0] +
        self.backbone_shape[2][0] * self.backbone_shape[2][0] +
        self.backbone_shape[3][0] * self.backbone_shape[3][0] +
        self.backbone_shape[4][0] * self.backbone_shape[4][0])

    with self.graph.as_default():
        self.is_training = tf.placeholder_with_default(False, [])
        self.input_image = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.image_shape[0], self.image_shape[1],
                   self.image_shape[2]],
            name='input_image')
        self.input_image_meta = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self.image_meta_size],
            name='input_image_meta')

        if mode == 'training':
            self.input_rpn_match = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self.num_anchors_per_image, 1],
                name='input_rpn_match')
            self.input_rpn_boxes = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.rpn_train_anchors_per_image, 4],
                name='input_rpn_boxes')
            self.input_gt_class_ids = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self.max_gt_instances],
                name='input_gt_class_ids')
            self.input_gt_boxes = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.max_gt_instances, 4],
                name='input_gt_boxes')
            self.input_gt_boxes_normalized = utils.norm_boxes_graph(
                self.input_gt_boxes, tf.shape(self.input_image)[1:3])
            self.proposal_count = self.post_nms_rois_training
            if self.use_mini_mask:
                self.input_gt_masks = tf.placeholder(
                    dtype=tf.bool,
                    shape=[None, self.mini_mask_shape[0],
                           self.mini_mask_shape[1], self.max_gt_instances],
                    name='input_gt_mask')
            else:
                self.input_gt_masks = tf.placeholder(
                    dtype=tf.bool,
                    shape=[None, self.image_shape[0], self.image_shape[1],
                           self.max_gt_instances],
                    name='input_gt_mask')
        elif mode == 'inference':
            self.input_anchors = tf.placeholder(dtype=tf.float32,
                                                shape=[None, None, 4],
                                                name='input_anchors')
            self.proposal_count = self.post_nms_rois_inference

        self.resnet = Resnet(name='resnet',
                             architecture=self.backbone,
                             is_training=self.is_training,
                             stage5=self.stage5,
                             use_bias=self.use_bias)
        arg_scope = nets.resnet_v2.resnet_arg_scope()
        with slim.arg_scope(arg_scope):
            _, self.end_points = nets.resnet_v2.resnet_v2_50(
                self.input_image,
                num_classes=None,
                is_training=self.is_training)
        self.fpn = FPN(name='fpn',
                       top_down_pyramid_size=self.top_down_pyramid_size,
                       use_bias=self.use_bias)
        self.rpn = RPN(name='rpn',
                       anchors_per_location=len(self.rpn_anchor_ratios),
                       anchor_stride=self.rpn_anchor_stride,
                       is_training=self.is_training,
                       use_bias=self.use_bias)
        self.proposal = ProposalLayer(self.pre_nms_limit, self.proposal_count,
                                      self.rpn_nms_threshold, self.image_per_gpu)
        self.pyramidRoiPooling = PyramidRoiPooling(name='PyramidRoiPooling',
                                                   roi_size=self.roi_size)
        self.objDetection = ObjDetection(
            image_per_gpu=self.image_per_gpu,
            gpu_count=self.gpu_count,
            detection_max_instances=self.detection_max_instances)
        self.targetDetection = TargetDetection(
            mask_shape=self.mask_shape,
            image_per_gpu=self.image_per_gpu,
            train_rois_per_image=self.train_rois_per_image)
        self.fpnClassifier = FpnClassifier('FpnClassifier',
                                           pool_size=self.pool_size,
                                           num_classes=self.num_classes,
                                           is_training=self.is_training)
        self.fpnMask = FpnMask('FpnMask',
                               num_classes=self.num_classes,
                               is_training=self.is_training)
    shuffle=True,
    num_workers=4
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4
)

net = None
if args.depth == 50:
    net = resnet_bl.resnet50(num_classes=args.class_num)
    fpn = FPN(in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5)
    fpn_head = FPNHead()
    print("using resnet 50")

net.to(device)
fpn.to(device)
fpn_head.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, weight_decay=5e-4, momentum=0.9)

if args.multigpu:
    net = torch.nn.DataParallel(net.cuda())

if __name__ == "__main__":
    best_acc = 0
from pycocotools.cocoeval import COCOeval
import json

os.environ["CUDA_VISIBLE_DEVICES"] = "5"
device = torch.device('cuda:0')

dataset_train = CocoDataset('../dataset',
                            set_name='val2017',
                            transform=transforms.Compose(
                                [Normalizer(), Augmenter(), Resizer()]))
dataset_val = CocoDataset('../dataset',
                          set_name='val2017',
                          transform=transforms.Compose([Normalizer(), Resizer()]))

sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
dataloader_train = DataLoader(dataset_train,
                              num_workers=3,
                              collate_fn=collater,
                              batch_sampler=sampler)

fpn = FPN()
net = Net()
anchors = Anchors()

fpn = fpn.to(device)
net = net.to(device)

criterion = AnchorBasedLoss()
optimizer1 = optim.Adam(fpn.parameters(), lr=1e-4)
optimizer2 = optim.Adam(net.parameters(), lr=1e-4)


def train():
    num = len(dataloader_train) * 2
def make_fpn_resnet(name: str = 'resnet18',
                    fpn_type: str = 'fpn',
                    out_size: Tuple[int, int] = (224, 224),
                    fpn_channels: int = 256,
                    num_classes: int = 1000,
                    pretrained: bool = True,
                    in_channels: int = 3) -> nn.Module:
    """Create an FPN model with a ResNet backbone.

    If `in_channels > 3`, uses the fusion technique described in the paper,
    *FuseNet*, by Hazirbas et al.
    (https://vision.in.tum.de/_media/spezial/bib/hazirbasma2016fusenet.pdf)
    that adds a parallel resnet backbone for the new channels. All the
    pretrained weights are retained.

    Args:
        name (str, optional): Name of the resnet backbone. Only those
            available in torchvision are supported. Defaults to 'resnet18'.
        fpn_type (str, optional): Type of FPN. 'fpn' | 'panoptic' | 'panet'.
            Defaults to 'fpn'.
        out_size (Tuple[int, int], optional): Size of segmentation output.
            Defaults to (224, 224).
        fpn_channels (int, optional): Number of hidden channels to use in the
            FPN. Defaults to 256.
        num_classes (int, optional): Number of classes for which to make
            predictions. Determines the channel width of the output.
            Defaults to 1000.
        pretrained (bool, optional): Whether to use a pretrained backbone.
            Defaults to True.
        in_channels (int, optional): Channel width of the input. If less
            than 3, conv1 is replaced with a smaller one. If greater than 3,
            a FuseNet-style architecture is used to incorporate the new
            channels. In both cases, pretrained weights are retained.
            Defaults to 3.

    Raises:
        NotImplementedError: On unknown fpn_type.

    Returns:
        nn.Module: the FPN model
    """
    assert in_channels > 0
    assert num_classes > 0
    assert out_size[0] > 0 and out_size[1] > 0

    resnet = tv.models.resnet.__dict__[name](pretrained=pretrained)
    if in_channels == 3:
        backbone = ResNetFeatureMapsExtractor(resnet)
    else:
        old_conv = resnet.conv1
        old_conv_args = {
            'out_channels': old_conv.out_channels,
            'kernel_size': old_conv.kernel_size,
            'stride': old_conv.stride,
            'padding': old_conv.padding,
            'dilation': old_conv.dilation,
            'groups': old_conv.groups,
            'bias': old_conv.bias
        }
        if not pretrained:
            # just replace the first conv layer
            new_conv = nn.Conv2d(in_channels=in_channels, **old_conv_args)
            resnet.conv1 = new_conv
            backbone = ResNetFeatureMapsExtractor(resnet)
        else:
            if in_channels > 3:
                new_channels = in_channels - 3
                new_conv = nn.Conv2d(in_channels=new_channels, **old_conv_args)

                resnet_cls = tv.models.resnet.__dict__[name]
                new_resnet = resnet_cls(pretrained=pretrained)
                new_resnet.conv1 = copy_conv_weights(old_conv, new_conv)

                backbone = make_fusion_resnet_backbone(resnet, new_resnet)
            else:
                new_conv = nn.Conv2d(in_channels=in_channels, **old_conv_args)
                resnet.conv1 = copy_conv_weights(old_conv, new_conv)
                backbone = ResNetFeatureMapsExtractor(resnet)

    feat_shapes = _get_shapes(backbone, channels=in_channels, size=out_size)
    if fpn_type == 'fpn':
        fpn = nn.Sequential(
            FPN(feat_shapes,
                hidden_channels=fpn_channels,
                out_channels=num_classes), SelectOne(idx=0))
    elif fpn_type == 'panoptic':
        fpn = PanopticFPN(feat_shapes,
                          hidden_channels=fpn_channels,
                          out_channels=num_classes)
    elif fpn_type == 'panet':
        fpn1 = FPN(feat_shapes,
                   hidden_channels=fpn_channels,
                   out_channels=fpn_channels)
        feat_shapes = [(n, fpn_channels, h, w) for (n, c, h, w) in feat_shapes]
        fpn2 = FPN(feat_shapes[::-1],
                   hidden_channels=fpn_channels,
                   out_channels=num_classes)
        fpn = nn.Sequential(PANetFPN(fpn1, fpn2), SelectOne(idx=0))
    else:
        raise NotImplementedError()

    # yapf: disable
    model = nn.Sequential(
        backbone,
        fpn,
        Interpolate(size=out_size, mode='bilinear', align_corners=True))
    # yapf: enable
    return model
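# A minimal usage sketch for make_fpn_resnet: a 4-channel input (e.g. RGB plus
# one extra band) exercises the FuseNet-style handling of in_channels > 3. The
# class count and sizes are illustrative; pretrained=False avoids downloading
# torchvision weights here.
import torch

model = make_fpn_resnet(name='resnet18',
                        fpn_type='fpn',
                        out_size=(224, 224),
                        num_classes=3,
                        pretrained=False,
                        in_channels=4)
model.eval()
with torch.no_grad():
    out = model(torch.randn(1, 4, 224, 224))  # expected: (1, 3, 224, 224) logits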
def __init__(self,
             block,
             layers,
             fpn_in_channels,
             fpn_out_channels,
             fpn_num_outs,
             num_classes=100,
             zero_init_residual=False,
             groups=1,
             width_per_group=64,
             replace_stride_with_dilation=None,
             norm_layer=None):
    super(ResNet, self).__init__()
    if norm_layer is None:
        norm_layer = nn.BatchNorm2d
    self._norm_layer = norm_layer

    self.inplanes = 64
    self.dilation = 1
    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(
                             replace_stride_with_dilation))
    self.groups = groups
    self.base_width = width_per_group
    self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1,
                           padding=1, bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                   dilate=replace_stride_with_dilation[0])
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                   dilate=replace_stride_with_dilation[1])
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                   dilate=replace_stride_with_dilation[2])
    self.scala4 = nn.AvgPool2d(4, 4)
    self.fc4 = nn.Linear(512 * block.expansion, num_classes)

    self.fpn = FPN(in_channels=fpn_in_channels,
                   out_channels=fpn_out_channels,
                   num_outs=fpn_num_outs)
    self.fpn_head = FPNHead(num_classes=num_classes, n_maps=fpn_num_outs)

    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                    nonlinearity='relu')
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Zero-initialize the last BN in each residual branch, so that the
    # residual branch starts with zeros and each residual block behaves like
    # an identity. This improves the model by 0.2~0.3% according to
    # https://arxiv.org/abs/1706.02677
    if zero_init_residual:
        for m in self.modules():
            if isinstance(m, Bottleneck):
                nn.init.constant_(m.bn3.weight, 0)
            elif isinstance(m, BasicBlock):
                nn.init.constant_(m.bn2.weight, 0)
# -*- coding: utf-8 -*-
"""
@File    : fpn_test.py
@Time    : 12/12/20 9:40 PM
@Author  : Mingqiang Ning
@Email   : [email protected]
@Modify Time        @Version    @Description
------------        --------    -----------
12/12/20 9:40 PM    1.0         None
# @Software: PyCharm
"""
import torch
from fpn import FPN

net = FPN([3, 4, 6, 3]).cuda()
print(net)
input = torch.randn(1, 3, 224, 224).cuda()
output = net(input)