def get_model(model_name, input, num_classes, keep_prob, gpu=0, drop=False):
    #gpu = 0
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    #print('MODEL', model_name)
    if model_name == "encoder" or model_name == "encoder-decoder":
        network = build_encoder_decoder_skip(input, num_classes)
    elif model_name == "deepUnet" or model_name == "deepunet" or model_name == "deepUNet":
        network = build_deepUnet(input, num_classes)
    elif model_name == "fpn" or model_name == "FPN" or model_name == "siamFPN":
        network = FPN(input, num_classes)
        network = network.model()
    elif model_name == "rfpn" or model_name == "RFPN" or model_name == "siamRPN":
        network = RFPN(input, num_classes)
        network = network.model()
    elif model_name == "siamese" or model_name == "siamSia" or model_name == "siamsia":
        network = build_siamSia(input, num_classes)
    elif model_name == "UNet" or model_name == "unet" or model_name == "Unet":
        network = build_unet2(input, num_classes=num_classes)
    elif model_name == "aunet" or model_name == "Aunet" or model_name == "attentionNet":
        network = build_AUnet(input, num_classes=num_classes)
    elif model_name == "deep":
        network = build_deep(input, num_classes, gpu)
    else:
        raise ValueError("Error: the model %d is not available. Try checking which models are available using the command python main.py --help", model_name)
    return network
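A hedged usage sketch of the dispatcher above, assuming a TensorFlow 1.x setting (as in the later Mask R-CNN example) where the input is a placeholder and the build_* helpers are importable in the same module; the shape, class count, and keep_prob value are illustrative only:

import tensorflow as tf

# Illustrative values; the model-name string must match one of the
# branches in the if/elif chain above.
num_classes = 2
net_input = tf.placeholder(tf.float32, shape=[None, 512, 512, 3])
network = get_model("unet", net_input, num_classes, keep_prob=0.8, gpu=0)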
Example #2
def main():
    DATA_ROOT = 'data/wbq/'

    GDnet = GazeDirectionNet()
    GDnet.load_state_dict(
        torch.load('train3.pth', map_location=torch.device('cpu')))
    GDnet.eval()

    fpn_net = FPN()
    # fpn_net.load_state_dict(torch.load('fpn_net.pth',map_location=torch.device('cpu')))
    # fpn_net.eval()

    pretrained_dict = torch.load('fpn_net.pth',
                                 map_location=torch.device('cpu'))
    model_dict = fpn_net.state_dict()
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in model_dict
    }
    model_dict.update(pretrained_dict)
    fpn_net.load_state_dict(model_dict)

    eval_set = SVIPDataset(root_dir=DATA_ROOT, ann_file='wbq_annotation.json')
    data_loader = DataLoader(eval_set,
                             batch_size=1,
                             shuffle=False,
                             num_workers=1)

    # full = cv2.imread(DATA_ROOT + 'dinner.png')
    for i, data in tqdm(enumerate(data_loader)):
        eye = data['eye']
        gaze_GT = data['gaze_positon']

        GD_input = [data['head_image'], data['head_position']]
        output_direction = GDnet(GD_input)
        direction = transform_direction(output_direction[0])
        gdf_1 = get_direction_field(eye[0], eye[1], direction, 1)
        gdf_2 = get_direction_field(eye[0], eye[1], direction, 2)
        gdf_5 = get_direction_field(eye[0], eye[1], direction, 5)
        fpn_input = torch.cat([data['image'][0], gdf_1, gdf_2,
                               gdf_5]).unsqueeze(0)
        output_heatmap = fpn_net(fpn_input)[0][0]

        img = cv2.imread(DATA_ROOT + data['path'][0])
        draw_temp(img, eye, gaze_GT,
                  output_heatmap.detach().numpy(), 'wbq-' + str(i))
        # draw_full(full,eye,output_heatmap.detach().numpy())
        # draw_input(img,eye,str(i))
        # img = cv2.imread(DATA_ROOT + data['path'][0])
        # draw_direction(img,eye,output_direction[0],str(i))
        img = cv2.imread(DATA_ROOT + data['path'][0])
        draw_heatmap(img, output_heatmap.detach().numpy(), 'wbq-' + str(i))
Example #3
    def __init__(self,
                 num_classes,
                 fpn_features=256,
                 ratios=None,
                 scales=None,
                 backbone='resnet50'):
        super(RetinaNet, self).__init__()

        self.anchor_ratios = [0.5, 1, 2] if ratios is None else ratios
        self.anchor_scales = ([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
                              if scales is None else scales)
        num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)

        self.num_classes = num_classes

        if backbone == 'resnet50':
            self.backbone = resnet50()
        elif backbone == 'resnet101':
            self.backbone = resnet101()
        elif backbone == 'resnet50_cbam':
            self.backbone = resnet50_cbam()
        elif backbone == 'resnet101_cbam':
            self.backbone = resnet101_cbam()
        else:
            raise ValueError("Unsupported backbone: %s" % backbone)

        self.fpn = FPN(features=fpn_features)
        self.classifier = Classifier(in_channels=fpn_features,
                                     num_anchors=num_anchors,
                                     num_classes=num_classes)
        self.regressor = Regressor(in_channels=fpn_features,
                                   num_anchors=num_anchors)
        self.anchors = Anchor()
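The num_anchors value above is just the count of (ratio, scale) combinations per feature-map location; a self-contained sketch mirroring the constructor defaults:

ratios = [0.5, 1, 2]
scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

# One anchor template per (ratio, scale) pair -> 3 * 3 = 9 per location.
anchor_templates = [(r, s) for r in ratios for s in scales]
assert len(anchor_templates) == len(ratios) * len(scales) == 9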
Example #4
class RetinaNet(nn.Module):
    """Base class for single-stage detectors.

    Single-stage detectors directly and densely predict bounding boxes on the
    output features of the backbone+neck.
    """
    def __init__(self, pretrained=None):
        super(RetinaNet, self).__init__()
        self.backbone = ResNet()
        self.neck = FPN()
        self.bbox_head = RetinaHead()
        self.init_weights()

    def init_weights(self):
        self.backbone.load_state_dict(torch.load(
            '/Users/nick/.cache/torch/checkpoints/resnet50-19c8e357.pth'),
                                      strict=False)
        self.neck.init_weights()
        self.bbox_head.init_weights()

    def extract_feat(self, img):
        """Directly extract features from the backbone+neck
        """
        x = self.backbone(img)
        x = self.neck(x)
        return x

    def forward_train(self, img, img_metas, gt_bboxes, gt_labels):
        x = self.extract_feat(img)
        outs = self.bbox_head(x)
        loss_inputs = outs + (gt_bboxes, gt_labels, img_metas)
        losses = self.bbox_head.loss(*loss_inputs)
        return losses

    def forward(self, img, img_meta, gt_bboxes, gt_labels):
        """
        Calls either forward_train or forward_test depending on whether
        return_loss=True. Note this setting will change the expected inputs.
        When `return_loss=True`, img and img_meta are single-nested (i.e.
        Tensor and List[dict]); when `return_loss=False`, img and img_meta
        should be double-nested (i.e. List[Tensor], List[List[dict]]), with
        the outer list indicating test-time augmentations.
        """
        return self.forward_train(img, img_meta, gt_bboxes, gt_labels)
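A minimal, self-contained sketch of the two input conventions the docstring describes, built from plain tensors and dicts (shapes and meta keys are illustrative assumptions; no model call is made):

import torch

img_meta = {'img_shape': (800, 1216, 3), 'scale_factor': 1.0}

# return_loss=True: single-nested inputs (Tensor, List[dict]).
train_img = torch.randn(2, 3, 800, 1216)
train_metas = [img_meta, img_meta]

# return_loss=False: double-nested inputs (List[Tensor], List[List[dict]]),
# one outer entry per test-time augmentation.
test_imgs = [torch.randn(1, 3, 800, 1216), torch.randn(1, 3, 800, 1216)]
test_metas = [[img_meta], [img_meta]]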
Example #5
    def __init__(self,
                 training=True,
                 fpn_channel=256,
                 class_num=80,
                 anchor_num=9):
        super(RetinaNet, self).__init__()
        self.resnet = resnet50(pretrained=training)
        self.fpn = FPN(fpn_channel=fpn_channel)
        self.classifier = Classifier(fpn_channel, class_num, anchor_num)
        self.localizer = Localizer(fpn_channel, anchor_num)

        self.initialization()
Example #6
    def __init__(self, in_dim=3, num_classes=20):
        super(RetinaNet, self).__init__()

        # define params
        self.fpn = FPN(
            version="resnet50")  # use ResNet50 FPN extract feature maps
        self.in_dim = in_dim
        self.num_classes = num_classes
        self.num_anchors = 9
        # cls & regression branches
        self.loc_head = self.make_head(self.num_anchors * 4)
        self.cls_head = self.make_head(self.num_anchors * self.num_classes)
Example #7
def build():
    def head():
        def conv_uniform(in_channels,
                         out_channels,
                         k_size,
                         stride=1,
                         dilated=1,
                         is_bn=False):

            conv = nn.Conv2d(in_channels,
                             out_channels,
                             kernel_size=k_size,
                             stride=stride,
                             padding=dilated * (k_size - 1) // 2,
                             dilation=dilated,
                             bias=False if is_bn else True)
            nn.init.kaiming_uniform_(conv.weight, a=1)
            module = [
                conv,
            ]
            if is_bn:
                module.append(nn.BatchNorm2d(out_channels))
            if len(module) > 1:
                return nn.Sequential(*module)
            return conv

        return conv_uniform

    body = resnet50()
    in_channels_ = 256
    out_channels = 256 * 4

    in_channel_p6p7 = in_channels_ * 8
    fpn = FPN(in_channels_list=[
        0,
        in_channels_ * 2,
        in_channels_ * 4,
        in_channels_ * 8,
    ],
              out_channels=out_channels,
              conv_block=head(),
              top_blocks=LastLevelP6P7(in_channel_p6p7, out_channels))
    model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)]))
    model.out_channels = out_channels
    return model
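For reference, the channel arithmetic used when wiring the FPN above; the values match ResNet-50-style stage widths, and the note about the zero entry is an assumption based on similar FPN implementations:

in_channels_ = 256
in_channels_list = [0, in_channels_ * 2, in_channels_ * 4, in_channels_ * 8]
in_channel_p6p7 = in_channels_ * 8

# Commonly, a zero entry marks a pyramid level the FPN skips (assumption).
print(in_channels_list)  # [0, 512, 1024, 2048]
print(in_channel_p6p7)   # 2048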
Example #8
    def __init__(self, layers, block, num_classes):
        super(ResNet, self).__init__()

        self.training = True

        self.in_plane = 64

        self.conv1 = ConvBlock(3, 64, kernel=7, stride=2, pad=3, bias=False)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Needs to be converted to a for loop
        self.conv_2 = self._make_layers(block, layers[0], 64)
        self.conv_3 = self._make_layers(block, layers[1], 128, stride=2)
        self.conv_4 = self._make_layers(block, layers[2], 256, stride=2)
        self.conv_5 = self._make_layers(block, layers[3], 512, stride=2)

        # Feature Pyramid Network
        self.fpn = FPN([128, 256, 512])

        # Regression Model
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256,
                                                       num_classes=num_classes)

        # Anchors
        self.anchors = RPN()

        # Focal Loss
        self.focalLoss = FocalLoss()

        # Utils Function
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

        self._init_weights()
Example #9
    def __init__(self, num_classes=2):
        super(Detector, self).__init__()
        self.fpn = FPN()
        self.num_classes = num_classes
        self.num_anchors = 9  # assumed: 3 scales x 3 ratios per location
        self.loc_head = self._make_head(self.num_anchors * 4)
        self.cls_head = self._make_head(self.num_anchors * self.num_classes)
Example #10
    def __init__(self, pretrained=None):
        super(RetinaNet, self).__init__()
        self.backbone = ResNet()
        self.neck = FPN()
        self.bbox_head = RetinaHead()
        self.init_weights()
Example #11
def make_fpn_efficientnet(name: str = 'efficientnet_b0',
                          fpn_type: str = 'fpn',
                          out_size: Tuple[int, int] = (224, 224),
                          fpn_channels: int = 256,
                          num_classes: int = 1000,
                          pretrained: Optional[str] = 'imagenet',
                          in_channels: int = 3) -> nn.Module:
    """Loads the PyTorch implementation of EfficientNet from
    https://github.com/lukemelas/EfficientNet-PyTorch using torch.hub.

    Args:
        name (str, optional): Name of the EfficientNet backbone. Only those
            available in the lukemelas/EfficientNet-PyTorch repos are
            supported. Defaults to 'efficientnet_b0'.
        fpn_type (str, optional): Type of FPN. 'fpn' | 'panoptic' | 'panet+fpn'.
            Defaults to 'fpn'.
        out_size (Tuple[int, int], optional): Size of segmentation output.
            Defaults to (224, 224).
        fpn_channels (int, optional): Number of hidden channels to use in the
            FPN. Defaults to 256.
        num_classes (int, optional): Number of classes for which to make
            predictions. Determines the channel width of the output.
            Defaults to 1000.
        pretrained (Optional[str], optional): One of
            None | 'imagenet' | 'advprop'. See lukemelas/EfficientNet-PyTorch
            for details. Defaults to 'imagenet'.
        in_channels (int, optional): Channel width of the input. If greater
            than 3, a parallel backbone is added to incorporate the new
            channels and the feature maps of the two backbones are added
            together to produce the final feature maps. Note that this is
            currently different from make_fpn_resnet. See
            lukemelas/EfficientNet-PyTorch for the in_channels < 3 case.
            Defaults to 3.

    Raises:
        NotImplementedError: On unknown fpn_type.

    Returns:
        nn.Module: the FPN model
    """
    effnet = _load_efficientnet(name=name,
                                num_classes=num_classes,
                                pretrained=pretrained)

    if in_channels > 3:
        new_channels = in_channels - 3
        new_effnet = _load_efficientnet(
            name=name,
            num_classes=num_classes,
            pretrained=pretrained,
            in_channels=new_channels,
        )
        backbone = nn.Sequential(
            SplitTensor((3, new_channels), dim=1),
            Parallel([
                EfficientNetFeatureMapsExtractor(effnet),
                EfficientNetFeatureMapsExtractor(new_effnet)
            ]), AddAcross())
    else:
        backbone = EfficientNetFeatureMapsExtractor(effnet)

    feat_shapes = _get_shapes(backbone, channels=in_channels, size=out_size)
    if fpn_type == 'fpn':
        fpn = nn.Sequential(
            FPN(feat_shapes,
                hidden_channels=fpn_channels,
                out_channels=num_classes), SelectOne(idx=0))
    elif fpn_type == 'panoptic':
        fpn = PanopticFPN(feat_shapes,
                          hidden_channels=fpn_channels,
                          out_channels=num_classes)
    elif fpn_type == 'panet+fpn':
        feat_shapes2 = [(n, fpn_channels, h, w)
                        for (n, c, h, w) in feat_shapes]
        fpn = nn.Sequential(
            PANetFPN(feat_shapes,
                     hidden_channels=fpn_channels,
                     out_channels=fpn_channels),
            FPN(feat_shapes2,
                hidden_channels=fpn_channels,
                out_channels=num_classes), SelectOne(idx=0))
    else:
        raise NotImplementedError()
    # yapf: disable
    model = nn.Sequential(
        backbone,
        fpn,
        Interpolate(size=out_size, mode='bilinear', align_corners=False))
    # yapf: enable
    return model
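A hedged usage sketch of make_fpn_efficientnet, assuming the module defining it is importable; the argument values are illustrative only:

import torch

# Hypothetical 4-channel input (e.g. RGB plus one extra band), 2-class output.
model = make_fpn_efficientnet(name='efficientnet_b0',
                              fpn_type='panoptic',
                              out_size=(224, 224),
                              num_classes=2,
                              pretrained=None,
                              in_channels=4)
x = torch.randn(1, 4, 224, 224)
with torch.no_grad():
    out = model(x)  # expected shape: (1, 2, 224, 224)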
Example #12
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, val=0)

    def forward(self, x):
        x = self.reg_head(x)

        return x


if __name__ == '__main__':
    image_h, image_w = 640, 640
    from fpn import FPN
    fpn_model = FPN(512, 1024, 2048, 256)
    C3 = torch.randn(3, 512, 80, 80)
    C4 = torch.randn(3, 1024, 40, 40)
    C5 = torch.randn(3, 2048, 20, 20)
    features = fpn_model([C3, C4, C5])

    print("1111", features[0].shape)

    cls_model = ClsHead(256, 9, 80)
    reg_model = RegHead(256, 9)

    cls_output = cls_model(features[0])
    reg_output = reg_model(features[0])

    print("2222", cls_output.shape, reg_output.shape)
Example #13
    shuffle=True,
    num_workers=4
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4
)

net = None


if args.depth == 50:
    teacher_net = resnet_bl.resnet50(num_classes=args.class_num)
    teacher_fpn = FPN(in_channels=[256,512,1024,2048], out_channels=256, num_outs=5)
    teacher_fpn_head = FPNHead()
    print("using resnet 50")


teacher_net.to(device)
teacher_fpn.to(device)
teacher_fpn_head.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(teacher_net.parameters(), lr=LR, weight_decay=5e-4, momentum=0.9)

if args.multigpu:
    teacher_net = torch.nn.DataParallel(teacher_net.cuda())

if __name__ == "__main__":
    best_acc = 0
Example #14
    def __init__(self,
                 mode,
                 rpn_anchor_ratios,
                 rpn_anchor_scales,
                 mask_shape,
                 pool_size,
                 image_shape,
                 mini_mask_shape,
                 backbone_strides,
                 mean_pixel,
                 roi_size=7,
                 backbone='resnet50',
                 stage5=True,
                 norm='batch',
                 use_bias=True,
                 rpn_anchor_stride=1,
                 image_per_gpu=1,
                 gpu_count=1,
                 detection_max_instances=100,
                 train_rois_per_image=200,
                 num_classes=1,
                 use_mini_mask=True,
                 use_pretrained_model=True,
                 top_down_pyramid_size=256,
                 post_nms_rois_training=2000,
                 post_nms_rois_inference=1000,
                 pre_nms_limit=6000,
                 rpn_nms_threshold=0.7,
                 use_rpn_rois=True,
                 model_dir=None,
                 optimizer_method='Adam',
                 learning_rate=0.001,
                 momentum=0.9,
                 weight_decay=0.0001,
                 image_min_dim=800,
                 image_max_dim=1024,
                 image_min_scale=0.0,
                 image_resize_mode='square',
                 max_gt_instances=100,
                 rpn_train_anchors_per_image=256):

        assert mode in ['training', 'inference']
        assert optimizer_method in ['Adam', 'SGD']

        tf.reset_default_graph()
        self.graph = tf.Graph()

        self.mode = mode
        self.rpn_anchor_ratios = rpn_anchor_ratios
        self.rpn_anchor_scales = rpn_anchor_scales
        self.mask_shape = mask_shape
        self.pool_size = pool_size
        self.image_shape = np.array(image_shape)
        self.mini_mask_shape = mini_mask_shape
        self.backbone_strides = backbone_strides
        self.mean_pixel = mean_pixel

        self.roi_size = roi_size
        self.backbone = backbone
        self.stage5 = stage5
        self.norm = norm
        self.use_bias = use_bias
        self.rpn_anchor_stride = rpn_anchor_stride
        self.image_per_gpu = image_per_gpu
        self.gpu_count = gpu_count
        self.detection_max_instances = detection_max_instances
        self.train_rois_per_image = train_rois_per_image
        self.num_classes = num_classes
        self.use_mini_mask = use_mini_mask
        self.use_pretrained_model = use_pretrained_model
        self.top_down_pyramid_size = top_down_pyramid_size
        self.post_nms_rois_training = post_nms_rois_training
        self.post_nms_rois_inference = post_nms_rois_inference
        self.pre_nms_limit = pre_nms_limit
        self.rpn_nms_threshold = rpn_nms_threshold
        self.use_rpn_rois = use_rpn_rois
        self.model_dir = model_dir
        self.optimizer_method = optimizer_method
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.image_min_dim = image_min_dim
        self.image_max_dim = image_max_dim
        self.image_min_scale = image_min_scale
        self.image_resize_mode = image_resize_mode
        self.max_gt_instances = max_gt_instances
        self.rpn_train_anchors_per_image = rpn_train_anchors_per_image

        self.image_meta_size = 1 + 3 + 3 + 4 + 1 + self.num_classes
        self.reuse = False
        self._anchor_cache = {}
        self.batch_size = self.gpu_count * self.image_per_gpu
        self.backbone_shape = utils.compute_backbone_shapes(
            self.backbone, self.backbone_strides, self.image_shape)
        self.num_anchors_per_image = len(self.rpn_anchor_ratios) * (
            self.backbone_shape[0][0] * self.backbone_shape[0][0] +
            self.backbone_shape[1][0] * self.backbone_shape[1][0] +
            self.backbone_shape[2][0] * self.backbone_shape[2][0] +
            self.backbone_shape[3][0] * self.backbone_shape[3][0] +
            self.backbone_shape[4][0] * self.backbone_shape[4][0])

        with self.graph.as_default():

            self.is_training = tf.placeholder_with_default(False, [])
            self.input_image = tf.placeholder(dtype=tf.float32,
                                              shape=[
                                                  None, self.image_shape[0],
                                                  self.image_shape[1],
                                                  self.image_shape[2]
                                              ],
                                              name='input_image')
            self.input_image_meta = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self.image_meta_size],
                name='input_image_meta')

            if mode == 'training':
                self.input_rpn_match = tf.placeholder(
                    dtype=tf.int32,
                    shape=[None, self.num_anchors_per_image, 1],
                    name='input_rpn_match')
                self.input_rpn_boxes = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, self.rpn_train_anchors_per_image, 4],
                    name='input_rpn_boxes')
                self.input_gt_class_ids = tf.placeholder(
                    dtype=tf.int32,
                    shape=[None, self.max_gt_instances],
                    name='input_gt_class_ids')
                self.input_gt_boxes = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, self.max_gt_instances, 4],
                    name='input_gt_boxes')
                self.input_gt_boxes_normalized = utils.norm_boxes_graph(
                    self.input_gt_boxes,
                    tf.shape(self.input_image)[1:3])
                self.proposal_count = self.post_nms_rois_training
                if self.use_mini_mask:
                    self.input_gt_masks = tf.placeholder(
                        dtype=tf.bool,
                        shape=[
                            None, self.mini_mask_shape[0],
                            self.mini_mask_shape[1], self.max_gt_instances
                        ],
                        name='input_gt_mask')
                else:
                    self.input_gt_masks = tf.placeholder(
                        dtype=tf.bool,
                        shape=[
                            None, self.image_shape[0], self.image_shape[1],
                            self.max_gt_instances
                        ],
                        name='input_gt_mask')

            elif mode == 'inference':
                self.input_anchors = tf.placeholder(dtype=tf.float32,
                                                    shape=[None, None, 4],
                                                    name='input_anchors')
                self.proposal_count = self.post_nms_rois_inference

            self.resnet = Resnet(name='resnet',
                                 architecture=self.backbone,
                                 is_training=self.is_training,
                                 stage5=self.stage5,
                                 use_bias=self.use_bias)

            arg_scope = nets.resnet_v2.resnet_arg_scope()
            with slim.arg_scope(arg_scope):
                _, self.end_points = nets.resnet_v2.resnet_v2_50(
                    self.input_image,
                    num_classes=None,
                    is_training=self.is_training)

            self.fpn = FPN(name='fpn',
                           top_down_pyramid_size=self.top_down_pyramid_size,
                           use_bias=self.use_bias)

            self.rpn = RPN(name='rpn',
                           anchors_per_location=len(self.rpn_anchor_ratios),
                           anchor_stride=self.rpn_anchor_stride,
                           is_training=self.is_training,
                           use_bias=self.use_bias)
            self.proposal = ProposalLayer(self.pre_nms_limit,
                                          self.proposal_count,
                                          self.rpn_nms_threshold,
                                          self.image_per_gpu)
            self.pyramidRoiPooling = PyramidRoiPooling(
                name='PyramidRoiPooling', roi_size=self.roi_size)
            self.objDetection = ObjDetection(
                image_per_gpu=self.image_per_gpu,
                gpu_count=self.gpu_count,
                detection_max_instances=self.detection_max_instances)
            self.targetDetection = TargetDetection(
                mask_shape=self.mask_shape,
                image_per_gpu=self.image_per_gpu,
                train_rois_per_image=self.train_rois_per_image)
            self.fpnClassifier = FpnClassifier('FpnClassifier',
                                               pool_size=self.pool_size,
                                               num_classes=self.num_classes,
                                               is_training=self.is_training)
            self.fpnMask = FpnMask('FpnMask',
                                   num_classes=self.num_classes,
                                   is_training=self.is_training)
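The image_meta_size = 1 + 3 + 3 + 4 + 1 + num_classes expression above follows the usual Mask R-CNN image-meta layout (image id, original shape, resized shape, window, scale, per-class active flags); a hedged, self-contained sketch of packing such a vector, with the field order assumed from that convention:

import numpy as np

def compose_image_meta(image_id, original_shape, image_shape, window, scale,
                       active_class_ids):
    """Pack per-image metadata into a flat vector:
    1 (id) + 3 (original shape) + 3 (resized shape) + 4 (window y1,x1,y2,x2)
    + 1 (scale) + num_classes (active-class flags)."""
    return np.concatenate([
        [image_id],
        list(original_shape),
        list(image_shape),
        list(window),
        [scale],
        list(active_class_ids),
    ]).astype(np.float32)

meta = compose_image_meta(0, (1024, 1024, 3), (1024, 1024, 3),
                          (0, 0, 1024, 1024), 1.0, np.ones(1))
assert meta.size == 1 + 3 + 3 + 4 + 1 + 1  # image_meta_size for num_classes=1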
Example #15
    shuffle=True,
    num_workers=4
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4
)

net = None


if args.depth == 50:
    net = resnet_bl.resnet50(num_classes=args.class_num)
    fpn = FPN(in_channels=[256,512,1024,2048], out_channels=256, num_outs=5)
    fpn_head = FPNHead()
    print("using resnet 50")


net.to(device)
fpn.to(device)
fpn_head.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, weight_decay=5e-4, momentum=0.9)

if args.multigpu:
    net = torch.nn.DataParallel(net.cuda())

if __name__ == "__main__":
    best_acc = 0
Example #16
from pycocotools.cocoeval import COCOeval
import json

os.environ["CUDA_VISIBLE_DEVICES"] = "5"

device = torch.device('cuda:0')

dataset_train = CocoDataset('../dataset', set_name='val2017',
                            transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
dataset_val = CocoDataset('../dataset', set_name='val2017',
                          transform=transforms.Compose([Normalizer(), Resizer()]))

sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

fpn = FPN()
net = Net()
anchors = Anchors()

fpn = fpn.to(device)
net = net.to(device)

criterion = AnchorBasedLoss()

optimizer1 = optim.Adam(fpn.parameters(), lr=1e-4)
optimizer2 = optim.Adam(net.parameters(), lr=1e-4)



def train():
    num = len(dataloader_train) * 2
Example #17
def make_fpn_resnet(name: str = 'resnet18',
                    fpn_type: str = 'fpn',
                    out_size: Tuple[int, int] = (224, 224),
                    fpn_channels: int = 256,
                    num_classes: int = 1000,
                    pretrained: bool = True,
                    in_channels: int = 3) -> nn.Module:
    """Create an FPN model with a ResNet backbone.

    If `in_channels > 3`, uses the fusion technique described in the paper,
    *FuseNet*, by Hazirbas et al.
    (https://vision.in.tum.de/_media/spezial/bib/hazirbasma2016fusenet.pdf)
    that adds a parallel resnet backbone for the new channels. All the
    pretrained weights are retained.

    Args:
        name (str, optional): Name of the resnet backbone. Only those available
            in torchvision are supported. Defaults to 'resnet18'.
        fpn_type (str, optional): Type of FPN. 'fpn' | 'panoptic' | 'panet'.
            Defaults to 'fpn'.
        out_size (Tuple[int, int], optional): Size of segmentation output.
            Defaults to (224, 224).
        fpn_channels (int, optional): Number of hidden channels to use in the
            FPN. Defaults to 256.
        num_classes (int, optional): Number of classes for which to make
            predictions. Determines the channel width of the output.
            Defaults to 1000.
        pretrained (bool, optional): Whether to use pretrained backbone.
            Defaults to True.
        in_channels (int, optional): Channel width of the input. If less than
            3, conv1 is replaced with a smaller one. If greater than 3, a
            FuseNet-style architecture is used to incorporate the new channels.
            In both cases, pretrained weights are retained. Defaults to 3.

    Raises:
        NotImplementedError: On unknown fpn_type.

    Returns:
        nn.Module: the FPN model
    """
    assert in_channels > 0
    assert num_classes > 0
    assert out_size[0] > 0 and out_size[1] > 0

    resnet = tv.models.resnet.__dict__[name](pretrained=pretrained)
    if in_channels == 3:
        backbone = ResNetFeatureMapsExtractor(resnet)
    else:
        old_conv = resnet.conv1
        old_conv_args = {
            'out_channels': old_conv.out_channels,
            'kernel_size': old_conv.kernel_size,
            'stride': old_conv.stride,
            'padding': old_conv.padding,
            'dilation': old_conv.dilation,
            'groups': old_conv.groups,
            'bias': old_conv.bias is not None
        }
        if not pretrained:
            # just replace the first conv layer
            new_conv = nn.Conv2d(in_channels=in_channels, **old_conv_args)
            resnet.conv1 = new_conv
            backbone = ResNetFeatureMapsExtractor(resnet)
        else:
            if in_channels > 3:
                new_channels = in_channels - 3
                new_conv = nn.Conv2d(in_channels=new_channels, **old_conv_args)

                resnet_cls = tv.models.resnet.__dict__[name]
                new_resnet = resnet_cls(pretrained=pretrained)
                new_resnet.conv1 = copy_conv_weights(old_conv, new_conv)

                backbone = make_fusion_resnet_backbone(resnet, new_resnet)
            else:
                new_conv = nn.Conv2d(in_channels=in_channels, **old_conv_args)
                resnet.conv1 = copy_conv_weights(old_conv, new_conv)
                backbone = ResNetFeatureMapsExtractor(resnet)

    feat_shapes = _get_shapes(backbone, channels=in_channels, size=out_size)
    if fpn_type == 'fpn':
        fpn = nn.Sequential(
            FPN(feat_shapes,
                hidden_channels=fpn_channels,
                out_channels=num_classes), SelectOne(idx=0))
    elif fpn_type == 'panoptic':
        fpn = PanopticFPN(feat_shapes,
                          hidden_channels=fpn_channels,
                          out_channels=num_classes)
    elif fpn_type == 'panet':
        fpn1 = FPN(feat_shapes,
                   hidden_channels=fpn_channels,
                   out_channels=fpn_channels)

        feat_shapes = [(n, fpn_channels, h, w) for (n, c, h, w) in feat_shapes]
        fpn2 = FPN(feat_shapes[::-1],
                   hidden_channels=fpn_channels,
                   out_channels=num_classes)
        fpn = nn.Sequential(PANetFPN(fpn1, fpn2), SelectOne(idx=0))
    else:
        raise NotImplementedError()

    # yapf: disable
    model = nn.Sequential(
        backbone,
        fpn,
        Interpolate(size=out_size, mode='bilinear', align_corners=True))
    # yapf: enable
    return model
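A hedged usage sketch of make_fpn_resnet, assuming it is importable from the module above; values are illustrative. With pretrained=False and in_channels=5, the first conv is simply replaced to accept 5 channels; setting pretrained=True would instead exercise the FuseNet-style parallel backbone described in the docstring.

import torch

model = make_fpn_resnet(name='resnet18',
                        fpn_type='fpn',
                        out_size=(224, 224),
                        num_classes=3,
                        pretrained=False,
                        in_channels=5)
x = torch.randn(1, 5, 224, 224)
with torch.no_grad():
    out = model(x)  # expected shape: (1, 3, 224, 224)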
Example #18
    def __init__(self,
                 block,
                 layers,
                 fpn_in_channels,
                 fpn_out_channels,
                 fpn_num_outs,
                 num_classes=100,
                 zero_init_residual=False,
                 groups=1,
                 width_per_group=64,
                 replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(
                                 replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3,
                               self.inplanes,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block,
                                       128,
                                       layers[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block,
                                       256,
                                       layers[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[2])

        self.scala4 = nn.AvgPool2d(4, 4)

        self.fc4 = nn.Linear(512 * block.expansion, num_classes)
        self.fpn = FPN(in_channels=fpn_in_channels,
                       out_channels=fpn_out_channels,
                       num_outs=fpn_num_outs)
        self.fpn_head = FPNHead(num_classes=num_classes, n_maps=fpn_num_outs)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)
Example #19
# -*- coding: utf-8 -*-
"""
@File    : fpn_test.py
@Time    : 12/12/20 9:40 PM
@Author  : Mingqiang Ning
@Email   : [email protected]
@Modify Time        @Version    @Description
------------        --------    -----------
12/12/20 9:40 PM      1.0         None
# @Software: PyCharm
"""
import torch
from fpn import FPN
net = FPN([3, 4, 6, 3]).cuda()
print(net)
x = torch.randn(1, 3, 224, 224).cuda()  # avoid shadowing the builtin `input`
output = net(x)
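A hedged continuation of the smoke test above, assuming the forward pass returns an iterable of pyramid feature maps:

# Inspect the pyramid levels (assumes a tuple/list of tensors; the level
# numbering below is illustrative, not taken from the FPN implementation).
for i, feat in enumerate(output):
    print('level %d:' % i, tuple(feat.shape))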