def output_shape(self):
    """
    Describe the output feature shape of this module.

    ``self._output_size`` is either a plain int, taken as the channel
    count, or an indexable whose first three items are read as
    (channels, height, width).

    Returns:
        ShapeSpec: the output feature shape
    """
    size = self._output_size
    if not isinstance(size, int):
        return ShapeSpec(channels=size[0], height=size[1], width=size[2])
    return ShapeSpec(channels=size)
def build_torch_backbone(cfg, input_shape=None):
    """
    Construct a ``ResnetBackbone`` from ``cfg``.

    Args:
        cfg: config object, forwarded to ``ResnetBackbone``.
        input_shape: shape of the input; when ``None`` a 3-channel
            ``ShapeSpec`` is used.

    Returns:
        The constructed backbone, asserted to be a ``Backbone`` instance.
    """
    shape = ShapeSpec(channels=3) if input_shape is None else input_shape
    backbone = ResnetBackbone(cfg, shape)
    assert isinstance(backbone, Backbone)
    return backbone
def __init__(
    self,
    input_shape,
    box2box_transform,
    num_classes,
    cls_agnostic_bbox_reg=False,
    smooth_l1_beta=0.0,
    test_score_thresh=0.0,
    test_nms_thresh=0.5,
    test_topk_per_image=100,
):
    """
    Create the classification and box-regression linear layers.

    Args:
        input_shape: a ``ShapeSpec`` or, for backward compatibility, an int
            that is taken as the channel count.
        box2box_transform: object exposing ``weights``; its length sets the
            number of regression outputs per class.
        num_classes: number of foreground classes.
        cls_agnostic_bbox_reg: if true, a single set of box deltas is
            predicted instead of one set per class.
        smooth_l1_beta, test_score_thresh, test_nms_thresh,
        test_topk_per_image: stored unchanged on the instance.
    """
    super().__init__()

    # some backward compatibility: a bare int is the channel count
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)

    # Flattened feature length; a missing width/height counts as 1.
    channels = input_shape.channels
    width = input_shape.width or 1
    height = input_shape.height or 1
    input_size = channels * width * height

    # The prediction layer for num_classes foreground classes and one
    # background class (hence + 1)
    self.cls_score = Linear(input_size, num_classes + 1)

    box_dim = len(box2box_transform.weights)
    num_bbox_reg_classes = num_classes
    if cls_agnostic_bbox_reg:
        num_bbox_reg_classes = 1
    self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

    # Both layers are built before any re-initialisation so the sequence of
    # random draws is cls_score first, then bbox_pred.
    for layer, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
        nn.init.normal_(layer.weight, std=std)
        nn.init.constant_(layer.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
def _init_mask_head(cls, cfg, input_shape):
    """
    Build the mask-branch components from hard-coded settings.

    Args:
        cfg: config object, forwarded to
            ``MaskRCNNConvUpsampleHead.from_config``.
        input_shape: mapping indexed by feature name ('p2' .. 'p5'); each
            entry must expose ``.channels`` and ``.stride``.

    Returns:
        dict: with keys 'mask_in_features', 'mask_pooler' and 'mask_head'.
    """
    mask_in_features = ['p2', 'p3', 'p4', 'p5']
    head_resolution = 7
    mask_pooler_resolution = 14

    # NOTE(review): this stays a list of per-level channel counts and is
    # passed as ``channels`` below, whereas _init_box_head collapses the
    # same list to a single int -- confirm the mask head expects a list.
    in_channels = [input_shape[name].channels for name in mask_in_features]
    pooler_scales = [1.0 / input_shape[name].stride for name in mask_in_features]

    mask_pooler = ROIPooler(
        output_size=mask_pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=0,
        pooler_type='ROIAlignV2',
    )

    # NOTE(review): the head shape uses 7x7 although the pooler above is
    # configured for 14 -- TODO confirm this is intended.
    head_input_shape = ShapeSpec(
        channels=in_channels,
        width=head_resolution,
        height=head_resolution,
    )
    head_kwargs = MaskRCNNConvUpsampleHead.from_config(cfg, head_input_shape)

    return {
        'mask_in_features': mask_in_features,
        'mask_pooler': mask_pooler,
        'mask_head': MaskRCNNConvUpsampleHead(**head_kwargs),
    }
def _init_box_head(cls, cfg, input_shape):
    """
    Build the box-branch components for a three-stage cascade.

    One box head, one box predictor and one proposal matcher are created
    per cascade stage. Each stage uses its own IoU threshold and its own
    box-regression weights.

    Args:
        cfg: config object; ``cfg.NUM_CLASSES`` is read here and the object
            is forwarded to ``FastRCNNConvFCHead.from_config``.
        input_shape: mapping indexed by feature name ('p2' .. 'p5'); each
            entry must expose ``.channels`` and ``.stride``.

    Returns:
        dict: with keys 'box_in_features', 'box_pooler', 'box_heads',
        'box_predictors' and 'proposal_matchers'; the last three are lists
        with one element per cascade stage.
    """
    in_features = ['p2', 'p3', 'p4', 'p5']
    pooler_resolution = 7
    pooler_scales = [1.0 / input_shape[k].stride for k in in_features]
    sampling_ratio = 0
    pooler_type = 'ROIAlignV2'

    # Per-stage settings; entry i of each tuple belongs to cascade stage i.
    cascade_bbox_reg_weights = (
        (10.0, 10.0, 5.0, 5.0),
        (20.0, 20.0, 10.0, 10.0),
        (30.0, 30.0, 15.0, 15.0),
    )
    cascade_ious = (0.5, 0.6, 0.7)
    assert len(cascade_bbox_reg_weights) == len(cascade_ious)

    in_channels = [input_shape[f].channels for f in in_features]
    # Check all channel counts are equal
    assert len(set(in_channels)) == 1, in_channels
    in_channels = in_channels[0]

    box_pooler = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )
    pooled_shape = ShapeSpec(
        channels=in_channels,
        width=pooler_resolution,
        height=pooler_resolution,
    )

    box_heads, box_predictors, proposal_matchers = [], [], []
    for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights):
        box_head_cfg = FastRCNNConvFCHead.from_config(cfg, pooled_shape)
        box_head = FastRCNNConvFCHead(**box_head_cfg)
        box_heads.append(box_head)

        predictor_kwargs = {
            "input_shape": box_head.output_shape,
            # Use this stage's own regression weights. Previously every
            # stage was given (10, 10, 5, 5), which left the per-stage
            # table above unused.
            "box2box_transform": Box2BoxTransform(weights=bbox_reg_weights),
            "num_classes": cfg.NUM_CLASSES,
            "cls_agnostic_bbox_reg": True,
            "smooth_l1_beta": 0.0,
            "test_score_thresh": 0.05,
            "test_nms_thresh": 0.5,
            "test_topk_per_image": 100,
        }
        box_predictors.append(FastRCNNOutputLayers(**predictor_kwargs))
        proposal_matchers.append(
            Matcher([match_iou], [0, 1], allow_low_quality_matches=False)
        )

    return {
        "box_in_features": in_features,
        "box_pooler": box_pooler,
        "box_heads": box_heads,
        "box_predictors": box_predictors,
        "proposal_matchers": proposal_matchers,
    }
def __init__(self, cfg):
    """
    Assemble the model: backbone, RPN and cascade ROI heads.

    Args:
        cfg: config object, forwarded to the backbone builder, ``RPN`` and
            ``CascadeROIHeads.from_config``.
    """
    super().__init__()

    # The backbone is built for a 3-channel input.
    self.backbone = build_resnet_fpn_backbone(cfg, ShapeSpec(channels=3))

    # RPN and ROI heads are both sized from the backbone's output shapes.
    self.rpn = RPN(cfg, self.backbone.output_shape())
    roi_head_kwargs = CascadeROIHeads.from_config(
        cfg, self.backbone.output_shape()
    )
    self.roi_head = CascadeROIHeads(**roi_head_kwargs)

    # Pick the first CUDA device when one is available, else the CPU.
    if torch.cuda.is_available():
        device_name = "cuda:0"
    else:
        device_name = "cpu"
    self.device = torch.device(device_name)
def output_shape(self):
    """
    Describe every output feature of this module.

    Returns:
        dict[str->ShapeSpec]: one entry per name in ``self._out_features``,
        carrying that feature's channel count and stride.
    """
    # this is a backward-compatible default
    shapes = {}
    for name in self._out_features:
        shapes[name] = ShapeSpec(
            channels=self._out_feature_channels[name],
            stride=self._out_feature_strides[name],
        )
    return shapes
def output_shape(self):
    """
    Map each output feature name to its shape description.

    Returns:
        dict[str->ShapeSpec]: keyed by the names in ``self._out_features``;
        each value holds the channel count and stride recorded for that
        feature.
    """
    spec_by_name = {}
    for feature_name in self._out_features:
        spec_by_name[feature_name] = ShapeSpec(
            channels=self._out_feature_channels[feature_name],
            stride=self._out_feature_strides[feature_name],
        )
    return spec_by_name
from utils import ShapeSpec

cfg = ED()
# TODO: Most hyperparameters are still hard-coded elsewhere (which keeps
# testing easy); they may all be moved into this file later.
# trainer
# epoch, batch size, and so on.

# base info
cfg.NUM_CLASSES = 1
cfg.EPOCH = 100
cfg.lr = 1e-4

# data
cfg.ROOT = 'datasets/wgisd'
cfg.BATCH_SIZE = 3
cfg.NUM_WORKERS = 8
cfg.RESIZE = (800, 1280)

# backbone
cfg.BACKBONE_DEPTH = 101
# One entry per backbone stage: (name, channels, stride). Height and width
# are left unspecified.
cfg.BACKBONE_OUTPUT_SHAPE = {
    stage: ShapeSpec(channels=channels, height=None, width=None, stride=stride)
    for stage, channels, stride in (
        ('res2', 256, 4),
        ('res3', 512, 8),
        ('res4', 1024, 16),
        ('res5', 2048, 32),
    )
}

# MASK
cfg.CLS_AGNOSTIC_MASK = False