def test_dump_IR_tracing(self):
    cfg = get_cfg()
    cfg.MODEL.RESNETS.DEPTH = 18
    cfg.MODEL.RESNETS.RES2_OUT_CHANNELS = 64

    class Mod(nn.Module):
        def forward(self, x):
            return tuple(self.m(x).values())

    model = Mod()
    model.m = build_backbone(cfg)
    model.eval()

    with torch.no_grad():
        ts_model = torch.jit.trace(model, (torch.rand(2, 3, 224, 224),))

    with tempfile.TemporaryDirectory(prefix="detectron2_test") as d:
        dump_torchscript_IR(ts_model, d)
        # check that the files are created
        for name in ["model_ts_code", "model_ts_IR", "model_ts_IR_inlined", "model"]:
            fname = os.path.join(d, name + ".txt")
            self.assertTrue(os.stat(fname).st_size > 0, fname)
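# Companion usage sketch (an addition, not part of the test above): a traced
# module such as ts_model can also be round-tripped with the standard
# torch.jit save/load API, independently of the dumped IR text files.
with tempfile.TemporaryDirectory(prefix="detectron2_test") as d2:
    path = os.path.join(d2, "model.ts")
    ts_model.save(path)
    reloaded = torch.jit.load(path)
    out = reloaded(torch.rand(2, 3, 224, 224))  # tuple of backbone feature maps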
def __init__(self, cfg):
    super().__init__()

    self.device = torch.device(cfg.MODEL.DEVICE)

    self.mask_on = cfg.MODEL.MASK_ON
    self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    self.num_classes = cfg.MODEL.MSBCNet.NUM_CLASSES
    self.num_proposals = cfg.MODEL.MSBCNet.NUM_PROPOSALS
    self.hidden_dim = cfg.MODEL.MSBCNet.HIDDEN_DIM
    self.num_heads = cfg.MODEL.MSBCNet.NUM_HEADS

    # Build Backbone.
    self.backbone = build_backbone(cfg)
    self.size_divisibility = self.backbone.size_divisibility

    # Build Proposals.
    self.init_proposal_features = nn.Embedding(self.num_proposals, self.hidden_dim)
    self.init_proposal_boxes = nn.Embedding(self.num_proposals, 4)
    nn.init.constant_(self.init_proposal_boxes.weight[:, :2], 0.5)
    nn.init.constant_(self.init_proposal_boxes.weight[:, 2:], 1.0)

    # Build Dynamic Head.
    self.head = DynamicHead(cfg=cfg, roi_input_shape=self.backbone.output_shape())

    # Loss parameters:
    class_weight = cfg.MODEL.MSBCNet.CLASS_WEIGHT
    giou_weight = cfg.MODEL.MSBCNet.GIOU_WEIGHT
    l1_weight = cfg.MODEL.MSBCNet.L1_WEIGHT
    no_object_weight = cfg.MODEL.MSBCNet.NO_OBJECT_WEIGHT
    self.deep_supervision = cfg.MODEL.MSBCNet.DEEP_SUPERVISION
    self.use_focal = cfg.MODEL.MSBCNet.USE_FOCAL

    # Build Criterion.
    matcher = HungarianMatcher(cfg=cfg,
                               cost_class=class_weight,
                               cost_bbox=l1_weight,
                               cost_giou=giou_weight,
                               use_focal=self.use_focal)
    weight_dict = {"loss_ce": class_weight, "loss_bbox": l1_weight, "loss_giou": giou_weight}
    if self.deep_supervision:
        aux_weight_dict = {}
        for i in range(self.num_heads - 1):
            aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()})
        weight_dict.update(aux_weight_dict)

    losses = ["labels", "boxes"]
    self.criterion = SetCriterion(cfg=cfg,
                                  num_classes=self.num_classes,
                                  matcher=matcher,
                                  weight_dict=weight_dict,
                                  eos_coef=no_object_weight,
                                  losses=losses,
                                  use_focal=self.use_focal)

    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
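# Note (an assumption about the box convention, based on similar sparse-detection
# code): the two nn.init.constant_ calls above initialize every learnable
# proposal box to (cx, cy, w, h) = (0.5, 0.5, 1.0, 1.0) in normalized
# coordinates, i.e. a box centered on the image that covers it entirely.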
def __init__(self, cfg): nn.Module.__init__(self) self.backbone = build_backbone(cfg) self.out_features = ["out"] assert cfg.MODEL.BACKBONE.SIMPLE is True self.feature_strides = [cfg.MODEL.BACKBONE.STRIDE] self.num_channels = [cfg.MODEL.BACKBONE.CHANNEL] self.strides = [cfg.MODEL.BACKBONE.STRIDE]
def __init__(self, cfg):
    super().__init__()
    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()
    self.feature_strides = [backbone_shape[f].stride for f in backbone_shape.keys()]
    self.num_channels = backbone_shape[list(backbone_shape.keys())[-1]].channels
def __init__(self, cfg):
    super(MaskedBackbone, self).__init__()
    self.backbone = build_backbone(cfg)
    self.strides = [
        self.backbone.output_shape()[key].stride
        for key in self.backbone.output_shape().keys()
    ]
    self.num_channels = self.backbone.output_shape()[
        cfg.MODEL.RESNETS.OUT_FEATURES[-1]].channels
def __init__(self, cfg):
    super().__init__()
    self.mean, self.std = cfg.MODEL.PIXEL_MEAN, cfg.MODEL.PIXEL_STD
    self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
    self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
    self.backbone = build_backbone(cfg)
    # TODO: change to a more precise name
    self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
def __init__(self, cfg):
    super().__init__()
    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()
    self.feature_strides = [backbone_shape[f].stride for f in backbone_shape.keys()]
    self.num_channels = backbone_shape[
        list(backbone_shape.keys())[cfg.MODEL.DETR.INDEX_FEEDFORWARD]
    ].channels
def __init__(self, cfg=None):
    super(ClassificationBackbone, self).__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.num_classes = 1000
    self.backbone = build_backbone(cfg)
    self.to(self.device)
def __init__(self, cfg):
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.backbone = build_backbone(cfg)
    self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
    self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
    self.mask_threshold = cfg.MODEL.ROI_MASK_HEAD.MASK_THRESHOLD
    self.nms = cfg.MODEL.ROI_MASK_HEAD.NMS
    self.depth_head_on = cfg.MODEL.DEPTH_ON
    if self.depth_head_on:
        self.depth_head = build_depth_head(cfg)
    self.camera_on = cfg.MODEL.CAMERA_ON
    if self.camera_on:
        self.camera_head = build_camera_head(cfg)
    self.input_format = cfg.INPUT.FORMAT

    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
    num_channels = len(cfg.MODEL.PIXEL_MEAN)
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std

    self.embedding_on = cfg.MODEL.EMBEDDING_ON
    if self.embedding_on:
        self._asnet_on = cfg.MODEL.ROI_EMBEDDING_HEAD.NAME == "EmbeddingRCNNASNetHead"
        self.embedding_loss_weight = cfg.MODEL.ROI_EMBEDDING_HEAD.LOSS_WEIGHT
        if cfg.MODEL.ROI_EMBEDDING_HEAD.LOSS_TYPE == "TripletLoss":
            if not self._asnet_on:
                self.embedding_loss = OnlineTripletLoss(
                    cfg.MODEL.ROI_EMBEDDING_HEAD.MARGIN,
                    cfg.MODEL.DEVICE,
                    selector_type=cfg.MODEL.ROI_EMBEDDING_HEAD.TRIPLET_SELECTOR_TYPE,
                )
            else:
                self.embedding_loss = CooperativeTripletLoss(
                    cfg.MODEL.ROI_EMBEDDING_HEAD.MARGIN,
                    cfg.MODEL.DEVICE,
                    selector_type=cfg.MODEL.ROI_EMBEDDING_HEAD.TRIPLET_SELECTOR_TYPE,
                )
        else:
            raise NotImplementedError

    self._eval_gt_box = cfg.TEST.EVAL_GT_BOX
    self.to(self.device)

    self._freeze = cfg.MODEL.FREEZE
    for layers in self._freeze:
        layer = layers.split(".")
        final = self
        for l in layer:
            final = getattr(final, l)
        for params in final.parameters():
            params.requires_grad = False
def __init__(self, cfg):
    nn.Module.__init__(self)
    self.backbone = build_backbone(cfg)
    self.out_features = cfg.MODEL.FBNET_V2.OUT_FEATURES
    self.feature_strides = list(self.backbone._out_feature_strides.values())
    self.num_channels = [self.backbone._out_feature_channels[k] for k in self.out_features]
    self.strides = [self.backbone._out_feature_strides[k] for k in self.out_features]
def __init__(self, cfg):
    super().__init__()

    self.device = torch.device(cfg.MODEL.DEVICE)
    # Whether to use non-maximum suppression; defaults to False.
    self.nms = cfg.MODEL.OneNet.NMS
    # [res2, res3, res4, res5]
    self.in_features = cfg.MODEL.OneNet.IN_FEATURES
    # num_classes: 80
    self.num_classes = cfg.MODEL.OneNet.NUM_CLASSES
    # 100 is the limit for COCO
    self.num_boxes = cfg.TEST.DETECTIONS_PER_IMAGE

    # Build Backbone (ResNet-50 by default).
    self.backbone = build_backbone(cfg)
    # Default is 0.
    self.size_divisibility = self.backbone.size_divisibility

    # Build Head: returns class_logits and pred_boxes.
    # backbone_shape: dict['res{k}': ShapeSpec(channels, ..., stride)],
    # describing the channels and current stride of each feature map.
    self.head = Head(cfg=cfg, backbone_shape=self.backbone.output_shape())

    # Loss parameters:
    class_weight = cfg.MODEL.OneNet.CLASS_WEIGHT  # 2.0
    giou_weight = cfg.MODEL.OneNet.GIOU_WEIGHT    # 2.0
    l1_weight = cfg.MODEL.OneNet.L1_WEIGHT        # 5.0, distance between center points

    # Build Criterion.
    matcher = MinCostMatcher(cfg=cfg,
                             cost_class=class_weight,
                             cost_bbox=l1_weight,
                             cost_giou=giou_weight)
    weight_dict = {"loss_ce": class_weight, "loss_bbox": l1_weight, "loss_giou": giou_weight}
    losses = ["labels", "boxes"]
    self.criterion = SetCriterion(cfg=cfg,
                                  num_classes=self.num_classes,
                                  matcher=matcher,
                                  weight_dict=weight_dict,
                                  losses=losses)

    # pixel_mean: list[3]
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    # x: C x H x W; normalize the pixels.
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def __init__(self, cfg):
    super().__init__()

    self.device = torch.device(cfg.MODEL.DEVICE)
    self.nms = cfg.MODEL.OneNet.NMS
    self.in_features = cfg.MODEL.OneNet.IN_FEATURES
    self.num_classes = cfg.MODEL.OneNet.NUM_CLASSES
    self.num_boxes = cfg.TEST.DETECTIONS_PER_IMAGE
    self.head_type = cfg.MODEL.OneNet.HEAD

    # Build Backbone.
    self.backbone = build_backbone(cfg)
    self.size_divisibility = self.backbone.size_divisibility

    # Build Head.
    if self.head_type == "CenterNet":
        self.head = Head(cfg=cfg, backbone_shape=self.backbone.output_shape())
    elif self.head_type == 'RetinaNet':
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in cfg.MODEL.OneNet.IN_FEATURES]
        self.head = RetinaHead(cfg=cfg, feature_shapes=feature_shapes)
    elif self.head_type == "FCOS":
        self.head = FCOSHead(cfg=cfg)
    else:
        raise NotImplementedError

    # Build Criterion.
    matcher = MinCostMatcher(cfg=cfg,
                             cost_class=cfg.MODEL.OneNet.CLASS_COST,
                             cost_bbox=cfg.MODEL.OneNet.L1_COST,
                             cost_giou=cfg.MODEL.OneNet.GIOU_COST)

    # Loss parameters:
    class_weight = cfg.MODEL.OneNet.CLASS_WEIGHT
    giou_weight = cfg.MODEL.OneNet.GIOU_WEIGHT
    l1_weight = cfg.MODEL.OneNet.L1_WEIGHT

    weight_dict = {"loss_ce": class_weight, "loss_bbox": l1_weight, "loss_giou": giou_weight}
    losses = ["labels", "boxes"]
    self.criterion = SetCriterion(cfg=cfg,
                                  num_classes=self.num_classes,
                                  matcher=matcher,
                                  weight_dict=weight_dict,
                                  losses=losses)

    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def __init__(self, cfg=None):
    super(Yolov3, self).__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.num_classes = cfg.MODEL.YOLOV3.NUM_CLASSES
    self.norm = cfg.MODEL.YOLOV3.NORM
    self.in_features = cfg.MODEL.YOLOV3.IN_FEATURES
    self.anchors = cfg.MODEL.ANCHOR_GENERATOR.SIZES

    # Inference parameters:
    self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
    self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
    self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE

    self.backbone = build_backbone(cfg)
    self.head = Yolov3Head(
        in_features=self.in_features,
        in_channels=[self.backbone._out_feature_channels[f] for f in self.in_features],
        out_channels=cfg.MODEL.YOLOV3.HEAD.OUT_CHANNELS,
        num_classes=self.num_classes,
        num_anchors_per_cell=3,
        norm=self.norm)

    backbone_shape = self.backbone.output_shape()
    self.feature_strides = [backbone_shape[f].stride for f in self.in_features]
    self.feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.anchor_generator = build_anchor_generator(cfg, self.feature_shapes)
    self.grid_generator = build_grid_generator(cfg, self.feature_shapes)
    self.stride_generator = build_stride_generator(cfg, self.feature_shapes)
    # self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
    self.matcher = Matcher(
        cfg.MODEL.RETINANET.IOU_THRESHOLDS,
        cfg.MODEL.RETINANET.IOU_LABELS,
        allow_low_quality_matches=True,
    )
    self.normalizer = lambda x: x / 255.0
    self.to(self.device)
    self.get_conv_bn_modules()
    self.bce_loss = nn.BCELoss()
    self.sigmoid = nn.Sigmoid()
def __init__(self, cfg):
    super().__init__()
    self.backbone = build_backbone(cfg)
    self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
    self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT

    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
    self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
    self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
def test_build_rpn_heads_with_rotated_anchor_generator(self):
    """Make sure RPN heads work with a rotated anchor generator."""
    self.assertGreater(len(rpn.RPN_HEAD_REGISTRY._obj_map), 0)

    for name, builder in rpn.RPN_HEAD_REGISTRY._obj_map.items():
        logger.info("Testing {}...".format(name))
        cfg = GeneralizedRCNNRunner().get_default_cfg()
        if name in RPN_CFGS:
            cfg.merge_from_file(RPN_CFGS[name])

        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"

        backbone = build_backbone(cfg)
        backbone_shape = backbone.output_shape()
        rpn_input_shape = [backbone_shape[x] for x in cfg.MODEL.RPN.IN_FEATURES]
        rpn_head = builder(cfg, rpn_input_shape)

        in_channels = list(backbone_shape.values())[0].channels
        anchor_generator = build_anchor_generator(cfg, rpn_input_shape)
        num_anchors = anchor_generator.num_cell_anchors[0]
        box_dim = anchor_generator.box_dim

        N, C_in, H, W = 2, in_channels, 24, 32
        input = torch.rand([N, C_in, H, W], dtype=torch.float32)
        LAYERS = len(cfg.MODEL.RPN.IN_FEATURES)
        out = rpn_head([input] * LAYERS)
        self.assertEqual(len(out), 2)
        logits, bbox_reg = out
        for idx in range(LAYERS):
            self.assertEqual(
                logits[idx].shape,
                torch.Size([input.shape[0], num_anchors, input.shape[2], input.shape[3]]),
            )
            self.assertEqual(
                bbox_reg[idx].shape,
                torch.Size([
                    logits[idx].shape[0],
                    num_anchors * box_dim,
                    logits[idx].shape[2],
                    logits[idx].shape[3],
                ]),
            )
def __init__(self, cfg):
    super().__init__()

    self.device = torch.device(cfg.MODEL.DEVICE)
    self.nms = cfg.MODEL.OneNet.NMS
    self.in_features = cfg.MODEL.OneNet.IN_FEATURES
    self.num_classes = cfg.MODEL.OneNet.NUM_CLASSES
    self.num_boxes = cfg.TEST.DETECTIONS_PER_IMAGE

    # Build Backbone.
    self.backbone = build_backbone(cfg)
    self.size_divisibility = self.backbone.size_divisibility

    # Build Head.
    self.head = Head(cfg=cfg, backbone_shape=self.backbone.output_shape())

    # Loss parameters:
    class_weight = cfg.MODEL.OneNet.CLASS_WEIGHT
    giou_weight = cfg.MODEL.OneNet.GIOU_WEIGHT
    l1_weight = cfg.MODEL.OneNet.L1_WEIGHT

    # Build Criterion.
    matcher = MinCostMatcher(cfg=cfg,
                             cost_class=class_weight,
                             cost_bbox=l1_weight,
                             cost_giou=giou_weight)
    weight_dict = {"loss_ce": class_weight, "loss_bbox": l1_weight, "loss_giou": giou_weight}
    losses = ["labels", "boxes"]
    self.criterion = SetCriterion(cfg=cfg,
                                  num_classes=self.num_classes,
                                  matcher=matcher,
                                  weight_dict=weight_dict,
                                  losses=losses)

    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def __init__(self, cfg):
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.backbone = build_backbone(cfg)
    self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())

    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)

    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT
def __init__(self, cfg):
    super().__init__()
    self.backbone = build_backbone(cfg)
    self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape())
    self.ins_embed_head = build_ins_embed_branch(cfg, self.backbone.output_shape())
    self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
    self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
    self.meta = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    self.stuff_area = cfg.MODEL.PANOPTIC_DEEPLAB.STUFF_AREA
    self.threshold = cfg.MODEL.PANOPTIC_DEEPLAB.CENTER_THRESHOLD
    self.nms_kernel = cfg.MODEL.PANOPTIC_DEEPLAB.NMS_KERNEL
    self.top_k = cfg.MODEL.PANOPTIC_DEEPLAB.TOP_K_INSTANCE
    self.predict_instances = cfg.MODEL.PANOPTIC_DEEPLAB.PREDICT_INSTANCES
def __init__(self, cfg):
    super().__init__()
    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()
    if cfg.MODEL.DETR.NUM_FEATURE_LEVELS > 1:
        self.strides = [8, 16, 32]
    else:
        self.strides = [32]

    if cfg.MODEL.RESNETS.RES5_DILATION == 2:
        # fix dilation from d2
        self.backbone.stages[-1][0].conv2.dilation = (1, 1)
        self.backbone.stages[-1][0].conv2.padding = (1, 1)
        self.strides[-1] = self.strides[-1] // 2

    self.feature_strides = [backbone_shape[f].stride for f in backbone_shape.keys()]
    self.num_channels = [backbone_shape[k].channels for k in backbone_shape.keys()]
def __init__(self, cfg):
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.backbone = build_backbone(cfg)
    self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
    self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())

    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
    num_channels = len(cfg.MODEL.PIXEL_MEAN)
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def __init__(self, args, cfg, device, num_max_regions):
    super(RFGenerator, self).__init__()
    self.device = device
    self.cfg = cfg
    self.backbone = build_backbone(self.cfg)

    self.pooler_resolution = 14
    self.canonical_level = 4
    self.canonical_scale_factor = 2 ** self.canonical_level
    self.pooler_scales = (1 / self.canonical_scale_factor,)
    self.sampling_ratio = 0

    self.proposal_generator = build_proposal_generator(self.cfg, self.backbone.output_shape())
    self.roi_pooler = ROIPooler(
        output_size=self.pooler_resolution,
        scales=self.pooler_scales,
        sampling_ratio=self.sampling_ratio,
        pooler_type="ROIPool",
    )
    self.num_max_regions = num_max_regions
    self.args = args
def __init__(self, cfg):
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)

    # build the feature-extraction backbone
    self.backbone = build_backbone(cfg)

    # build the classification model
    if cfg.MODEL.MULTI_TASK.CLASSIFICATION_ON:
        self.classifier_in_features = cfg.MODEL.MULTI_TASK.CLASSIFICATION_IN_FEATURES
        self.classifier = build_multilabel_classifier(cfg)
    else:
        self.classifier = None

    # build the segmentation model
    if cfg.MODEL.MULTI_TASK.SEGMENTATION_ON:
        self.metal_segmentation_in_features = cfg.MODEL.MULTI_TASK.SEGMENTATION_IN_FEATURES
        self.metal_segmentation = build_metal_segmentation_model(cfg, self.backbone.out_feature_strides)
    else:
        self.metal_segmentation = None

    # build the object detection model
    if cfg.MODEL.MULTI_TASK.DETECTION_ON:
        self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape())
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
    else:
        self.proposal_generator = None
        self.roi_heads = None

    # TODO: build multi-task layer
    self.multi_loss_layer = build_multitask_loss_layer(cfg)

    # other settings
    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT

    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
    num_channels = len(cfg.MODEL.PIXEL_MEAN)
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)  # move everything to the configured device
def __init__(self, cfg):
    super().__init__()
    self.backbone = build_backbone(cfg)
    self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape())
    self.ins_embed_head = build_ins_embed_branch(cfg, self.backbone.output_shape())
    self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
    self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
    self.meta = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    self.stuff_area = cfg.MODEL.PANOPTIC_DEEPLAB.STUFF_AREA
    self.threshold = cfg.MODEL.PANOPTIC_DEEPLAB.CENTER_THRESHOLD
    self.nms_kernel = cfg.MODEL.PANOPTIC_DEEPLAB.NMS_KERNEL
    self.top_k = cfg.MODEL.PANOPTIC_DEEPLAB.TOP_K_INSTANCE
    self.predict_instances = cfg.MODEL.PANOPTIC_DEEPLAB.PREDICT_INSTANCES
    self.input_format = cfg.INPUT.FORMAT
    self.use_depthwise_separable_conv = cfg.MODEL.PANOPTIC_DEEPLAB.USE_DEPTHWISE_SEPARABLE_CONV
    assert (
        cfg.MODEL.SEM_SEG_HEAD.USE_DEPTHWISE_SEPARABLE_CONV
        == cfg.MODEL.PANOPTIC_DEEPLAB.USE_DEPTHWISE_SEPARABLE_CONV
    )
    self.size_divisibility = cfg.MODEL.PANOPTIC_DEEPLAB.SIZE_DIVISIBILITY
    self.benchmark_network_speed = cfg.MODEL.PANOPTIC_DEEPLAB.BENCHMARK_NETWORK_SPEED
def __init__(self, cfg):
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.backbone_level = cfg.MODEL.YOLOF.ENCODER.BACKBONE_LEVEL
    self.backbone = build_backbone(cfg)
    self.nums_classes = cfg.MODEL.YOLOF.DECODER.NUM_CLASSES

    # build the anchor generator
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[self.backbone_level]]
    self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

    # build the encoder and decoder
    self.encoder = DilatedEncoder(cfg, backbone_shape)
    self.decoder = Decoder(cfg)

    # prepare ground truth
    self.box2box_transform = YOLOFBox2BoxTransform(
        weights=cfg.MODEL.YOLOF.BOX_TRANSFORM.BBOX_REG_WEIGHTS,
        add_ctr_clamp=cfg.MODEL.YOLOF.BOX_TRANSFORM.ADD_CTR_CLAMP,
        ctr_clamp=cfg.MODEL.YOLOF.BOX_TRANSFORM.CTR_CLAMP)
    self.anchor_matcher = UniformMatcher(cfg.MODEL.YOLOF.MATCHER.TOPK)

    self.test_score_thresh = 0.05
    self.test_nms_thresh = 0.6
    self.test_topk_candidates = 1000
    self.max_detections_per_image = 100

    # build the loss
    self.losses = Losses(cfg)

    # get the normalizer
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def __init__(self, cfg):
    super().__init__()
    in_channels = cfg.MODEL.AVOD.IN_CHANNELS
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.head = build_avod_head(cfg, in_channels)
    self.box_selector_test = build_avod_postprocessor(cfg)
    self.loss_evaluator = build_avod_loss_evaluator(cfg)
    self.fpn_strides = cfg.MODEL.AVOD.FPN_STRIDES
    self.in_features = cfg.MODEL.AVOD.IN_FEATURES
    self.backbone = build_backbone(cfg)
    # backbone_shape = self.backbone.output_shape()
    # feature_shapes = [backbone_shape[f] for f in self.in_features]

    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
import torch

from detectron2.engine import DefaultPredictor
from detectron2.modeling import build_backbone
from detectron2.config import get_cfg

confidence_threshold = 0.5
config_file = "../detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml"
model_weights = "../detectron2/demo/faster_rcnn_R_101_C4_3x.pkl"

cfg = get_cfg()
cfg.merge_from_file(config_file)
cfg.MODEL.WEIGHTS = model_weights
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = confidence_threshold
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = confidence_threshold
cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = confidence_threshold
cfg.freeze()

backbone = build_backbone(cfg)
predictor = DefaultPredictor(cfg)


def getFeature(img, raw_boxes):
    """
    The input is the image and the bounding boxes; the output is a list
    containing one feature per bounding box.
    """
    raw_height, raw_width = img.shape[:2]
    image = predictor.transform_gen.get_transform(img).apply_image(img)
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
    inputs = [{"image": image, "height": raw_height, "width": raw_width}]
    images = predictor.model.preprocess_image(inputs)
    features = predictor.model.backbone(images.tensor)
    # image is now C x H x W, so the spatial dims are at indices 1 and 2
    new_height, new_width = image.shape[1:3]
    scale_x = 1. * new_width / raw_width
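# Hedged continuation sketch: the snippet above ends at scale_x. One common way
# to finish this kind of box-feature extraction is shown below; it is an
# assumption, not the original author's code, and _shared_roi_transform is a
# private helper of detectron2's C4 ROI heads that may change across versions.
from detectron2.structures import Boxes

def pool_box_features(features, raw_boxes, scale_x, scale_y):
    boxes = torch.as_tensor(raw_boxes, dtype=torch.float32).clone()
    boxes[:, [0, 2]] *= scale_x  # rescale x coordinates to the resized image
    boxes[:, [1, 3]] *= scale_y  # rescale y coordinates to the resized image
    proposal_boxes = Boxes(boxes).to(predictor.model.device)
    # Pool a fixed-size feature from the res4 map for each box, then average
    # the spatial dimensions to get one vector per box.
    box_features = predictor.model.roi_heads._shared_roi_transform(
        [features["res4"]], [proposal_boxes]
    )
    return box_features.mean(dim=[2, 3])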
def __init__(self, cfg):
    super().__init__()

    self.device = torch.device(cfg.MODEL.DEVICE)
    self.nms = cfg.MODEL.OneNet.NMS
    self.in_features = cfg.MODEL.OneNet.IN_FEATURES
    self.num_classes = cfg.MODEL.OneNet.NUM_CLASSES
    self.num_boxes = cfg.TEST.DETECTIONS_PER_IMAGE

    # Build Backbone.
    self.backbone = build_backbone(cfg)
    self.size_divisibility = self.backbone.size_divisibility

    # Build Head.
    self.head = FCOSHead(cfg)
    self.mask_branch = build_mask_branch(cfg, self.backbone.output_shape())
    self.mask_head = build_dynamic_mask_head(cfg)

    # build top module
    in_channels = self.backbone.output_shape()[self.in_features[0]].channels
    self.mask_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE
    self.controller = nn.Conv2d(in_channels,
                                self.mask_head.num_gen_params,
                                kernel_size=3,
                                stride=1,
                                padding=1)

    # Loss parameters:
    class_weight = cfg.MODEL.OneNet.CLASS_WEIGHT
    giou_weight = cfg.MODEL.OneNet.GIOU_WEIGHT
    l1_weight = cfg.MODEL.OneNet.L1_WEIGHT
    mask_weight = 2

    # Build Criterion.
    matcher = MinCostMatcher(cfg=cfg,
                             cost_class=class_weight,
                             cost_bbox=l1_weight,
                             cost_giou=giou_weight)
    weight_dict = {
        "loss_ce": class_weight,
        "loss_bbox": l1_weight,
        "loss_giou": giou_weight,
        "loss_mask": mask_weight,
    }
    losses = ["labels", "boxes"]
    self.criterion = SetCriterion(cfg=cfg,
                                  num_classes=self.num_classes,
                                  matcher=matcher,
                                  weight_dict=weight_dict,
                                  losses=losses)

    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def __init__(self, cfg):
    super().__init__()

    self.cfg = cfg
    self.device = torch.device(cfg.MODEL.DEVICE)

    self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    self.num_classes = cfg.MODEL.ISTR.NUM_CLASSES
    self.num_proposals = cfg.MODEL.ISTR.NUM_PROPOSALS
    self.hidden_dim = cfg.MODEL.ISTR.HIDDEN_DIM
    self.num_heads = cfg.MODEL.ISTR.NUM_HEADS

    # Build Backbone.
    self.backbone = build_backbone(cfg)
    self.size_divisibility = self.backbone.size_divisibility

    # Build Proposals.
    self.pos_embeddings = nn.Embedding(self.num_proposals, self.hidden_dim)
    self.init_proposal_boxes = nn.Embedding(self.num_proposals, 4)
    nn.init.constant_(self.init_proposal_boxes.weight[:, :2], 0.5)
    nn.init.constant_(self.init_proposal_boxes.weight[:, 2:], 1.0)

    # --------
    self.IFE = ImgFeatExtractor(cfg)
    self.mask_encoding = PCAMaskEncoding(cfg)

    # encoding parameters.
    components_path = cfg.MODEL.ISTR.PATH_COMPONENTS
    # update parameters.
    parameters = np.load(components_path)
    components = nn.Parameter(
        torch.from_numpy(parameters['components_c'][0]).float().to(self.device),
        requires_grad=False)
    explained_variances = nn.Parameter(
        torch.from_numpy(parameters['explained_variance_c'][0]).float().to(self.device),
        requires_grad=False)
    means = nn.Parameter(
        torch.from_numpy(parameters['mean_c'][0]).float().to(self.device),
        requires_grad=False)
    self.mask_encoding.components = components
    self.mask_encoding.explained_variances = explained_variances
    self.mask_encoding.means = means

    # Build Dynamic Head.
    self.head = DynamicHead(cfg=cfg, roi_input_shape=self.backbone.output_shape())

    # Loss parameters:
    class_weight = cfg.MODEL.ISTR.CLASS_WEIGHT
    giou_weight = cfg.MODEL.ISTR.GIOU_WEIGHT
    l1_weight = cfg.MODEL.ISTR.L1_WEIGHT
    no_object_weight = cfg.MODEL.ISTR.NO_OBJECT_WEIGHT
    mask_weight = cfg.MODEL.ISTR.MASK_WEIGHT
    self.deep_supervision = cfg.MODEL.ISTR.DEEP_SUPERVISION

    # Build Criterion.
    matcher = HungarianMatcher(cfg=cfg,
                               cost_class=class_weight,
                               cost_bbox=l1_weight,
                               cost_giou=giou_weight,
                               cost_mask=mask_weight)
    weight_dict = {
        "loss_ce": class_weight,
        "loss_bbox": l1_weight,
        "loss_giou": giou_weight,
        "loss_feat": mask_weight,
        "loss_dice": mask_weight,
    }
    if self.deep_supervision:
        aux_weight_dict = {}
        for i in range(self.num_heads - 1):
            aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()})
        weight_dict.update(aux_weight_dict)

    losses = ["labels", "boxes", "masks"]
    self.criterion = SetCriterion(cfg=cfg,
                                  num_classes=self.num_classes,
                                  matcher=matcher,
                                  weight_dict=weight_dict,
                                  eos_coef=no_object_weight,
                                  losses=losses)

    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
import torch
from detectron2.modeling import build_backbone, build_proposal_generator, build_roi_heads

from backbone import cfg
from backbone import build_shufflenetv2_fpn_backbone

backbone_model = build_backbone(cfg)
print(backbone_model)
torch.save(backbone_model.state_dict(), '1.backbone.pth')

proposal_model = build_proposal_generator(cfg, backbone_model.output_shape())
print(proposal_model)
torch.save(proposal_model.state_dict(), '2.rpn.pth')

roi_model = build_roi_heads(cfg, backbone_model.output_shape())
print(roi_model)
torch.save(roi_model.state_dict(), '3.roi.pth')
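# Companion sketch (an addition, not part of the original script): the three
# checkpoints saved above can be restored into freshly built modules with
# load_state_dict.
backbone_model.load_state_dict(torch.load('1.backbone.pth'))
proposal_model.load_state_dict(torch.load('2.rpn.pth'))
roi_model.load_state_dict(torch.load('3.roi.pth'))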