def _init_keypoint_head(cls, cfg, input_shape):
    """
    Collect the keyword arguments that describe the keypoint branch.

    Args:
        cfg: config node. Reads ``MODEL.KEYPOINT_ON``,
            ``MODEL.ROI_HEADS.IN_FEATURES`` and the
            ``MODEL.ROI_KEYPOINT_HEAD.POOLER_*`` entries.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict: empty when ``cfg.MODEL.KEYPOINT_ON`` is falsy; otherwise it holds
        "keypoint_in_features", "keypoint_pooler" and "keypoint_head".
    """
    if not cfg.MODEL.KEYPOINT_ON:
        return {}

    in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    keypoint_cfg = cfg.MODEL.ROI_KEYPOINT_HEAD
    resolution = keypoint_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in in_features)
    sampling_ratio = keypoint_cfg.POOLER_SAMPLING_RATIO
    pooler_type = keypoint_cfg.POOLER_TYPE

    # Only the first feature's channel count feeds the head's input spec.
    channel_counts = [input_shape[name].channels for name in in_features]
    in_channels = channel_counts[0]

    pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )
    head_input = ShapeSpec(channels=in_channels, width=resolution, height=resolution)
    head = build_keypoint_head(cfg, head_input)

    return {
        "keypoint_in_features": in_features,
        "keypoint_pooler": pooler,
        "keypoint_head": head,
    }
def _init_mask_head(self, cfg, input_shape):
    """
    Set up the coarse mask head and then the point head.

    Does nothing beyond recording ``self.mask_on`` when masks are disabled.

    Args:
        cfg: config node. Reads ``MODEL.MASK_ON`` and
            ``MODEL.ROI_MASK_HEAD.{IN_FEATURES, POOLER_RESOLUTION}``.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.
    """
    self.mask_on = cfg.MODEL.MASK_ON
    if not self.mask_on:
        return

    self.mask_coarse_in_features = cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES
    self.mask_coarse_side_size = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    # Scale (1 / stride) of every available feature map, keyed by feature name.
    self._feature_scales = {k: 1.0 / v.stride for k, v in input_shape.items()}

    # Builtin sum keeps the channel count a plain Python int. np.sum would
    # return a numpy scalar (a float for an empty list) that then leaks into
    # ShapeSpec and every layer sized from it.
    in_channels = sum(input_shape[f].channels for f in self.mask_coarse_in_features)

    self.mask_coarse_head = build_mask_head(
        cfg,
        ShapeSpec(
            channels=in_channels,
            width=self.mask_coarse_side_size,
            height=self.mask_coarse_side_size,
        ),
    )
    self._init_point_head(cfg, input_shape)
def _init_box_head(self, cfg, input_shape):
    """
    Create ``self.box_pooler``, ``self.box_head`` and ``self.box_predictor``
    for 5-dimensional (rotated) boxes.

    Args:
        cfg: config node. Reads the ``MODEL.ROI_BOX_HEAD.POOLER_*`` entries.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.
    """
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    sampling_ratio = box_cfg.POOLER_SAMPLING_RATIO
    pooler_type = box_cfg.POOLER_TYPE

    # The predictors are shared across all input feature maps (as in FPN),
    # so every feature map must have the same number of channels.
    channel_counts = [input_shape[name].channels for name in self.in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    in_channels = channel_counts[0]

    # Only the rotated ROIAlign pooler is accepted here.
    assert pooler_type in ["ROIAlignRotated"]

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )

    head_input = ShapeSpec(channels=in_channels, height=resolution, width=resolution)
    self.box_head = build_box_head(cfg, head_input)

    self.box_predictor = FastRCNNOutputLayers(
        input_size=self.box_head.output_size,
        num_classes=self.num_classes,
        cls_agnostic_bbox_reg=self.cls_agnostic_bbox_reg,
        box_dim=5,
    )
def __init__(self, input_shape, num_classes, cls_agnostic_bbox_reg, box_dim=4):
    """
    Build the classification layer.

    Args:
        input_shape (ShapeSpec): shape of the input feature. A bare int is
            also accepted and treated as a channel count.
        num_classes (int): number of foreground classes
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox
            regression (not read by the code visible in this constructor)
        box_dim (int): the dimension of bounding boxes.
            Example box dimensions: 4 for regular XYXY boxes and 5 for rotated
            XYWHA boxes (not read by the code visible in this constructor)
    """
    super().__init__()

    # Some backward compatibility: an int is interpreted as the channel count.
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)

    # Missing (falsy) spatial dimensions count as 1 when flattening.
    width = input_shape.width or 1
    height = input_shape.height or 1
    input_size = input_shape.channels * width * height

    # One output per foreground class plus one for background (hence + 1).
    self.cls_score = Linear(input_size, num_classes + 1)

    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.constant_(self.cls_score.bias, 0)
def _init_point_head(self, cfg, input_shape):
    """
    Read the point-head options and build ``self.point_head``.

    Args:
        cfg: config node. Reads ``MODEL.ROI_HEADS.NUM_CLASSES`` and
            ``MODEL.POINT_HEAD.{NUM_CLASSES, IN_FEATURES, TRAIN_NUM_POINTS,
            SUBDIVISION_STEPS, SUBDIVISION_NUM_POINTS}``.
        input_shape: mapping from feature name to an object exposing ``channels``.

    Raises:
        AssertionError: if the ROI-head and point-head class counts differ, or
            if ``SUBDIVISION_NUM_POINTS`` is not a perfect square.
    """
    self.mask_point_on = True  # always on
    assert cfg.MODEL.ROI_HEADS.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES
    self.mask_point_in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES
    self.mask_point_train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS
    # The next two parameters are used in the adaptive subdivision inference procedure.
    self.mask_point_subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS
    self.mask_point_subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS

    # Builtin sum keeps the channel count a plain Python int. np.sum would
    # return a numpy scalar (a float for an empty list) that then leaks into
    # ShapeSpec and the layers sized from it.
    in_channels = sum(input_shape[f].channels for f in self.mask_point_in_features)
    self.point_head = build_point_head(cfg, ShapeSpec(channels=in_channels, width=1, height=1))
    self.num_params = self.point_head.num_params

    # Inference parameters: the initial grid side is the square root of the
    # point budget, which therefore has to be a perfect square.
    self.mask_point_subdivision_init_resolution = int(
        math.sqrt(self.mask_point_subdivision_num_points)
    )
    assert (
        self.mask_point_subdivision_init_resolution
        * self.mask_point_subdivision_init_resolution
        == self.mask_point_subdivision_num_points
    )
def _init_keypoint_head(self, cfg):
    """
    Create ``self.keypoint_pooler`` and ``self.keypoint_head``.

    Only ``self.keypoint_on`` is set when keypoints are disabled.

    Args:
        cfg: config node. Reads ``MODEL.KEYPOINT_ON`` and the
            ``MODEL.ROI_KEYPOINT_HEAD`` pooler and loss options.
    """
    self.keypoint_on = cfg.MODEL.KEYPOINT_ON
    if not self.keypoint_on:
        return

    keypoint_cfg = cfg.MODEL.ROI_KEYPOINT_HEAD
    resolution = keypoint_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / self.feature_strides[name] for name in self.in_features)
    sampling_ratio = keypoint_cfg.POOLER_SAMPLING_RATIO
    pooler_type = keypoint_cfg.POOLER_TYPE
    self.normalize_loss_by_visible_keypoints = (
        keypoint_cfg.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS
    )
    self.keypoint_loss_weight = keypoint_cfg.LOSS_WEIGHT

    # Only the first feature's channel count feeds the head's input spec.
    channel_counts = [self.feature_channels[name] for name in self.in_features]
    in_channels = channel_counts[0]

    self.keypoint_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )
    head_input = ShapeSpec(channels=in_channels, width=resolution, height=resolution)
    self.keypoint_head = build_keypoint_head(cfg, head_input)
def test_keypoint_head_scriptability(self):
    """Check that the scripted keypoint head reproduces the eager outputs."""
    input_shape = ShapeSpec(channels=1024, height=14, width=14)
    keypoint_features = torch.randn(4, 1024, 14, 14)

    # Two images: three predicted boxes on the first, one on the second.
    image_shapes = [(10, 10), (15, 15)]
    boxes_per_image = [
        torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6], [1, 5, 2, 8]], dtype=torch.float32),
        torch.tensor([[7, 3, 10, 5]], dtype=torch.float32),
    ]
    eager_instances = []
    for shape, boxes in zip(image_shapes, boxes_per_image):
        instance = Instances(shape)
        instance.pred_boxes = Boxes(boxes)
        eager_instances.append(instance)

    keypoint_head = KRCNNConvDeconvUpsampleHead(
        input_shape, num_keypoints=17, conv_dims=[512, 512]
    ).eval()

    # The head receives a deep copy, so the instances converted for the
    # scripted run below are the untouched originals.
    origin_outputs = keypoint_head(keypoint_features, deepcopy(eager_instances))

    fields = {
        "pred_boxes": Boxes,
        "pred_keypoints": torch.Tensor,
        "pred_keypoint_heatmaps": torch.Tensor,
    }
    with patch_instances(fields) as NewInstances:
        scripted_head = torch.jit.script(keypoint_head)
        scripted_inputs = [NewInstances.from_instances(inst) for inst in eager_instances]
        script_outputs = scripted_head(keypoint_features, scripted_inputs)

    for origin_ins, script_ins in zip(origin_outputs, script_outputs):
        assert_instances_allclose(origin_ins, script_ins.to_instances(), rtol=0)
def output_shape(self):
    """
    Returns:
        dict: one ShapeSpec (channels and stride) per name in
        ``self._out_features``, keyed by that name.
    """
    shapes = {}
    for name in self._out_features:
        shapes[name] = ShapeSpec(
            channels=self._out_feature_channels[name],
            stride=self._out_feature_strides[name],
        )
    return shapes
def output_shape(self):
    """
    Returns:
        ShapeSpec: a spec carrying only ``self._out_channels`` as its channel count.
    """
    out_channels = self._out_channels
    return ShapeSpec(channels=out_channels)
def __init__(self):
    """Wrap a small keypoint head (4 input channels, 14x14 features, 17 keypoints)."""
    super().__init__()
    feature_shape = ShapeSpec(channels=4, height=14, width=14)
    self.model = KRCNNConvDeconvUpsampleHead(
        feature_shape,
        num_keypoints=17,
        conv_dims=(4,),
    )
def __init__(self, cfg):
    """
    Build a ResNet-FPN backbone and a proposal generator on top of it.

    Args:
        cfg: config node forwarded to both builder functions.
    """
    super().__init__()
    image_shape = ShapeSpec(channels=3)
    self.backbone = build_resnet_fpn_backbone(cfg, image_shape)
    # The proposal generator is sized from the backbone's reported output shapes.
    backbone_shapes = self.backbone.output_shape()
    self.rpn = build_proposal_generator(cfg, backbone_shapes)
roi_heads=L(StandardROIHeads)( num_classes=80, batch_size_per_image=512, positive_fraction=0.25, proposal_matcher=L(Matcher)(thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False), box_in_features=["p2", "p3", "p4", "p5"], box_pooler=L(ROIPooler)( output_size=7, scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), sampling_ratio=0, pooler_type="ROIAlignV2", ), box_head=L(FastRCNNConvFCHead)( input_shape=ShapeSpec(channels=256, height=7, width=7), conv_dims=[], fc_dims=[1024, 1024], ), box_predictor=L(FastRCNNOutputLayers)( input_shape=ShapeSpec(channels=1024), test_score_thresh=0.05, box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), num_classes="${..num_classes}", ), mask_in_features=["p2", "p3", "p4", "p5"], mask_pooler=L(ROIPooler)( output_size=14, scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), sampling_ratio=0, pooler_type="ROIAlignV2",
def __init__(
    self,
    input_shape: ShapeSpec,
    *,
    box2box_transform,
    num_classes: int,
    test_score_thresh: float = 0.0,
    test_nms_thresh: float = 0.5,
    test_topk_per_image: int = 100,
    cls_agnostic_bbox_reg: bool = False,
    smooth_l1_beta: float = 0.0,
    box_reg_loss_type: str = "smooth_l1",
    loss_weight: Union[float, Dict[str, float]] = 1.0,
    use_fed_loss: bool = False,
    use_sigmoid_ce: bool = False,
    get_fed_loss_cls_weights: Optional[Callable] = None,
    fed_loss_num_classes: int = 50,
):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature to this module.
            A bare int is also accepted and treated as a channel count.
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            its ``weights`` length determines the box dimension.
        num_classes (int): number of foreground classes
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
            `box_reg_loss_type` is "smooth_l1"
        box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou",
            "diou", "ciou"
        loss_weight (float|dict): weights to use for losses. Can be a single number
            (int or float) for weighting all losses, or a dict of individual
            weightings. Valid dict keys are:
                * "loss_cls": applied to classification loss
                * "loss_box_reg": applied to box regression loss
        use_fed_loss (bool): whether to use federated loss which samples additional
            negative classes to calculate the loss
        use_sigmoid_ce (bool): whether to calculate the loss using weighted average of
            binary cross entropy with logits. This could be used together with
            federated loss
        get_fed_loss_cls_weights (Callable): called with no arguments when
            `use_fed_loss` is set; must return one weight per class (length
            `num_classes`), which is registered as the buffer "fed_loss_cls_weights".
            The implementation can be found in detectron2/data/detection_utils.py
        fed_loss_num_classes (int): number of federated classes to keep in total
    """
    super().__init__()
    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    self.num_classes = num_classes
    # Missing (falsy) spatial dimensions count as 1 when flattening.
    input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)

    # prediction layer for num_classes foreground classes and one background class (hence + 1)
    self.cls_score = nn.Linear(input_size, num_classes + 1)
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.bbox_pred = nn.Linear(input_size, num_bbox_reg_classes * box_dim)

    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    for layer in [self.cls_score, self.bbox_pred]:
        nn.init.constant_(layer.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.box_reg_loss_type = box_reg_loss_type

    # Accept ints as well as floats: a scalar such as `1` must also be expanded,
    # otherwise a bare number is stored where a dict of weights is expected.
    if isinstance(loss_weight, (int, float)):
        loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight}
    self.loss_weight = loss_weight

    self.use_fed_loss = use_fed_loss
    self.use_sigmoid_ce = use_sigmoid_ce
    self.fed_loss_num_classes = fed_loss_num_classes

    if self.use_fed_loss:
        assert self.use_sigmoid_ce, "Please use sigmoid cross entropy loss with federated loss"
        fed_loss_cls_weights = get_fed_loss_cls_weights()
        assert (
            len(fed_loss_cls_weights) == self.num_classes
        ), "Please check the provided fed_loss_cls_weights. Their size should match num_classes"
        self.register_buffer("fed_loss_cls_weights", fed_loss_cls_weights)
def output_shape(self):
    """
    Returns:
        ShapeSpec: the output feature shape, with ``self.out_channels``
        channels and a 1x1 spatial size.
    """
    out_channels = self.out_channels
    return ShapeSpec(channels=out_channels, height=1, width=1)
def __init__(
    self,
    input_shape: ShapeSpec,
    *,
    box2box_transform,
    clustering_items_per_class,
    clustering_start_iter,
    clustering_update_mu_iter,
    clustering_momentum,
    clustering_z_dimension,
    enable_clustering,
    prev_intro_cls,
    curr_intro_cls,
    max_iterations,
    output_dir,
    feat_store_path,
    margin,
    num_classes: int,
    test_score_thresh: float = 0.0,
    test_nms_thresh: float = 0.5,
    test_topk_per_image: int = 100,
    cls_agnostic_bbox_reg: bool = False,
    smooth_l1_beta: float = 0.0,
    box_reg_loss_type: str = "smooth_l1",
    loss_weight: Union[float, Dict[str, float]] = 1.0,
):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature to this module.
            A bare int is also accepted and treated as a channel count.
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            its ``weights`` length determines the box dimension.
        clustering_items_per_class: forwarded as the second argument to ``Store``
            when a new feature store is created.
        clustering_start_iter: stored on ``self.clustering_start_iter``.
        clustering_update_mu_iter: stored on ``self.clustering_update_mu_iter``.
        clustering_momentum: stored on ``self.clustering_momentum``.
        clustering_z_dimension: accepted but not read by this constructor.
        enable_clustering: stored on ``self.enable_clustering``.
        prev_intro_cls: added to `curr_intro_cls` to form ``self.seen_classes``.
        curr_intro_cls: see `prev_intro_cls`.
        max_iterations: stored on ``self.max_iterations``.
        output_dir: first path component of the feature-store location.
        feat_store_path: joined under `output_dir`; the store file is
            ``<output_dir>/<feat_store_path>/feat.pt``.
        margin: stored on ``self.margin``.
        num_classes (int): number of foreground classes
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
            `box_reg_loss_type` is "smooth_l1"
        box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        loss_weight (float|dict): weights to use for losses. Can be a single number
            (int or float) for weighting all losses, or a dict of individual
            weightings. Valid dict keys are:
                * "loss_cls": applied to classification loss
                * "loss_box_reg": applied to box regression loss
    """
    super().__init__()
    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    # Missing (falsy) spatial dimensions count as 1 when flattening.
    input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)

    # prediction layer for num_classes foreground classes and one background class (hence + 1)
    self.cls_score = Linear(input_size, num_classes + 1)
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    for layer in [self.cls_score, self.bbox_pred]:
        nn.init.constant_(layer.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.box_reg_loss_type = box_reg_loss_type

    # Accept ints as well as floats: a scalar such as `1` must also be expanded,
    # otherwise a bare number is stored where a dict of weights is expected.
    if isinstance(loss_weight, (int, float)):
        loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight}
    self.loss_weight = loss_weight
    self.num_classes = num_classes

    self.clustering_start_iter = clustering_start_iter
    self.clustering_update_mu_iter = clustering_update_mu_iter
    self.clustering_momentum = clustering_momentum

    self.hingeloss = nn.HingeEmbeddingLoss(2)
    self.enable_clustering = enable_clustering

    self.prev_intro_cls = prev_intro_cls
    self.curr_intro_cls = curr_intro_cls
    self.seen_classes = self.prev_intro_cls + self.curr_intro_cls
    # Class indices from `seen_classes` up to (but excluding) num_classes - 1.
    self.invalid_class_range = list(range(self.seen_classes, self.num_classes - 1))

    logger = logging.getLogger(__name__)
    logger.info("Invalid class range: " + str(self.invalid_class_range))

    self.max_iterations = max_iterations
    self.feature_store_is_stored = False
    self.output_dir = output_dir
    self.feat_store_path = feat_store_path
    self.feature_store_save_loc = os.path.join(
        self.output_dir, self.feat_store_path, 'feat.pt'
    )

    if os.path.isfile(self.feature_store_save_loc):
        logger.info('Trying to load feature store from ' + self.feature_store_save_loc)
        # NOTE(review): torch.load unpickles the file, so only load feature
        # stores from an output directory you trust.
        self.feature_store = torch.load(self.feature_store_save_loc)
    else:
        logger.info(
            'Feature store not found in '
            + self.feature_store_save_loc
            + '. Creating new feature store.'
        )
        self.feature_store = Store(num_classes + 1, clustering_items_per_class)

    # One slot per class plus one extra, all initially empty.
    self.means = [None for _ in range(num_classes + 1)]
    self.margin = margin
import torch
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.layers import ShapeSpec
from detectron2.modeling.backbone import build_resnet_backbone

# Start from the model-zoo Faster R-CNN R50-FPN config, then override ROI-head options.
cfg = get_cfg()
config_path = model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.merge_from_file(config_path)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.85

# Build only the ResNet backbone for a 3-channel input.
net = build_resnet_backbone(cfg, ShapeSpec(channels=3))

# Load just the checkpoint entries whose keys the backbone itself declares.
temp = torch.load("weight.pt")
backbone_keys = net.state_dict()
net.load_state_dict({key: temp[key] for key in backbone_keys})
net.eval()

# Run the backbone on the stored input without gradients and save the result.
with torch.no_grad():
    inputs = torch.load("data.pt")
    outputs = net(inputs)
    torch.save(outputs, "res1.pt")
def _init_box_head(cls, cfg, input_shape):
    """
    Build the box pooler, box head and box predictor.

    When ``cfg.DATASETS.UNSEEN_LABEL_SET`` names a file, the class names listed
    in it (one per line) are treated as unseen: two index tensors
    (``label_converter`` and ``reverse_label_converter``) are derived from the
    metadata's ``thing_classes`` and passed to the predictor, and an 'unknown'
    stuff class is appended to the metadata of ``cfg.DATASETS.TRAIN[0]``.
    Otherwise both converters are ``None``.

    Args:
        cfg: config node. Reads ``MODEL.ROI_HEADS.IN_FEATURES``, the
            ``MODEL.ROI_BOX_HEAD.POOLER_*`` entries, ``DATASETS.UNSEEN_LABEL_SET``,
            ``DATASETS.TRAIN`` and ``MODEL.EOPSN.{IGNORE_UNLABELED_REGION,
            UNLABELED_REGION, PREDICTOR}``.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict: "box_in_features", "box_pooler", "box_head" and "box_predictor".

    Raises:
        ValueError: if ``cfg.MODEL.EOPSN.PREDICTOR`` is neither 'baseline' nor 'eopsn'.
        KeyError: if the unseen-label file names a class that is missing from
            the metadata's ``thing_classes``.
    """
    in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features)
    sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE

    unseen_path = cfg.DATASETS.UNSEEN_LABEL_SET
    meta = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    if unseen_path != '':
        # Map class name -> index, then translate the file's names into indices.
        meta_info = {e: i for i, e in enumerate(meta.thing_classes)}
        with open(unseen_path, 'r') as f:
            lines = [meta_info[e.replace('\n', '')] for e in f]
        unseen_label_set = sorted(lines)

        # NOTE(review): these calls mutate the shared metadata object each time
        # this method runs; the ids 201 and 54 are hard-coded.
        meta.stuff_classes.append('unknown')
        meta.stuff_colors.append([20, 220, 60])
        meta.stuff_dataset_id_to_contiguous_id[201] = 54

        # One slot per thing class plus one extra slot, or two extra slots when
        # the unlabeled region is enabled and not ignored.
        if cfg.MODEL.EOPSN.IGNORE_UNLABELED_REGION or not cfg.MODEL.EOPSN.UNLABELED_REGION:
            label_converter = torch.ones(len(meta.thing_classes) + 1)
        else:
            label_converter = torch.ones(len(meta.thing_classes) + 2)
        for i in unseen_label_set:
            label_converter[i] = 0

        # Indices of the kept (non-zero) slots, taken before the cumulative sum
        # turns the mask into a running index.
        reverse_label_converter = label_converter.nonzero()[:, 0].long()
        label_converter = torch.cumsum(label_converter, 0).long() - 1

        if cfg.MODEL.EOPSN.UNLABELED_REGION:
            if cfg.MODEL.EOPSN.IGNORE_UNLABELED_REGION:
                reverse_label_converter[-1] = -1
            else:
                # Shift the last real entry to the end and mark the one before it as -1.
                reverse_label_converter[-1] = reverse_label_converter[-2]
                reverse_label_converter[-2] = -1
    else:
        reverse_label_converter = None
        label_converter = None

    # If StandardROIHeads is applied on multiple feature maps (as in FPN),
    # then we share the same predictors and therefore the channel counts must be the same
    in_channels = [input_shape[f].channels for f in in_features]
    # Check all channel counts are equal
    assert len(set(in_channels)) == 1, in_channels
    in_channels = in_channels[0]

    box_pooler = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )
    # Here we split "box head" and "box predictor", which is mainly due to historical reasons.
    # They are used together so the "box predictor" layers should be part of the "box head".
    # New subclasses of ROIHeads do not need "box predictor"s.
    box_head = build_box_head(
        cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution)
    )

    predictor_name = cfg.MODEL.EOPSN.PREDICTOR
    if predictor_name == 'baseline':
        box_predictor = FastRCNNOutputLayers_baseline(
            cfg, box_head.output_shape, label_converter, reverse_label_converter
        )
    elif predictor_name == 'eopsn':
        from .eopsn_predictor import FastRCNNOutputLayers_eopsn

        box_predictor = FastRCNNOutputLayers_eopsn(
            cfg, box_head.output_shape, label_converter, reverse_label_converter
        )
    else:
        # Previously this fell through and failed with UnboundLocalError at the return.
        raise ValueError(
            "Unsupported cfg.MODEL.EOPSN.PREDICTOR: {!r}".format(predictor_name)
        )

    return {
        "box_in_features": in_features,
        "box_pooler": box_pooler,
        "box_head": box_head,
        "box_predictor": box_predictor,
    }
def __init__(
    self,
    input_shape,
    *,
    box2box_transform,
    num_classes,
    test_score_thresh=0.0,
    test_nms_thresh=0.5,
    test_topk_per_image=100,
    cls_agnostic_bbox_reg=False,
    smooth_l1_beta=0.0,
    box_reg_loss_type="smooth_l1",
    loss_weight=1.0,
    oicr_iter=3,
    fg_threshold=0.5,
    bg_threshold=0.1,
    freeze_layers=None,
    embedding_path='',
    terms=None,
    mode='Pre_Softmax',
    mil_multiplier=4.0,
    detector_temp=1.0,
    classifier_temp=1.0,
):
    """
    Replace the parent's prediction layers with classifier/detection streams,
    a stack of OICR predictors and "delta" predictors, and load frozen embeddings.

    Args:
        input_shape (ShapeSpec or int): shape of the input feature; an int is
            treated as a channel count.
        box2box_transform: its ``weights`` length determines the box dimension.
        num_classes (int): number of foreground classes.
        test_score_thresh, test_nms_thresh, test_topk_per_image,
        cls_agnostic_bbox_reg, smooth_l1_beta, box_reg_loss_type, loss_weight:
            forwarded unchanged to the parent constructor.
        oicr_iter (int): number of OICR predictor layers to create.
        fg_threshold, bg_threshold: stored on the instance.
        freeze_layers (list or None): forwarded to ``self._freeze_layers``;
            ``None`` (the default) means an empty list.
        embedding_path (str): file passed to ``torch.load``; the loaded object's
            'embeddings' entry is wrapped in a frozen ``nn.Embedding``. It is
            loaded unconditionally, so a real path must be supplied.
        terms (dict or None): stored on ``self.terms``; ``None`` (the default)
            yields a fresh empty dict per instance.
        mode, mil_multiplier, detector_temp, classifier_temp: stored on the instance.
    """
    # A literal [] or {} default would be one shared object across every call.
    # `terms` is stored on the instance, so each instance must get its own dict.
    if freeze_layers is None:
        freeze_layers = []
    if terms is None:
        terms = {}

    super(FastRCNNOutputsBase, self).__init__(
        input_shape=input_shape,
        box2box_transform=box2box_transform,
        num_classes=num_classes,
        test_score_thresh=test_score_thresh,
        test_nms_thresh=test_nms_thresh,
        test_topk_per_image=test_topk_per_image,
        cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
        smooth_l1_beta=smooth_l1_beta,
        box_reg_loss_type=box_reg_loss_type,
        loss_weight=loss_weight,
    )
    self.num_classes = num_classes
    self.oicr_iter = oicr_iter
    self.fg_threshold = fg_threshold
    self.bg_threshold = bg_threshold
    self.terms = terms
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.box_dim = box_dim
    self.num_bbox_reg_classes = num_bbox_reg_classes
    self.mode = mode
    self.mil_multiplier = mil_multiplier
    self.detector_temp = detector_temp
    self.classifier_temp = classifier_temp

    # Delete the layers defined by the parent; they are replaced below.
    del self.cls_score
    del self.bbox_pred

    # Define delta predictors
    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    # Missing (falsy) spatial dimensions count as 1 when flattening.
    input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)
    self.input_size = input_size

    self.classifier_stream = Linear(input_size, self.num_classes)
    self.detection_stream = Linear(input_size, self.num_classes)
    self.oicr_predictors = nn.ModuleList(
        [Linear(input_size, self.num_classes + 1) for _ in range(self.oicr_iter)]
    )
    self.cls_score_delta = Linear(input_size, self.num_classes + 1)
    self.bbox_pred_delta = Linear(input_size, num_bbox_reg_classes * box_dim)

    # Init predictors. The order of the random initialisations is kept as-is
    # so seeded runs draw the same values.
    nn.init.normal_(self.bbox_pred_delta.weight, std=0.001)
    nn.init.normal_(self.classifier_stream.weight, std=0.01)
    nn.init.normal_(self.detection_stream.weight, std=0.01)
    for predictor in self.oicr_predictors:
        nn.init.normal_(predictor.weight, std=0.01)
        nn.init.constant_(predictor.bias, 0.)
    # The classification delta starts at exactly zero.
    nn.init.constant_(self.cls_score_delta.weight, 0.)
    for layer in [
        self.cls_score_delta,
        self.bbox_pred_delta,
        self.detection_stream,
        self.classifier_stream,
    ]:
        nn.init.constant_(layer.bias, 0.)

    # NOTE(review): torch.load unpickles the file, so only point
    # embedding_path at files you trust.
    pretrained_embeddings = torch.load(embedding_path)['embeddings']
    self.embeddings = nn.Embedding.from_pretrained(pretrained_embeddings, freeze=True)
    self._freeze_layers(layers=freeze_layers)
def init_model(self):
    """
    Build the backbone selected by ``self.backbone``, optionally swap in
    CoordConv layers, and create the online Siamese network together with the
    feature buffers used for training and validation.

    Reads ``self.backbone``, ``self.first_conv``, ``self.maxpool1``,
    ``self.coordconv``, ``self.loss_function``, ``self.hidden_mlp``,
    ``self.feat_dim``, ``self.num_samples`` and ``self.num_samples_valid``.

    Raises:
        AssertionError: if ``self.backbone`` is not one of the supported names.
        ValueError: same condition, reached only when assertions are disabled.
    """
    assert self.backbone in [
        "resnet18", "resnet50", "shufflenet_v2_x1_0", "resnet50_detectron"
    ]
    detectron_resnet_layer4 = None

    if self.backbone in ("resnet18", "resnet50"):
        # Both torchvision-style ResNets are built the same way; the feature
        # size is read from the classifier layer's input width.
        backbone = resnet18 if self.backbone == "resnet18" else resnet50
        backbone_network = backbone(
            first_conv=self.first_conv,
            maxpool1=self.maxpool1,
            return_all_feature_maps=False,
        )
        self.feature_dim = backbone_network.fc.in_features
    elif self.backbone == "shufflenet_v2_x1_0":
        backbone = shufflenet_v2_x1_0
        backbone_network = backbone()
        self.feature_dim = backbone_network.fc.in_features
        # Drop the classifier so the network outputs features directly.
        backbone_network.fc = Identity()
    elif self.backbone == "resnet50_detectron":
        with open("examples/local/detectron_resnet50_c4_config.yaml", "r") as f:
            import yaml
            # NOTE(review): yaml.Loader can construct arbitrary Python objects.
            # Acceptable for this repository-local config; never point it at
            # untrusted files.
            cfg = yaml.load(f, Loader=yaml.Loader)
        from detectron2.modeling.backbone.resnet import build_resnet_backbone
        from detectron2.layers import ShapeSpec
        input_shape = ShapeSpec(3)  # 3 channels RGB
        backbone_network = build_resnet_backbone(cfg, input_shape)
        backbone_network = unfreeze_batchnorm_layers(backbone_network)
        detectron_resnet_layer4 = Resnet50Layer4()
        self.feature_dim = 2048
    else:
        raise ValueError(f"Unsupported backbone: {self.backbone}")

    if self.coordconv is not None:
        from thelper.nn.coordconv import swap_coordconv_layers  # Lazy loading.
        if self.coordconv == "all":
            backbone_network = swap_coordconv_layers(backbone_network)
        elif self.coordconv == "first":
            backbone_network.conv1 = swap_coordconv_layers(backbone_network.conv1)

    self.cyclic_predictor = None
    if self.loss_function == "cyclic":
        # Use 2 stacked inputs for the predictor.
        self.cyclic_predictor = PredictionMLP(
            self.feature_dim * 2, self.hidden_mlp, self.feature_dim
        )
    # All other methods work on pairs.

    self.online_network = SiameseArm(
        backbone_network,
        input_dim=self.feature_dim,
        hidden_size=self.hidden_mlp,
        output_dim=self.feat_dim,
        detectron_resnet_layer4=detectron_resnet_layer4,
    )

    # Feature buffers: one row per sample; training targets start at -1.
    self.train_features = torch.zeros((self.num_samples, self.feature_dim))
    self.train_meta = []
    self.train_targets = -torch.ones((self.num_samples))
    self.valid_features = torch.zeros((self.num_samples_valid, self.feature_dim))
    self.valid_meta = []
    self.cuda_train_features = None
def __init__(
    self,
    input_shape,
    *,
    box2box_transform,
    num_classes,
    test_score_thresh=0.0,
    test_nms_thresh=0.5,
    test_topk_per_image=100,
    cls_agnostic_bbox_reg=False,
    smooth_l1_beta=0.0,
    box_reg_loss_type="smooth_l1",
    loss_weight=1.0,
    weak_detector_head=None,
    regression_branch=False,
    terms=None,
    freeze_layers=None,
    embedding_path='',
):
    """
    Replace the parent's prediction layers with "delta" predictors and load
    frozen embeddings.

    Args:
        input_shape (ShapeSpec or int): shape of the input feature; an int is
            treated as a channel count.
        box2box_transform: its ``weights`` length determines the box dimension.
        num_classes (int): number of foreground classes.
        test_score_thresh, test_nms_thresh, test_topk_per_image,
        cls_agnostic_bbox_reg, smooth_l1_beta, box_reg_loss_type, loss_weight:
            forwarded unchanged to the parent constructor.
        weak_detector_head: stored on ``self.weak_detector_head``.
        regression_branch (bool): when true the box delta weights start at
            zero; otherwise they are drawn from a normal with std 0.001.
        terms (dict or None): stored on ``self.terms``; ``None`` (the default)
            yields a fresh empty dict per instance.
        freeze_layers (list or None): forwarded to ``self._freeze_layers``;
            ``None`` (the default) means an empty list.
        embedding_path (str): file passed to ``torch.load``; the loaded object's
            'embeddings' entry is wrapped in a frozen ``nn.Embedding``. It is
            loaded unconditionally, so a real path must be supplied.
    """
    # A literal [] or {} default would be one shared object across every call.
    # `terms` is stored on the instance, so each instance must get its own dict.
    if terms is None:
        terms = {}
    if freeze_layers is None:
        freeze_layers = []

    super(SupervisedDetectorOutputsBase, self).__init__(
        input_shape=input_shape,
        box2box_transform=box2box_transform,
        num_classes=num_classes,
        test_score_thresh=test_score_thresh,
        test_nms_thresh=test_nms_thresh,
        test_topk_per_image=test_topk_per_image,
        cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
        smooth_l1_beta=smooth_l1_beta,
        box_reg_loss_type=box_reg_loss_type,
        loss_weight=loss_weight,
    )
    self.num_classes = num_classes
    self.terms = terms
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.box_dim = box_dim
    self.num_bbox_reg_classes = num_bbox_reg_classes
    self.weak_detector_head = weak_detector_head
    self.regression_branch = regression_branch

    # Delete the layers defined by the parent; they are replaced below.
    del self.cls_score
    del self.bbox_pred

    # Define delta predictors
    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    # Missing (falsy) spatial dimensions count as 1 when flattening.
    input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)
    self.input_size = input_size
    self.cls_score_delta = Linear(input_size, self.num_classes + 1)
    self.bbox_pred_delta = Linear(input_size, num_bbox_reg_classes * box_dim)

    # Init predictors: the classification delta always starts at zero.
    nn.init.constant_(self.cls_score_delta.weight, 0.)
    if not self.regression_branch:
        nn.init.normal_(self.bbox_pred_delta.weight, std=0.001)
    else:
        nn.init.constant_(self.bbox_pred_delta.weight, 0.)
    for layer in [self.cls_score_delta, self.bbox_pred_delta]:
        nn.init.constant_(layer.bias, 0.)

    # NOTE(review): torch.load unpickles the file, so only point
    # embedding_path at files you trust.
    pretrained_embeddings = torch.load(embedding_path)['embeddings']
    self.embeddings = nn.Embedding.from_pretrained(pretrained_embeddings, freeze=True)
    self._freeze_layers(layers=freeze_layers)
def output_shape(self):
    """
    Returns:
        dict: a single "res5" entry with 1024 channels, whose stride is 16
        when ``self.res5_dilation`` equals 2 and 32 otherwise.
    """
    if self.res5_dilation == 2:
        stride = 16
    else:
        stride = 32
    return {"res5": ShapeSpec(channels=1024, stride=stride)}
def _init_box_head(self, cfg, input_shape):
    """
    Create the spatial box branch (pooler, head, predictor) and, when
    ``cfg.MODEL.SPATIOTEMPORAL.ST_CLS`` is set, the spatio-temporal
    classification branch as well.

    Args:
        cfg: config node. Reads ``MODEL.ROI_BOX_HEAD``, ``MODEL.SPATIOTEMPORAL``
            and ``MODEL.PROPOSAL_GENERATOR.MIN_SIZE`` entries.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.
    """
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    st_cfg = cfg.MODEL.SPATIOTEMPORAL

    resolution = box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    sampling_ratio = box_cfg.POOLER_SAMPLING_RATIO
    pooler_type = box_cfg.POOLER_TYPE
    st_pooler_type = st_cfg.ST_POOLER_TYPE

    self.train_on_pred_boxes = box_cfg.TRAIN_ON_PRED_BOXES
    self.st_cls = st_cfg.ST_CLS
    self.spatial_cls = st_cfg.SPATIAL_CLS
    self.longterm_proposals = st_cfg.ROI_BOX_HEAD.REF_POST_NMS_TOP_N
    self.st_box_head_name = st_cfg.ROI_BOX_HEAD.NAME
    self.long_term = st_cfg.LONG_TERM
    self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE

    self.st_cls_short_term_aggregation = st_cfg.ST_CLS_SHORT_TERM_AGGREGATION
    self.proposal_tracking = st_cfg.PROPOSAL_TRACKING
    self.test_tracking_type = st_cfg.TEST_TRACKING_TYPE

    # The predictors are shared across all input feature maps (as in FPN),
    # so every feature map must have the same number of channels.
    channel_counts = [input_shape[name].channels for name in self.in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    in_channels = channel_counts[0]

    self.long_term_proposal_matcher = Matcher(
        [0.3],  # TODO: config(cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS)
        [0, 1],  # TODO: config(cfg.MODEL.ROI_HEADS.IOU_LABELS)
        allow_low_quality_matches=False,
    )

    # The two poolers differ only in their pooler type.
    shared_pooler_args = dict(
        output_size=resolution,
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    self.box_pooler = ROIPooler(pooler_type=pooler_type, **shared_pooler_args)
    self.st_box_pooler = ROIPooler(pooler_type=st_pooler_type, **shared_pooler_args)

    # "Box head" and "box predictor" are split mainly for historical reasons.
    # They are used together, so the predictor layers should really be part of
    # the head. New subclasses of ROIHeads do not need "box predictor"s.
    if self.st_cls:
        self.st_box_head = build_st_box_head(
            cfg,
            ShapeSpec(channels=in_channels, height=resolution, width=resolution),
        )
        self.st_cls_predictor = StClassificationOutputLayers(
            self.st_box_head.output_size, self.num_classes
        )

    self.box_head = build_box_head(
        cfg,
        ShapeSpec(channels=in_channels, height=resolution, width=resolution),
    )
    self.box_predictor = FastRCNNOutputLayers(
        self.box_head.output_size, self.num_classes, self.cls_agnostic_bbox_reg
    )

    if st_cfg.FREEZE_SPATIAL_HEAD:
        for component in (self.box_head, self.box_predictor):
            self.freeze_component(component)
def __init__(
    self,
    input_shape,
    *,
    box2box_transform,
    num_classes,
    num_attr_classes,
    max_attr_pred,
    attr_cls_mode,
    attr_cls_agnostic,
    ignore_nan_attr_class,
    test_attr_score_thresh=0.5,
    cls_agnostic_bbox_reg=False,
    smooth_l1_beta=0.0,
    test_score_thresh=0.0,
    test_nms_thresh=0.5,
    test_topk_per_image=100,
):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature to this module.
            A bare int is also accepted and treated as a channel count.
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            its ``weights`` length determines the box dimension.
        num_classes (int): number of foreground classes
        num_attr_classes (int): number of attributes classes
        max_attr_pred: stored on ``self.max_attr_pred``.
        attr_cls_mode (int): 0 builds a single linear attribute layer, 1 builds
            two stacked linear layers with a 1024-wide hidden size; any other
            value builds no attribute layer here.
        attr_cls_agnostic (bool): when true the attribute output is not
            replicated per class.
        ignore_nan_attr_class: stored on ``self.ignore_nan_attr_class``.
        test_attr_score_thresh (float): stored on ``self.test_attr_score_thresh``.
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss.
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
    """
    super().__init__()

    # Some backward compatibility: an int is interpreted as the channel count.
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)
    width = input_shape.width or 1
    height = input_shape.height or 1
    input_size = input_shape.channels * width * height

    # One output per foreground class plus one for background (hence + 1).
    self.cls_score = Linear(input_size, num_classes + 1)
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

    # Attribute head: one set of attribute scores, or one set per class.
    num_attr_reg_classes = 1 if attr_cls_agnostic else num_classes
    attr_out_dim = num_attr_reg_classes * num_attr_classes

    if attr_cls_mode == 0:
        self.attr_cls_score = Linear(input_size, attr_out_dim)
        nn.init.normal_(self.attr_cls_score.weight, std=0.01)
        nn.init.constant_(self.attr_cls_score.bias, 0)
    elif attr_cls_mode == 1:
        self.attr_cls_score_1 = Linear(input_size, 1024)
        self.attr_cls_score_2 = Linear(1024, attr_out_dim)
        for attr_layer in (self.attr_cls_score_1, self.attr_cls_score_2):
            nn.init.normal_(attr_layer.weight, std=0.01)
            nn.init.constant_(attr_layer.bias, 0)

    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    for layer in (self.cls_score, self.bbox_pred):
        nn.init.constant_(layer.bias, 0)

    self.num_attr_classes = num_attr_classes  # 295
    self.max_attr_pred = max_attr_pred
    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.test_attr_score_thresh = test_attr_score_thresh
    self.attr_cls_mode = attr_cls_mode
    self.attr_cls_agnostic = attr_cls_agnostic
    self.ignore_nan_attr_class = ignore_nan_attr_class
def test_StandardROIHeads_scriptability(self):
    """Check that scripted StandardROIHeads match the eager-mode predictions."""
    cfg = get_cfg()
    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    box_head_cfg.NAME = "FastRCNNConvFCHead"
    box_head_cfg.NUM_FC = 2
    box_head_cfg.POOLER_TYPE = "ROIAlignV2"
    box_head_cfg.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
    cfg.MODEL.MASK_ON = True
    roi_heads_cfg = cfg.MODEL.ROI_HEADS
    roi_heads_cfg.NMS_THRESH_TEST = 0.01
    roi_heads_cfg.SCORE_THRESH_TEST = 0.01

    # Random inputs are drawn in the same order as before: images first,
    # then features, then the (randomly initialised) heads.
    num_images = 2
    num_channels = 1024
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(torch.rand(num_images, 20, 30), image_sizes)
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

    roi_heads = StandardROIHeads(cfg, feature_shape).eval()

    def make_proposal(image_size, boxes, logits):
        # One Instances object holding proposal boxes and objectness logits.
        proposal = Instances(image_size)
        proposal.proposal_boxes = Boxes(
            torch.tensor(boxes, dtype=torch.float32))
        proposal.objectness_logits = torch.tensor(
            logits, dtype=torch.float32)
        return proposal

    proposal0 = make_proposal(
        image_sizes[0], [[1, 1, 3, 3], [2, 2, 6, 6]], [0.5, 0.7])
    proposal1 = make_proposal(
        image_sizes[1], [[1, 5, 2, 8], [7, 3, 10, 5]], [0.1, 0.9])
    proposals = [proposal0, proposal1]

    # Eager-mode reference predictions.
    pred_instances, _ = roi_heads(images, features, proposals)

    fields = {
        "objectness_logits": "Tensor",
        "proposal_boxes": "Boxes",
        "pred_classes": "Tensor",
        "scores": "Tensor",
        "pred_masks": "Tensor",
        "pred_boxes": "Boxes",
        "pred_keypoints": "Tensor",
        "pred_keypoint_heatmaps": "Tensor",
    }
    with patch_instances(fields) as new_instances:
        proposals = [
            new_instances.from_instances(proposal0),
            new_instances.from_instances(proposal1),
        ]
        scripted_rot_heads = torch.jit.script(roi_heads)
        scripted_pred_instances, _ = scripted_rot_heads(
            images, features, proposals)

    for eager, scripted in zip(pred_instances, scripted_pred_instances):
        self.assertEqual(eager.image_size, scripted.image_size)
        tensor_pairs = (
            (eager.pred_boxes.tensor, scripted.pred_boxes.tensor),
            (eager.scores, scripted.scores),
            (eager.pred_classes, scripted.pred_classes),
            (eager.pred_masks, scripted.pred_masks),
        )
        for expected, actual in tensor_pairs:
            self.assertTrue(torch.equal(expected, actual))
def __init__(self,
             input_shape,
             *,
             box2box_transform,
             num_classes,
             test_score_thresh=0.0,
             test_nms_thresh=0.5,
             test_topk_per_image=100,
             cls_agnostic_bbox_reg=False,
             smooth_l1_beta=0.0,
             box_reg_loss_type="smooth_l1",
             box_reg_loss_weight=1.0,
             add_unlabeled_class=False,
             label_converter=None,
             reverse_label_converter=None,
             num_centroid=256,
             clustering_interval=1000,
             cluster_obj_thresh=0.8,
             coupled_cos_thresh=0.15,
             coupled_obj_thresh=0.9,
             cos_thresh=0.15,
             pos_class_thresh=0.7,
             nms_thresh=0.3,
             n_sample=20,
             output_dir='./'):
    """
    Build the prediction layers and restore any saved pseudo ground truth.

    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec or int): shape of the input feature to this
            module. A plain int is treated as a channel count.
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
        num_classes (int): number of foreground classes
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
            `box_reg_loss_type` is "smooth_l1"
        box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        box_reg_loss_weight (float): Weight for box regression loss
        add_unlabeled_class (bool): stored on the module; not used here.
        label_converter (Tensor): required 1-D tensor. It is extended with
            `num_centroid` new ids that continue after its maximum value.
        reverse_label_converter: optional sized object. When given,
            `num_classes` becomes
            ``min(num_classes + 1, len(reverse_label_converter))``.
        num_centroid (int): number of extra classifier outputs (and extra
            label ids) appended after the regular classes.
        clustering_interval, cluster_obj_thresh, coupled_cos_thresh,
        coupled_obj_thresh, cos_thresh, pos_class_thresh, nms_thresh,
        n_sample: stored on the module; not used here.
        output_dir (str): directory whose ``pseudo_gts/<step>.pth`` files are
            scanned; the file with the largest step is loaded if any exist.

    Raises:
        TypeError: if `label_converter` is None.
    """
    super().__init__()
    if label_converter is None:
        # The default cannot work: the converter is measured and extended
        # below. Fail with a clear message instead of `len(None)`.
        raise TypeError(
            "label_converter must be a 1-D tensor of label ids, got None")

    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    input_size = input_shape.channels * (input_shape.width
                                         or 1) * (input_shape.height or 1)

    # Extend the label mapping with one fresh id per centroid.
    self.label_converter = label_converter
    self.reverse_label_converter = reverse_label_converter
    self.original_num_classes = len(self.label_converter)
    addition = self.label_converter.max() + torch.arange(num_centroid) + 1
    self.label_converter = torch.cat((self.label_converter, addition))

    if self.reverse_label_converter is not None:
        num_classes = min(num_classes + 1, len(reverse_label_converter))
    num_cls = num_classes
    self.add_unlabeled_class = add_unlabeled_class
    self.num_classes = num_cls
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_cls - 1
    box_dim = len(box2box_transform.weights)

    # Classifier over the regular classes plus one output per centroid.
    self.cls_score = Linear(input_size, num_cls + num_centroid)
    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.constant_(self.cls_score.bias, 0)

    self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    nn.init.constant_(self.bbox_pred.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.box_reg_loss_type = box_reg_loss_type
    self.box_reg_loss_weight = box_reg_loss_weight

    # Buffers that start empty; they are filled elsewhere in the class.
    self.feature_memory = []
    self.label_memory = []
    self.obj_score_memory = []
    self.path_memory = []
    self.bbox_memory = []

    self.num_centroid = num_centroid
    self.clustering_interval = clustering_interval

    # Frozen per-output weight: 1 for the regular classes, 0 for centroids.
    weight = torch.zeros((num_centroid + num_cls, 1))
    weight[:num_cls] = 1
    # NOTE(review): `from_pretrained` is a classmethod, so the instance built
    # first is discarded. It is kept because constructing it appears to
    # consume random numbers, and dropping it would shift seeded
    # initialisation of anything created afterwards.
    self.cls_weight = nn.Embedding(num_centroid + num_cls,
                                   1).from_pretrained(weight, freeze=True)

    self.turn_on = False
    self.step = 1
    self.cluster_count = 1
    self.pseudo_gt = None
    self.n_pseudo_gt = 0
    self.n_sample = n_sample
    self.cluster_obj_thresh = cluster_obj_thresh
    self.cos_thresh = cos_thresh
    self.coupled_cos_thresh = coupled_cos_thresh
    self.coupled_obj_thresh = coupled_obj_thresh
    self.pos_class_thresh = pos_class_thresh
    self.nms_thresh = nms_thresh
    self.pal = np.random.random((1024, 3)) * 255
    self.size_opt = 'lm'
    self.output_dir = output_dir

    # Resume from the newest saved pseudo ground truth, if there is one.
    # File names are expected to be "<integer step>.pth".
    pseudo_gt_dir = os.path.join(self.output_dir, 'pseudo_gts')
    saved_steps = [
        int(os.path.splitext(os.path.basename(found))[0])
        for found in glob.glob(os.path.join(pseudo_gt_dir, '*.pth'))
    ]
    if saved_steps:
        latest = max(saved_steps)
        # Only the file name is formatted, so braces in `output_dir` are safe.
        path = os.path.join(pseudo_gt_dir, '{}.pth'.format(latest))
        self.pseudo_gt = torch.load(path)
        self.n_pseudo_gt = len(self.pseudo_gt)
        self.step = latest + 1

    if self.pseudo_gt is not None and len(self.pseudo_gt) > 0:
        # Column 1 of the pseudo ground truth is read as a label id; every
        # output below the largest one is enabled.
        label = int(self.pseudo_gt[:, 1].max())
        weight[:label] = 1
        self.cls_weight = nn.Embedding(num_centroid + num_cls,
                                       1).from_pretrained(weight,
                                                          freeze=True)
def __init__(self, cfg):
    """
    Build backbone(s), proposal generator, ROI heads and input normalizers.

    Args:
        cfg: config node. This constructor reads MODEL.DEVICE,
            MODEL.PIXEL_MEAN, MODEL.PIXEL_STD, MODEL.BLUR_RGB,
            MODEL.MAX_POOL_RGB, INPUT.NUM_IN_CHANNELS, INPUT.FORMAT and
            VIS_PERIOD, and passes `cfg` on to the builder functions.

    Raises:
        AssertionError: if PIXEL_MEAN and PIXEL_STD differ in length.
    """
    super().__init__()
    # Checked up front: previously this ran only after the mean/std tensors
    # had already been reshaped using these lengths.
    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)

    self.device = torch.device(cfg.MODEL.DEVICE)
    self.backbone_2 = None
    # The two 'BGRTTT' formats get a second backbone ("middle fusion").
    middle_fusion = cfg.INPUT.FORMAT in ('BGRTTT', 'BGRTTT_perturb')

    if cfg.INPUT.NUM_IN_CHANNELS != 3:
        if middle_fusion:
            # Middle fusion: two backbones, each built for 3 input channels.
            input_shape = ShapeSpec(channels=3)
            self.backbone_2 = build_backbone(cfg, input_shape)
        else:
            # Early fusion: one backbone that takes every input channel.
            input_shape = ShapeSpec(channels=cfg.INPUT.NUM_IN_CHANNELS)
        self.backbone = build_backbone(cfg, input_shape)
        num_channels = cfg.INPUT.NUM_IN_CHANNELS
        print(num_channels, ' channel input')
    else:
        # RGB or thermal only
        print('3 channel input')
        self.backbone = build_backbone(cfg)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)

    if middle_fusion:
        # Downstream heads see twice the backbone channels at each level;
        # presumably the two backbones' features are concatenated in
        # forward() - TODO confirm.
        output_shape = {
            key: ShapeSpec(channels=spec.channels * 2, stride=spec.stride)
            for key, spec in self.backbone.output_shape().items()
        }
        self.proposal_generator = build_proposal_generator(
            cfg, output_shape)
        self.roi_heads = build_roi_heads(cfg, output_shape)

        pixel_mean_RGB = torch.Tensor(cfg.MODEL.PIXEL_MEAN[:3]).to(
            self.device).view(3, 1, 1)
        pixel_mean_thermal = torch.Tensor(cfg.MODEL.PIXEL_MEAN[3:]).to(
            self.device).view(3, 1, 1)
        # NOTE(review): the thermal normalizer reuses the first three std
        # values rather than PIXEL_STD[3:] - confirm this is intended.
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD[:3]).to(
            self.device).view(3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean_RGB) / pixel_std
        self.normalizer_thermal = lambda x: (x - pixel_mean_thermal
                                             ) / pixel_std
    else:
        backbone_shape = self.backbone.output_shape()
        self.proposal_generator = build_proposal_generator(
            cfg, backbone_shape)
        self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(
            self.device).view(num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std

    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT
    self.to(self.device)

    # Optional RGB pre-processing switches, stored as plain booleans.
    self.blur_rgb = bool(cfg.MODEL.BLUR_RGB)
    self.max_pool_rgb = bool(cfg.MODEL.MAX_POOL_RGB)
def __init__(
    self,
    input_shape: ShapeSpec,
    *,
    box2box_transform,
    num_classes: int,
    test_score_thresh: float = 0.0,
    test_nms_thresh: float = 0.5,
    test_topk_per_image: int = 100,
    cls_agnostic_bbox_reg: bool = False,
    smooth_l1_beta: float = 0.0,
    box_reg_loss_type: str = "smooth_l1",
    loss_weight: Union[float, Dict[str, float]] = 1.0,
):
    """
    Build the classification and box-regression prediction layers.

    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature to this module.
            A plain int is also accepted and treated as a channel count.
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
        num_classes (int): number of foreground classes
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
            `box_reg_loss_type` is "smooth_l1"
        box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        loss_weight (float|dict): weights to use for losses. Can be a single
            number (float or int) for weighting all losses, or a dict of
            individual weightings. Valid dict keys are:
                * "loss_cls": applied to classification loss
                * "loss_box_reg": applied to box regression loss
    """
    super().__init__()
    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    input_size = input_shape.channels * (input_shape.width
                                         or 1) * (input_shape.height or 1)

    # prediction layer for num_classes foreground classes and one background
    # class (hence + 1)
    self.cls_score = Linear(input_size, num_classes + 1)
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    for layer in (self.cls_score, self.bbox_pred):
        nn.init.constant_(layer.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.box_reg_loss_type = box_reg_loss_type

    # A scalar weight applies to both losses. Ints are accepted too: the
    # float-only check used to leave `loss_weight=1` as a bare int where a
    # dict is expected.
    if isinstance(loss_weight, (int, float)):
        loss_weight = {
            "loss_cls": loss_weight,
            "loss_box_reg": loss_weight,
        }
    self.loss_weight = loss_weight
def __init__(self,
             cfg=None,
             load_path=None,
             depth=101,
             vec_dim=128,
             max_pool=False,
             clf1_num=None,
             clf2_num=None,
             adv_eta=None):
    """
    Build a ResNet feature extractor with an embedding layer and optional heads.

    Args:
        cfg: optional config. When given, the backbone comes from
            `build_resnet_backbone` and is filled from the checkpoint at
            `cfg.MODEL.WEIGHTS` (entries under its 'model' key, with the
            'backbone.bottom_up.' prefix removed). Otherwise a torchvision
            ``resnet<depth>`` is fetched through `torch.hub`.
        load_path: optional path to a saved state dict. Its entries are
            routed by their first dotted component to 'backbone', 'fc',
            'clf1_layer' and 'clf2_layer'.
        depth (int): ResNet depth for the `torch.hub` branch.
        vec_dim (int): output size of the `fc` embedding layer.
        max_pool (bool): use adaptive max pooling instead of average pooling.
        clf1_num: output size of the first classifier head; None disables it.
        clf2_num: output size of the second classifier head; None disables it.
        adv_eta: optional value stored as a float tensor in `self.adv_eta`.
    """
    super(ResNetbasedNet, self).__init__()
    self.load = load_path is not None
    self.clf1 = clf1_num is not None
    self.clf2 = clf2_num is not None
    # `torch.tensor` already yields a tensor that does not require grad, so
    # the deprecated `Variable` wrapper is not needed.
    self.adv_eta = (None if adv_eta is None else torch.tensor(
        adv_eta, dtype=torch.float))

    if cfg is not None:
        model = build_resnet_backbone(
            cfg, ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
        # Load onto the CPU so checkpoints saved on a GPU also open on
        # CPU-only hosts; load_state_dict copies the values into the model.
        pretrained_model = torch.load(cfg.MODEL.WEIGHTS, map_location='cpu')
        cur_state = model.state_dict()
        mapped_dict = {}
        for key, value in pretrained_model.get('model', {}).items():
            stripped = key.replace('backbone.bottom_up.', '')
            if stripped in cur_state:
                mapped_dict[stripped] = value
        model.load_state_dict(mapped_dict)
        self.backbone = nn.Sequential(*model.children())
    else:
        # Pretrained weights are only fetched when nothing will be loaded
        # from `load_path` afterwards.
        model = torch.hub.load('pytorch/vision:v0.6.0',
                               'resnet{}'.format(depth),
                               pretrained=not self.load)
        # Drop the last two children of the torchvision model.
        self.backbone = nn.Sequential(*list(model.children())[:-2])

    self.max_pool = nn.AdaptiveMaxPool2d(
        (1, 1)) if max_pool else nn.AdaptiveAvgPool2d((1, 1))
    # NOTE(review): 2048 input features is hard-coded; this presumably
    # matches only backbones that emit 2048 channels - confirm before using
    # shallower depths.
    self.fc = nn.Linear(2048, vec_dim)

    if self.clf1:
        self.clf1_layer = nn.Sequential(nn.Linear(vec_dim, vec_dim),
                                        nn.BatchNorm1d(vec_dim), nn.ReLU(),
                                        nn.Linear(vec_dim, clf1_num))
    if self.clf2:
        self.clf2_layer = nn.Sequential(nn.Linear(vec_dim, vec_dim),
                                        nn.BatchNorm1d(vec_dim), nn.ReLU(),
                                        nn.Linear(vec_dim, clf2_num))

    if self.load:
        load_model = torch.load(load_path, map_location='cpu')
        targets = {'backbone': self.backbone, 'fc': self.fc}
        if self.clf1:
            targets['clf1_layer'] = self.clf1_layer
        if self.clf2:
            targets['clf2_layer'] = self.clf2_layer

        # Split the flat state dict into one state dict per sub-module,
        # keyed by the part of each name after the first dot.
        per_module = {prefix: {} for prefix in targets}
        for name, param in load_model.items():
            prefix, _, remainder = name.partition('.')
            if prefix in per_module:
                per_module[prefix][remainder] = param
        for prefix, module in targets.items():
            module.load_state_dict(per_module[prefix])
def __init__(
    self,
    input_shape,
    *,
    standard_cls_bone,
    std_num_classes,
    std_cls_emb_dim,
    box2box_transform,
    num_classes,
    arc_args=None,
    test_score_thresh=0.0,
    test_nms_thresh=0.5,
    test_topk_per_image=100,
    category_loss_type='cross_entropy',
    std_cls_loss_type='softmax',
    cls_agnostic_bbox_reg=False,
    smooth_l1_beta=0.0,
    box_reg_loss_type="smooth_l1",
    box_reg_loss_weight=1.0,
):
    """
    Build the coarse-category, box-regression and fine-grained class layers.

    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec or int): shape of the input feature to this
            module. A plain int is treated as a channel count.
        standard_cls_bone (nn.Module): sub-network used for the fine-grained
            branch. Its parameters are re-initialised here (see below).
        std_num_classes (int): number of fine-grained classes; the scoring
            layer has one extra output.
        std_cls_emb_dim (int): input size of the fine-grained scoring layer.
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
        num_classes (int): number of foreground classes
        arc_args (dict): settings for the 'arc' mode; keys 's', 'm' and
            'easy_margin' are read. Defaults to an empty dict.
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
        category_loss_type (str): stored on the module; not used here.
        std_cls_loss_type (str): 'softmax' (Linear scoring layer) or 'arc'
            (ArcLayer scoring layer).
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
            `box_reg_loss_type` is "smooth_l1"
        box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        box_reg_loss_weight (float): Weight for box regression loss

    Raises:
        NotImplementedError: if `std_cls_loss_type` is neither 'softmax'
            nor 'arc'.
    """
    super(MlabelStandardFastRCNNOutputLayer2, self).__init__()
    # Avoid a shared mutable default; an omitted argument is still an
    # empty dict.
    if arc_args is None:
        arc_args = {}

    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    input_size = input_shape.channels * (input_shape.width
                                         or 1) * (input_shape.height or 1)

    # Coarse category classification: num_classes foreground classes and
    # one background class (hence + 1).
    self.category_score = nn.Sequential(
        Flatten(), Linear(input_size, num_classes + 1))

    # Box regression.
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.bbox_pred = nn.Sequential(
        Flatten(), Linear(input_size, num_bbox_reg_classes * box_dim))

    # Fine-grained classification.
    self.standard_cls_bone = standard_cls_bone
    if std_cls_loss_type == 'softmax':
        self.std_cls_score = Linear(std_cls_emb_dim, std_num_classes + 1)
        nn.init.normal_(self.std_cls_score.weight, std=0.01)
        nn.init.constant_(self.std_cls_score.bias, 0)
    elif std_cls_loss_type == 'arc':
        self.std_cls_score = ArcLayer(std_cls_emb_dim,
                                      std_num_classes + 1,
                                      s=arc_args['s'],
                                      m=arc_args['m'],
                                      easy_margin=arc_args['easy_margin'])
    else:
        raise NotImplementedError('目前仅支持softmax、arc两种模式,暂不支持{}'.format(
            std_cls_loss_type, ))

    # Re-initialise every parameter of the three sub-modules by name:
    # names containing 'weight' get N(0, 0.01), names containing 'bias' get 0.
    # NOTE(review): this also overwrites whatever values `standard_cls_bone`
    # arrived with - confirm that is intended.
    for pairs in [
            self.standard_cls_bone.named_parameters(),
            self.category_score.named_parameters(),
            self.bbox_pred.named_parameters()
    ]:
        for name, params in pairs:
            if 'weight' in name:
                nn.init.normal_(params, std=0.01)
            elif 'bias' in name:
                nn.init.constant_(params, 0.)

    self.std_cls_loss_type = std_cls_loss_type
    self.category_loss_type = category_loss_type
    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.box_reg_loss_type = box_reg_loss_type
    self.box_reg_loss_weight = box_reg_loss_weight
def output_shape(self):
    """Return a dict mapping ``"stride<s>"`` to a ShapeSpec per output feature.

    Each entry of `self._out_feature_strides` contributes one item whose
    channel count comes from `self._out_feature_channels` under the same key.
    """
    shapes = {}
    for feature_name, stride in self._out_feature_strides.items():
        shapes[f"stride{stride}"] = ShapeSpec(
            channels=self._out_feature_channels[feature_name],
            stride=stride,
        )
    return shapes