def __init__(self, roi_feature_transform, pretrained_weights=None):
    """Set up the backbone, optional RPN, RoI pooling layer and generator block.

    Args:
        roi_feature_transform: Handed as first argument to both
            ``net_utils.roiPoolingLayer`` and ``GeneratorBlock``.
        pretrained_weights: Forwarded unchanged to ``self._init_modules``.
    """
    # Refuse to build this module unless GAN mode is switched on.
    assert cfg.GAN.GAN_MODE_ON
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction: look the factory up by its
    # configured name, then instantiate it.
    build_conv_body = get_func(cfg.MODEL.CONV_BODY)
    self.Conv_Body = build_conv_body()
    backbone = self.Conv_Body

    # Region Proposal Network (only when enabled in the config)
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            backbone.dim_out, backbone.spatial_scale)

    # RoI pooling layer configured from the backbone's scale and resolution.
    self.roi_pool = net_utils.roiPoolingLayer(
        roi_feature_transform,
        backbone.spatial_scale,
        backbone.resolution)

    self.Generator_Block = GeneratorBlock(
        roi_feature_transform,
        backbone.spatial_scale_base,
        backbone.resolution,
        backbone.dim_out_base,
        backbone.dim_out)

    self._init_modules(pretrained_weights)
def __init__(self):
    """Assemble the backbone, RPN and the box / mask / keypoint heads enabled in ``cfg``."""
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()
    body = self.Conv_Body

    # Region Proposal Network
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            body.dim_out, body.spatial_scale)

    if cfg.FPN.FPN_ON:
        fpn = cfg.FPN
        # Only supports case when RPN and ROI min levels are the same
        assert fpn.RPN_MIN_LEVEL == fpn.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert fpn.RPN_MAX_LEVEL >= fpn.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = fpn.ROI_MAX_LEVEL - fpn.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        body.spatial_scale = body.spatial_scale[-self.num_roi_levels:]

    # BBOX Branch (hw: RPN output goes to the RoI pooling layer)
    if not cfg.MODEL.RPN_ONLY:
        if cfg.MODEL.FASTER_RCNN:
            # hw: e.g. ResNet_roi_conv5_head -- the RPN hands RoIs and
            # probabilities to the RoIAlign layer.
            box_head_factory = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)
            self.Box_Head = box_head_factory(
                self.RPN.dim_out,
                self.roi_feature_transform,
                body.spatial_scale)
            # hw: the final avgpool output is fed to the fully connected
            # layers, which emit boxes and probabilities.
            self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
                self.Box_Head.dim_out)
        elif cfg.MODEL.LIGHT_HEAD_RCNN:
            # NOTE(review): the flattened source reads "#hw from modeling...";
            # the import is treated as code because the name is used below.
            from modeling.light_head_rcnn_heads import ResNet_Conv5_light_head
            self.LightHead = ResNet_Conv5_light_head(body.dim_out)
            # NOTE(review): 490 is a hard-coded first argument -- presumably
            # the light-head channel count; confirm against the head module.
            self.Box_Outs = light_head_rcnn_heads.light_head_rcnn_outputs(
                490, self.roi_feature_transform, body.spatial_scale)

    # Mask Branch
    if cfg.MODEL.MASK_ON:
        mask_head_factory = get_func(cfg.MRCNN.ROI_MASK_HEAD)
        self.Mask_Head = mask_head_factory(
            self.RPN.dim_out,
            self.roi_feature_transform,
            body.spatial_scale)
        # Heads that advertise SHARE_RES5 reuse the box head's res5 module.
        if getattr(self.Mask_Head, 'SHARE_RES5', False):
            self.Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Mask_Head.dim_out)

    # Keypoints Branch
    if cfg.MODEL.KEYPOINTS_ON:
        keypoint_head_factory = get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD)
        self.Keypoint_Head = keypoint_head_factory(
            self.RPN.dim_out,
            self.roi_feature_transform,
            body.spatial_scale)
        if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
            self.Keypoint_Head.share_res5_module(self.Box_Head.res5)
        self.Keypoint_Outs = keypoint_rcnn_heads.keypoint_outputs(
            self.Keypoint_Head.dim_out)

    self._init_modules()
def __init__(self):
    """Build backbone, RPN, box head and the amodal / inmodal mask heads enabled in ``cfg``."""
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    conv_body_factory = get_func(cfg.MODEL.CONV_BODY)
    self.Conv_Body = conv_body_factory()

    # Region Proposal Network
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out,
            self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        roi_level_count = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        self.num_roi_levels = roi_level_count

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        all_scales = self.Conv_Body.spatial_scale
        self.Conv_Body.spatial_scale = all_scales[-roi_level_count:]

    # BBOX Branch
    if not cfg.MODEL.RPN_ONLY:
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.RPN.dim_out,
            self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # Mask Branch (amodal head)
    if cfg.MODEL.MASK_ON:
        amodal_head = get_func(cfg.MRCNN.ROI_MASK_HEAD)(
            self.RPN.dim_out,
            self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        self.Amodal_Mask_Head = amodal_head
        # Heads that advertise SHARE_RES5 reuse the box head's res5 module.
        if getattr(self.Amodal_Mask_Head, 'SHARE_RES5', False):
            self.Amodal_Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Amodal_Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Amodal_Mask_Head.dim_out)

    # Inmodal Branch: built by the same configured mask-head factory.
    # NOTE(review): nesting reconstructed from flattened source; this branch
    # is assumed to be independent of MASK_ON.
    if cfg.MODEL.INMODAL_ON:
        inmodal_head = get_func(cfg.MRCNN.ROI_MASK_HEAD)(
            self.RPN.dim_out,
            self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        self.Inmodal_Mask_Head = inmodal_head
        if getattr(self.Inmodal_Mask_Head, 'SHARE_RES5', False):
            self.Inmodal_Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Inmodal_Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Inmodal_Mask_Head.dim_out)

    self._init_modules()
def __init__(self):
    """Build the detector trunk, a deep-copied predicate trunk, and the RelPN / RelDN heads."""
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    self.use_gt_boxes = True  # a temporary setting
    use_gt = self.use_gt_boxes

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network: skipped entirely when ground-truth boxes
    # are used.
    if not use_gt:
        if cfg.RPN.RPN_ON:
            self.RPN = rpn_heads.generic_rpn_outputs(
                self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        kept_scales = self.Conv_Body.spatial_scale[-self.num_roi_levels:]
        self.Conv_Body.spatial_scale = kept_scales

    # BBOX Branch: the head's input width comes from the backbone
    # (Conv_Body.dim_out), not from the RPN.
    box_head_factory = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)
    self.Box_Head = box_head_factory(
        self.Conv_Body.dim_out,
        self.roi_feature_transform,
        self.Conv_Body.spatial_scale)
    if not use_gt:
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # Snapshot everything built so far as the predicate network, then strip
    # the detection-only parts from the copy.
    self.Prd_RCNN = copy.deepcopy(self)
    if not use_gt:
        del self.Prd_RCNN.RPN
        del self.Prd_RCNN.Box_Outs

    # initialize word vectors: first training dataset if any is configured,
    # otherwise the first test dataset.
    if len(cfg.TRAIN.DATASETS):
        ds_name = cfg.TRAIN.DATASETS[0]
    else:
        ds_name = cfg.TEST.DATASETS[0]
    self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(ds_name)

    # RelPN
    self.RelPN = relpn_heads.generic_relpn_outputs()
    # RelDN
    reldn_in_dim = self.Box_Head.dim_out * 3  # concat of SPO
    self.RelDN = reldn_heads.reldn_head(
        reldn_in_dim, self.obj_vecs, self.prd_vecs)

    self._init_modules()
def __init__(self):
    """Construct the backbone, RPN and whichever RoI heads ``cfg`` enables."""
    super().__init__()

    def _build_roi_head(factory_name):
        # Every RoI head factory receives the same three constructor
        # arguments; they are read at call time so the FPN scale trimming
        # below is already applied.
        return get_func(factory_name)(
            self.RPN.dim_out,
            self.roi_feature_transform,
            self.Conv_Body.spatial_scale)

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = (
            self.Conv_Body.spatial_scale[-self.num_roi_levels:])

    # BBOX Branch
    if not cfg.MODEL.RPN_ONLY:
        self.Box_Head = _build_roi_head(cfg.FAST_RCNN.ROI_BOX_HEAD)
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # Mask Branch
    if cfg.MODEL.MASK_ON:
        self.Mask_Head = _build_roi_head(cfg.MRCNN.ROI_MASK_HEAD)
        # Heads that advertise SHARE_RES5 reuse the box head's res5 module.
        if getattr(self.Mask_Head, 'SHARE_RES5', False):
            self.Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Mask_Head.dim_out)

    # Keypoints Branch
    if cfg.MODEL.KEYPOINTS_ON:
        self.Keypoint_Head = _build_roi_head(cfg.KRCNN.ROI_KEYPOINTS_HEAD)
        if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
            self.Keypoint_Head.share_res5_module(self.Box_Head.res5)
        self.Keypoint_Outs = keypoint_rcnn_heads.keypoint_outputs(
            self.Keypoint_Head.dim_out)

    self._init_modules()
def __init__(self):
    """Build the relationship model and load per-class frequency statistics.

    Besides the backbone, optional RPN, box head, deep-copied predicate
    trunk and RelPN / RelDN heads, this constructor loads object and
    predicate frequency tables from JSON files selected by ``cfg.DATASET``
    and, for the ``'weighted_cross_entropy'`` loss, derives normalised
    inverse-frequency class weights from them.

    Raises:
        NotImplementedError: if ``cfg.DATASET`` is not one of the supported
            dataset names.
    """
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network (not built when ground-truth boxes are used)
    if not cfg.TRAIN.USE_GT_BOXES and cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # BBOX Branch: fed from the backbone, with a fixed second argument of
    # 4096 passed to the head factory.
    self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
        self.Conv_Body.dim_out, 4096, self.roi_feature_transform,
        self.Conv_Body.spatial_scale)
    if not cfg.TRAIN.USE_GT_BOXES:
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # Snapshot everything built so far as the predicate network, then strip
    # the detection-only parts from the copy.
    self.Prd_RCNN = copy.deepcopy(self)
    if not cfg.TRAIN.USE_GT_BOXES:
        del self.Prd_RCNN.RPN
        del self.Prd_RCNN.Box_Outs

    # initialize word vectors
    ds_name = cfg.TRAIN.DATASETS[0] if len(
        cfg.TRAIN.DATASETS) else cfg.TEST.DATASETS[0]
    obj_categories, prd_categories = get_obj_prd_vecs(ds_name)

    # RelPN
    self.RelPN = relpn_heads.generic_relpn_outputs()
    # RelDN
    self.RelDN = reldn_heads.reldn_head(self.Box_Head.dim_out * 3)  # concat of SPO

    self.prd_weights = None
    self.obj_weights = None

    # Frequency files per dataset, relative to cfg.DATA_DIR, as
    # (predicates, objects). '{}' is filled with cfg.RNG_SEED; entries
    # without a placeholder are unaffected by the formatting.
    freq_files = {
        'gvqa10k': ('/gvqa/reduced_data/10k/seed{}/predicates_freqs.json',
                    '/gvqa/reduced_data/10k/seed{}/objects_freqs.json'),
        'gvqa20k': ('/gvqa/reduced_data/20k/seed{}/predicates_freqs.json',
                    '/gvqa/reduced_data/20k/seed{}/objects_freqs.json'),
        'gvqa': ('/gvqa/seed{}/predicates_freqs.json',
                 '/gvqa/seed{}/objects_freqs.json'),
        'vg80k': ('/vg/predicates_freqs.json',
                  '/vg/objects_freqs.json'),
        'vg8k': ('/vg8k/seed{}/train_predicates_freqs.json',
                 '/vg8k/seed{}/train_objects_freqs.json'),
    }
    if cfg.DATASET not in freq_files:
        raise NotImplementedError
    prd_suffix, obj_suffix = freq_files[cfg.DATASET]
    # Only the suffix is formatted, so braces in DATA_DIR stay untouched.
    freq_prd_path = cfg.DATA_DIR + prd_suffix.format(cfg.RNG_SEED)
    freq_obj_path = cfg.DATA_DIR + obj_suffix.format(cfg.RNG_SEED)

    # Use context managers so both file handles are closed deterministically.
    with open(freq_prd_path) as prd_file:
        self.prd_freq_dict = json.load(prd_file)
    with open(freq_obj_path) as obj_file:
        self.obj_freq_dict = json.load(obj_file)

    # Drop the first predicate entry (treated as background by the name
    # used here) before comparing against the configured class count.
    no_bg_prd_categories = prd_categories[1:]
    assert len(no_bg_prd_categories) == cfg.MODEL.NUM_PRD_CLASSES
    self.prd_categories = no_bg_prd_categories
    self.obj_categories = obj_categories

    self.freq_prd = get_freq_from_dict(self.prd_freq_dict, self.prd_categories)
    self.freq_obj = get_freq_from_dict(self.obj_freq_dict, self.obj_categories)

    if cfg.MODEL.LOSS == 'weighted_cross_entropy':
        logger.info('loading frequencies')

        # +1 keeps classes with zero count from producing a division by zero.
        freq_prd = self.freq_prd + 1
        freq_obj = self.freq_obj + 1
        prd_weights = np.sum(freq_prd) / freq_prd
        obj_weights = np.sum(freq_obj) / freq_obj

        # Normalise so the mean weight is 1.
        self.prd_weights = (prd_weights / np.mean(prd_weights)).astype(
            np.float32)
        self.obj_weights = (obj_weights / np.mean(obj_weights)).astype(
            np.float32)

        # Prepend a slot at index 0 (dropped above) and give it the
        # smallest predicate weight.
        temp = np.zeros(shape=self.prd_weights.shape[0] + 1, dtype=np.float32)
        temp[1:] = self.prd_weights
        temp[0] = min(self.prd_weights)
        self.prd_weights = temp

    self._init_modules()
def __init__(self):
    """Construct the backbone, RPN and whichever RoI heads ``cfg`` enables."""
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction.
    # FPN.fpn_ResNet50_conv5_body for example: get_func looks the function
    # up by name, and this is where the FPN_ResNet layer functions are
    # called. It returns a ResNet that has been associated with FPN.
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network.
    # Fast RCNN will have RPN on according to config.py;
    # this is where the RPN is created.
    if cfg.RPN.RPN_ON:
        # generic_rpn_outputs calls rpn_heads.FPN.fpn_rpn_outputs, which
        # creates the head subnets and builds anchors of different levels.
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # BBOX Branch.
    # When created, RPN_ONLY is set to false by default.
    if not cfg.MODEL.RPN_ONLY:
        # In the faster rcnn fpn r-50 case this is
        # fast_rcnn_heads.roi_2mlp_head.
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # Mask Branch
    if cfg.MODEL.MASK_ON:
        self.Mask_Head = get_func(cfg.MRCNN.ROI_MASK_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        # Heads that advertise SHARE_RES5 reuse the box head's res5 module.
        if getattr(self.Mask_Head, 'SHARE_RES5', False):
            self.Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Mask_Head.dim_out)

    # Keypoints Branch
    if cfg.MODEL.KEYPOINTS_ON:
        self.Keypoint_Head = get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
            self.Keypoint_Head.share_res5_module(self.Box_Head.res5)
        self.Keypoint_Outs = keypoint_rcnn_heads.keypoint_outputs(
            self.Keypoint_Head.dim_out)

    self._init_modules()
def __init__(self, ds=None):
    """Build the detector plus the car classification / translation heads.

    Args:
        ds: Dataset object. Only used when ``cfg.MODEL.LOSS_3D_2D_ON`` is
            set, in which case it must provide ``load_car_models()``,
            ``unique_car_names`` and ``get_intrinsic_mat()``.

    Raises:
        ValueError: if ``cfg.MODEL.LOSS_3D_2D_ON`` is set but ``ds`` is None.
    """
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # BBOX Branch
    if not cfg.MODEL.RPN_ONLY:
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # BBOX Branch for finer car model classification
    if cfg.MODEL.CAR_CLS_HEAD_ON:
        self.car_cls_Head = get_func(cfg.CAR_CLS.ROI_BOX_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        self.car_cls_Outs = car_3d_pose_heads.fast_rcnn_outputs_car_cls_rot(
            self.car_cls_Head.dim_out)
        # NOTE(review): path is relative to the process working directory.
        self.shape_sim_mat = np.loadtxt('./utilities/sim_mat.txt')

    # TRANS Branch for car translation regression
    if cfg.MODEL.TRANS_HEAD_ON:
        if cfg.TRANS_HEAD.INPUT_CONV_BODY:
            self.car_trans_Head = get_func(cfg.TRANS_HEAD.TRANS_HEAD)(
                self.RPN.dim_out, self.roi_feature_transform,
                self.Conv_Body.spatial_scale, cfg.TRANS_HEAD.INPUT_DIM)
            self.car_trans_Outs = car_3d_pose_heads.car_trans_outputs(
                self.car_trans_Head.dim_out)
        elif cfg.TRANS_HEAD.INPUT_TRIPLE_HEAD:
            # We use the 1024 dim from car_cls+rot head
            self.car_trans_Head = get_func(cfg.TRANS_HEAD.TRANS_HEAD)(
                cfg.TRANS_HEAD.INPUT_DIM)
            self.car_trans_Outs = car_3d_pose_heads.car_trans_triple_outputs(
                self.car_trans_Head.dim_out, self.car_cls_Head.dim_out)
        else:
            self.car_trans_Head = get_func(cfg.TRANS_HEAD.TRANS_HEAD)(
                cfg.TRANS_HEAD.INPUT_DIM)
            self.car_trans_Outs = car_3d_pose_heads.car_trans_outputs(
                self.car_trans_Head.dim_out)

    # 3D to 2D projection error for multi-loss
    # NOTE(review): nesting reconstructed from flattened source; this block
    # is assumed to be independent of TRANS_HEAD_ON.
    if cfg.MODEL.LOSS_3D_2D_ON:
        # `ds` defaults to None; fail with a clear message instead of an
        # AttributeError on the first attribute access.
        if ds is None:
            raise ValueError(
                'cfg.MODEL.LOSS_3D_2D_ON requires a dataset object (ds).')
        self.car_models = ds.load_car_models()
        self.car_names = ds.unique_car_names
        self.intrinsic_mat = ds.get_intrinsic_mat()

    # Mask Branch
    if cfg.MODEL.MASK_ON:
        self.Mask_Head = get_func(cfg.MRCNN.ROI_MASK_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        # Heads that advertise SHARE_RES5 reuse the box head's res5 module.
        if getattr(self.Mask_Head, 'SHARE_RES5', False):
            self.Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Mask_Head.dim_out)

    # Keypoints Branch
    if cfg.MODEL.KEYPOINTS_ON:
        self.Keypoint_Head = get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
            self.Keypoint_Head.share_res5_module(self.Box_Head.res5)
        self.Keypoint_Outs = keypoint_rcnn_heads.keypoint_outputs(
            self.Keypoint_Head.dim_out)

    self._init_modules()
def __init__(self):
    """Build the detector and, when ``cfg.MODEL.VCOCO_ON``, the HOI head.

    The HOI head class is chosen by ``cfg.NETWORK_NAME``
    (``'PMFNet_Baseline'`` or ``'PMFNet_Final'``).

    Raises:
        ValueError: if ``cfg.MODEL.VCOCO_ON`` is set but ``cfg.NETWORK_NAME``
            is not one of the supported names.
    """
    super().__init__()

    print("import {}".format(cfg.NETWORK_NAME))
    if cfg.NETWORK_NAME == 'PMFNet_Baseline':
        from modeling.hoi import PMFNet_Baseline as HOI
    elif cfg.NETWORK_NAME == 'PMFNet_Final':
        from modeling.hoi import PMFNet_Final as HOI
    else:
        # Leave a sentinel instead of an unbound local so that the failure,
        # if the head is actually requested, is explicit.
        HOI = None

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # BBOX Branch
    if not cfg.MODEL.RPN_ONLY:
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # HOI Branch
    # NOTE(review): nesting reconstructed from flattened source; this block
    # is assumed to be independent of RPN_ONLY.
    if cfg.MODEL.VCOCO_ON:
        if HOI is None:
            raise ValueError(
                'Unsupported cfg.NETWORK_NAME for the HOI head: {!r}'.format(
                    cfg.NETWORK_NAME))
        # With multilevel RoIs the head gets every retained scale,
        # otherwise only the last entry.
        if cfg.FPN.MULTILEVEL_ROIS:
            self.hoi_spatial_scale = self.Conv_Body.spatial_scale
        else:
            self.hoi_spatial_scale = self.Conv_Body.spatial_scale[-1]
        self.HOI_Head = HOI(self.Conv_Body.dim_out,
                            self.roi_feature_transform,
                            self.hoi_spatial_scale)

    # Mask Branch
    if cfg.MODEL.MASK_ON:
        self.Mask_Head = get_func(cfg.MRCNN.ROI_MASK_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        # Heads that advertise SHARE_RES5 reuse the box head's res5 module.
        if getattr(self.Mask_Head, 'SHARE_RES5', False):
            self.Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Mask_Head.dim_out)

    # Keypoints Branch
    if cfg.MODEL.KEYPOINTS_ON:
        self.Keypoint_Head = get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
            self.Keypoint_Head.share_res5_module(self.Box_Head.res5)
        self.Keypoint_Outs = keypoint_rcnn_heads.keypoint_outputs(
            self.Keypoint_Head.dim_out)

    self._init_modules()
def __init__(self):
    """Build the detector with optional view-fusion nets and position heads."""
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()
    backbone = self.Conv_Body

    # Layers for left-right view
    # Added by zhangfandong
    # LR_VIEW and GIF should not be on at the same time
    # NOTE(review): as written this assert only fails when all three flags
    # are on together; any two of them enabled still pass, and the elif
    # chain below then silently uses the first one.
    assert (not cfg.MODEL.LR_VIEW_ON) or (not cfg.MODEL.GIF_ON) or (
        not cfg.MODEL.LRASY_MAHA_ON)
    if cfg.MODEL.LR_VIEW_ON:
        # assert cfg.FPN.FPN_ON is False
        self.lr_view_net = lrview_net_body.get_lrv_net(
            backbone.dim_out * 2,
            backbone.dim_out,
            cfg.LR_VIEW.FUSION_NET)
    elif cfg.MODEL.LRASY_MAHA_ON:
        self.lrasy_maha_net = lrview_net_body.get_lrvasymaha_net(
            backbone.dim_out,
            backbone.dim_out,
            cfg.LR_VIEW.FUSION_NET)
    elif cfg.MODEL.GIF_ON:
        self.gif_net = gif_net_body.get_gif_net(backbone.dim_out)

    # Region Proposal Network
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            backbone.dim_out, backbone.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        # (check disabled in this model):
        # assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        backbone.spatial_scale = backbone.spatial_scale[-self.num_roi_levels:]

    # BBOX Branch
    if not cfg.MODEL.RPN_ONLY:
        box_head_factory = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)
        self.Box_Head = box_head_factory(
            self.RPN.dim_out,
            self.roi_feature_transform,
            backbone.spatial_scale)
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)
        # NOTE(review): nesting reconstructed from flattened source; placed
        # under the box branch because it reads Box_Head.dim_out -- confirm.
        if cfg.LESION.POSITION_RCNN:
            self.Position_RCNN = position_outputs(self.Box_Head.dim_out)

    # Position Branch
    if cfg.LESION.USE_POSITION:
        # Previously: position_Xconv1fc_gn_head(2048, 1024, 2)
        self.Position_Head = position_Xconv1fc_gn_head(2048, 1024, 3)
        position_dim = self.Position_Head.dim_out
        self.Position_Cls_Outs = position_cls_outputs(position_dim)
        self.Position_Reg_Outs = position_reg_outputs(position_dim)

    # Mask Branch
    if cfg.MODEL.MASK_ON:
        mask_head_factory = get_func(cfg.MRCNN.ROI_MASK_HEAD)
        self.Mask_Head = mask_head_factory(
            self.RPN.dim_out,
            self.roi_feature_transform,
            backbone.spatial_scale)
        # Heads that advertise SHARE_RES5 reuse the box head's res5 module.
        if getattr(self.Mask_Head, 'SHARE_RES5', False):
            self.Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Mask_Head.dim_out)

    # Keypoints Branch
    if cfg.MODEL.KEYPOINTS_ON:
        keypoint_head_factory = get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD)
        self.Keypoint_Head = keypoint_head_factory(
            self.RPN.dim_out,
            self.roi_feature_transform,
            backbone.spatial_scale)
        if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
            self.Keypoint_Head.share_res5_module(self.Box_Head.res5)
        self.Keypoint_Outs = keypoint_rcnn_heads.keypoint_outputs(
            self.Keypoint_Head.dim_out)

    self._init_modules()
def __init__(self):
    """Build the FPN detector with per-level Conv-GRU temporal modules.

    On top of the usual backbone / RPN / RoI heads this constructor creates
    one ``ConvGRUCell2d`` per FPN level, optional delta-flow convolutions
    (``cfg.MODEL.USE_DELTA_FLOW``) and optional ``FlowAlign`` modules
    (``cfg.CONVGRU.DYNAMIC_MODEL``). FPN must be enabled.
    """
    super().__init__()

    # Stop forwarding from Conv-GRU module.
    self.stop_after_hidden_states = False

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    self.update_hidden_states = True

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    assert (cfg.FPN.FPN_ON)

    # Insert temporal module for video object segmentation.
    # As we use resNet, dims for 5 levels are hard coded.
    # (earlier variant: fpn_dims = (2048, 1024, 512, 256, 64))
    fpn_dims = [cfg.FPN.DIM] * 5
    num_levels = len(fpn_dims)
    h_channels = cfg.CONVGRU.HIDDEN_STATE_CHANNELS

    # One Conv-GRU cell per FPN level; the hidden-state width for each
    # level is taken from the config by level index.
    self.ConvGRUs = nn.ModuleList()
    for level, in_channels in enumerate(fpn_dims):
        self.ConvGRUs.append(
            ConvGRUCell2d(in_channels,
                          h_channels[level],
                          kernel_size=3,
                          stride=1,
                          dilation=1,
                          groups=1,
                          use_GN=True,
                          GN_groups=32))

    self.flow_features = None
    if cfg.MODEL.USE_DELTA_FLOW:
        self.flow_features = [None] * num_levels
        self.Conv_Delta_Flows_Features = nn.ModuleList()
        # TODO add initialization.
        for in_channels in fpn_dims:
            # 3x3 conv down to 2 channels followed by tanh.
            self.Conv_Delta_Flows_Features.append(
                nn.Sequential(
                    nn.Conv2d(in_channels,
                              2,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=False),
                    nn.Tanh()))

    # One hidden-state slot per FPN level, initially empty.
    self.hidden_states = [None] * num_levels

    # Region Proposal Network
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # BBOX Branch
    if not cfg.MODEL.RPN_ONLY:
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        # The original branched on cfg.MODEL.IDENTITY_TRAINING here, but
        # both branches built exactly the same outputs module, so the
        # conditional was a no-op and has been collapsed.
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # Mask Branch
    if cfg.MODEL.MASK_ON:
        self.Mask_Head = get_func(cfg.MRCNN.ROI_MASK_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        # Heads that advertise SHARE_RES5 reuse the box head's res5 module.
        if getattr(self.Mask_Head, 'SHARE_RES5', False):
            self.Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Mask_Head.dim_out, cfg.MODEL.NUM_CLASSES)

    # Keypoints Branch
    if cfg.MODEL.KEYPOINTS_ON:
        self.Keypoint_Head = get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
            self.Keypoint_Head.share_res5_module(self.Box_Head.res5)
        self.Keypoint_Outs = keypoint_rcnn_heads.keypoint_outputs(
            self.Keypoint_Head.dim_out)

    if cfg.CONVGRU.DYNAMIC_MODEL:
        # One FlowAlign module per hard-coded FPN scale.
        self.FlowAligns = nn.ModuleList()
        self.fpn_scales = [1. / 64., 1. / 32., 1. / 16., 1. / 8., 1. / 4.]
        for scale in self.fpn_scales:
            self.FlowAligns.append(FlowAlign(scale))

    self._init_modules()
def __init__(self, category_to_id_map, prd_category_to_id_map, args=None):
    """Build the relationship detector and optionally load triplet metadata.

    Args:
        category_to_id_map: Stored on the instance and passed to
            ``get_obj_prd_vecs`` together with ``prd_category_to_id_map``.
        prd_category_to_id_map: Stored on the instance; see above.
        args: Stored on the instance. ``args.dataset`` is read when
            ``cfg.BINARY_LOSS`` or ``cfg.EVAL_MAP`` is set, so it must not
            be None in that case.
    """
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    self.category_to_id_map = category_to_id_map
    self.prd_category_to_id_map = prd_category_to_id_map
    self.args = args

    # ----------------------------------------------------------------------
    # initialize word vectors: first training dataset if any is configured,
    # otherwise the first test dataset
    # ----------------------------------------------------------------------
    ds_name = cfg.TRAIN.DATASETS[0] if len(
        cfg.TRAIN.DATASETS) else cfg.TEST.DATASETS[0]
    self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(
        ds_name, self.category_to_id_map, self.prd_category_to_id_map)

    # ----------------------------------------------------------------------
    # Backbone for feature extraction
    # ----------------------------------------------------------------------
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # ----------------------------------------------------------------------
    # Region Proposal Network
    # ----------------------------------------------------------------------
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # ----------------------------------------------------------------------
    # BBOX Branch
    # ----------------------------------------------------------------------
    self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
        self.RPN.dim_out, self.roi_feature_transform,
        self.Conv_Body.spatial_scale)

    # ----------------------------------------------------------------------
    # RelPN
    # ----------------------------------------------------------------------
    self.RelPN = relpn_heads.generic_relpn_outputs()

    # ----------------------------------------------------------------------
    # RelDN
    # ----------------------------------------------------------------------
    self.RelDN = reldn_heads.reldn_head(self.Box_Head.dim_out,
                                        self.obj_vecs, self.prd_vecs)
    # Keep a handle on the module itself as well as the head instance.
    self.reldn_heads = reldn_heads

    # ----------------------------------------------------------------------
    # triplets
    # ----------------------------------------------------------------------
    if cfg.BINARY_LOSS or cfg.EVAL_MAP:
        if 'vhico' in self.args.dataset:
            # Pick the triplet file for the configured evaluation subset;
            # anything other than 'test' / 'unseen' uses the training file.
            if cfg.EVAL_SUBSET == 'test':
                triplet_path = TRIPLET_TEST
            elif cfg.EVAL_SUBSET == 'unseen':
                triplet_path = TRIPLET_UNSEEN
            else:
                triplet_path = TRIPLET_TRAIN

            # SECURITY: pickle.load executes arbitrary code from the file;
            # these paths must only ever point at trusted, locally
            # generated data.
            # The context manager closes the handle deterministically.
            with open(triplet_path, 'rb') as triplet_file:
                self.video_name_triplet_dict = pickle.load(triplet_file)
            print(
                'there are %d triplets in %s' %
                (len(self.video_name_triplet_dict['triplet_id_frame']),
                 triplet_path))

    # ----------------------------------------------------------------------
    # initialize model
    # ----------------------------------------------------------------------
    self._init_modules()
def init_detectron(self, args):
    """Configure the external Detectron.pytorch library and bind its helpers.

    Temporarily puts ``../../external/Detectron.pytorch/lib`` (relative to
    this file) at the front of ``sys.path``, overrides a fixed set of
    Detectron ``cfg`` options, imports the Detectron callables this object
    uses and stores them as attributes, then undoes the ``sys.path`` and
    ``sys.modules`` changes. The cleanup runs even if an import or a config
    assignment raises.

    Args:
        args: object whose ``nclass`` attribute is written to
            ``cfg.MODEL.NUM_CLASSES``.
    """
    # Fixed feature geometry used for the RPN head and the RoI transform.
    self.spatial_scale = 1. / 16
    self.dim_out = 832

    # Forget any already-imported `datasets` module before importing from the
    # Detectron tree (presumably so Detectron's own `datasets` package is the
    # one that gets resolved -- confirm against the external library).
    sys.modules.pop('datasets', None)

    this_dir = os.path.dirname(__file__)
    lib_path = os.path.join(this_dir, '../../external/Detectron.pytorch/lib')
    sys.path.insert(0, lib_path)
    try:
        # config
        from core.config import cfg
        self.cfg = cfg
        cfg.MODEL.FASTER_RCNN = True
        cfg.MODEL.NUM_CLASSES = args.nclass
        cfg.MODEL.CLS_AGNOSTIC_BBOX_REG = True
        cfg.MODEL.TYPE = "generalized_rcnn"
        cfg.RPN.CLS_ACTIVATION = 'sigmoid'
        cfg.RPN.SIZES = (32, 64, 128, 256, 512)
        cfg.RPN.STRIDE = 16
        cfg.FPN.COARSEST_STRIDE = 16  # bugfix, anchor stride depends on FPN parameters
        cfg.TRAIN.BATCH_SIZE_PER_IM = 30
        cfg.TRAIN.IMS_PER_BATCH = 2
        cfg.TRAIN.MAX_SIZE = 400
        cfg.TRAIN.RPN_BATCH_SIZE_PER_IM = 256
        cfg.TEST.RPN_POST_NMS_TOP_N = 30
        cfg.TEST.DETECTIONS_PER_IM = 0  # turn off image cap (for rare classes)
        cfg.TEST.SCORE_THRESH = 0  # include all bounding boxes (for rare classes)
        cfg.FAST_RCNN.ROI_XFORM_METHOD = "RoIAlign"
        cfg.FAST_RCNN.ROI_XFORM_RESOLUTION = 7
        cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO = 2

        # bind classes/methods from Detectron
        from modeling.model_builder import Generalized_RCNN
        from roi_data.rpn import add_rpn_blobs
        import modeling.rpn_heads as rpn_heads
        from core.test import box_results_with_nms_and_limit
        import utils.boxes as box_utils
        import utils.vis as vis_utils

        self.vis_utils = vis_utils
        self.add_rpn_blobs = add_rpn_blobs
        self.box_utils = box_utils
        self.box_results_with_nms_and_limit = box_results_with_nms_and_limit
        self.RPN = rpn_heads.generic_rpn_outputs(self.dim_out, self.spatial_scale)
        # Calls Generalized_RCNN.roi_feature_transform unbound, with this
        # object standing in as `self`; cfg values are read at call time.
        self.roi_xform = lambda x, rpn_ret: Generalized_RCNN.roi_feature_transform(
            self, x, rpn_ret,
            blob_rois='rois',
            method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
            resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION,
            spatial_scale=self.spatial_scale,
            sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO)
    finally:
        # Undo the import-environment changes no matter how we leave the block.
        # `pop(..., None)` tolerates `datasets` never having been imported, and
        # `remove(lib_path)` takes out exactly the entry inserted above even if
        # an import prepended something else to sys.path in the meantime.
        sys.modules.pop('datasets', None)
        if lib_path in sys.path:
            sys.path.remove(lib_path)

    self.head_batch_size = 10
def __init__(self):
    """Assemble the backbone, box head, predicate copy and stage-dependent classifiers.

    ``cfg.MODEL.MEMORY_MODULE_STAGE`` selects the classifier family. Stage 1
    uses ``dot_product_classifier``. Stage 2 uses
    ``meta_embedding_classifier`` and additionally creates zero-initialised
    centroid tensors and the ``disc_centroids_loss`` feature losses.

    Raises:
        NotImplementedError: if ``cfg.MODEL.MEMORY_MODULE_STAGE`` is neither
            1 nor 2.
    """
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network (only built when proposals are not ground truth)
    if not cfg.TRAIN.USE_GT_BOXES and cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    self.conv5_dim_out = 2048
    self.prd_dim_out = self.conv5_dim_out * 3

    # BBOX Branch
    self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
        self.Conv_Body.dim_out, self.conv5_dim_out,
        self.roi_feature_transform, self.Conv_Body.spatial_scale)

    # Predicate branch: a deep copy of everything built so far, minus the
    # parts it does not use. The deletions are guarded because RPN exists
    # only when cfg.RPN.RPN_ON is set and Box_Outs is never created by this
    # constructor; an unguarded `del` raises AttributeError in those cases.
    self.Prd_RCNN = copy.deepcopy(self)
    if not cfg.TRAIN.USE_GT_BOXES and hasattr(self.Prd_RCNN, 'RPN'):
        del self.Prd_RCNN.RPN
    if hasattr(self.Prd_RCNN, 'Box_Outs'):
        del self.Prd_RCNN.Box_Outs

    # RelPN
    self.RelPN = relpn_heads.generic_relpn_outputs()

    # Validate the stage once; everything below branches on it.
    stage = cfg.MODEL.MEMORY_MODULE_STAGE
    if stage not in (1, 2):
        raise NotImplementedError(
            'unsupported cfg.MODEL.MEMORY_MODULE_STAGE: %r' % (stage,))
    use_memory = stage == 2
    stage1_weights = use_memory

    # Centroids start as zeros in stage 2. Loading them from
    # cfg.MODEL.SBJ_OBJ_CENTROIDS_PATH / cfg.MODEL.PRD_CENTROIDS_PATH was
    # already disabled in the previous revision of this code.
    self.sbj_obj_centroids = None
    self.prd_centroids = None
    if use_memory:
        self.sbj_obj_centroids = torch.zeros(
            cfg.MODEL.NUM_CLASSES - 1, self.conv5_dim_out)
        self.prd_centroids = torch.zeros(
            cfg.MODEL.NUM_PRD_CLASSES + 1, self.prd_dim_out)

    # Stage 1 uses a plain dot-product classifier; stage 2 uses the
    # meta-embedding classifier, which includes the memory module.
    classifier_factory = (
        meta_embedding_classifier if use_memory else dot_product_classifier)

    # Positional arguments: in_dim, num_classes, stage1_weights, dataset,
    # then a flag computed as `not self.training`.
    model_args = [
        self.conv5_dim_out,
        cfg.MODEL.NUM_CLASSES - 1,
        stage1_weights,
        cfg.DATASET,
        not self.training,
    ]
    self.classifier = classifier_factory.create_model(*model_args)

    # The predicate classifier takes the same leading arguments plus one
    # extra trailing `True` (meaning defined by create_model -- TODO confirm).
    prd_model_args = [
        self.prd_dim_out,
        cfg.MODEL.NUM_PRD_CLASSES + 1,
        stage1_weights,
        cfg.DATASET,
        not self.training,
        True,
    ]
    self.prd_classifier = classifier_factory.create_model(*prd_model_args)

    # Feature (centroid) losses exist only in stage 2.
    self.feature_loss_sbj_obj = None
    self.feature_loss_prd = None
    if use_memory:
        self.feature_loss_sbj_obj = disc_centroids_loss.create_loss(
            self.conv5_dim_out, cfg.MODEL.NUM_CLASSES - 1)
        self.feature_loss_weight_sbj_obj = 0.01
        self.feature_loss_prd = disc_centroids_loss.create_loss(
            self.prd_dim_out, cfg.MODEL.NUM_PRD_CLASSES + 1)
        self.feature_loss_weight_prd = 0.01

    self._init_modules()
def __init__(self):
    """Build the detector, its predicate copy, and the relationship heads.

    Creates the conv body, optional RPN, box head and outputs, a deep copy
    of the model (``Prd_RCNN``) without RPN and box outputs, the optional
    relationship pyramid, RelPN and RelDN. After ``_init_modules()`` it
    optionally clones the box head into trainable subject/object heads.
    """
    super().__init__()

    # Cache slots, initially empty.
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Feature-extraction backbone.
    body_builder = get_func(cfg.MODEL.CONV_BODY)
    self.Conv_Body = body_builder()

    # Region Proposal Network.
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # RPN and RoI heads must start at the same pyramid level ...
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # ... while the RPN is allowed to reach coarser levels than RoI.
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # Blobs run from the coarsest to the finest level, so any levels
        # that only the RPN uses sit at the front and are sliced away:
        # keep just the trailing scales consumed by the RoI heads.
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        roi_scales = self.Conv_Body.spatial_scale[-self.num_roi_levels:]
        self.Conv_Body.spatial_scale = roi_scales

    # Box branch.
    box_head_builder = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)
    self.Box_Head = box_head_builder(
        self.RPN.dim_out, self.roi_feature_transform,
        self.Conv_Body.spatial_scale)
    self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(self.Box_Head.dim_out)

    # Predicate network: snapshot of the model so far, without RPN/outputs.
    self.Prd_RCNN = copy.deepcopy(self)
    del self.Prd_RCNN.RPN
    del self.Prd_RCNN.Box_Outs

    # Relationship pyramid connection (requires FPN).
    if cfg.MODEL.USE_REL_PYRAMID:
        assert cfg.FPN.FPN_ON
        self.RelPyramid = rel_pyramid_module.rel_pyramid_module(
            self.num_roi_levels)

    # RelPN
    self.RelPN = relpn_heads.generic_relpn_outputs()

    # RelDN
    reldn_in_dim = self.Box_Head.dim_out * 3
    self.RelDN = reldn_heads.reldn_head(reldn_in_dim)

    self._init_modules()

    # The S/O branches are created AFTER weight initialisation so that the
    # initialised box-head weights are carried over by the deep copies.
    if cfg.MODEL.ADD_SO_SCORES:
        self.S_Head = copy.deepcopy(self.Box_Head)
        self.O_Head = copy.deepcopy(self.Box_Head)
        for head in (self.S_Head, self.O_Head):
            for param in head.parameters():
                param.requires_grad = True
def __init__(self):
    """Build the detector plus the object-refinement and edge-prediction layers.

    On top of the conv body, optional RPN and box head/outputs this creates
    the ``get_ort_embeds`` embedding, optional relationship pyramid, RelPN,
    two further box heads, the union-mask module, object message-passing
    layers, and the linear layers used for subject/object mapping and
    predicate scoring. It optionally adds a frequency bias, then calls
    ``_init_modules()``.
    """
    super().__init__()

    # Cache slots, initially empty.
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Feature-extraction backbone.
    body_builder = get_func(cfg.MODEL.CONV_BODY)
    self.Conv_Body = body_builder()

    # Region Proposal Network.
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # RPN and RoI heads must start at the same pyramid level ...
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # ... while the RPN is allowed to reach coarser levels than RoI.
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # Blobs run from the coarsest to the finest level; drop the leading
        # scales that only the RPN uses and keep those for the RoI heads.
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        roi_scales = self.Conv_Body.spatial_scale[-self.num_roi_levels:]
        self.Conv_Body.spatial_scale = roi_scales

    # Box branch.
    self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
        self.RPN.dim_out, self.roi_feature_transform,
        self.Conv_Body.spatial_scale)
    self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(self.Box_Head.dim_out)

    self.ori_embed = get_ort_embeds(cfg.MODEL.NUM_CLASSES, 200)

    # Relationship pyramid connection (requires FPN).
    if cfg.MODEL.USE_REL_PYRAMID:
        assert cfg.FPN.FPN_ON
        self.RelPyramid = rel_pyramid_module.rel_pyramid_module(
            self.num_roi_levels)

    # RelPN
    self.RelPN = relpn_heads.generic_relpn_outputs()

    # Additional heads: a copy of the box head and a separate predicate head.
    self.Box_Head_sg = copy.deepcopy(self.Box_Head)
    prd_head_builder = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD_PRD)
    self.Box_Head_prd = prd_head_builder(
        self.RPN.dim_out, self.roi_feature_transform,
        self.Conv_Body.spatial_scale)
    self.union_mask = reldn_heads.union_mask(self.RPN.dim_out)

    # Object feature merging, message passing and classification.
    self.obj_dim = self.Box_Head.dim_out
    self.merge_obj_feats = Merge_OBJ_Feats(self.obj_dim, 200, 512)
    self.obj_mps1 = Message_Passing4OBJ(512)
    self.obj_mps2 = Message_Passing4OBJ(512)
    self.ObjClassifier = nn.Linear(512, cfg.MODEL.NUM_CLASSES)

    self.EdgePN = relpn_heads.single_scale_pairs_pn_outputs(False)
    self.get_phr_feats = nn.Linear(self.obj_dim, 512)

    # Subject/object mapping layers, Xavier-normal initialised. The input
    # width is obj_dim + 200 + 5, the 200 matching the width passed to
    # get_ort_embeds above (meaning of the extra 5 not visible here).
    so_in_dim = self.obj_dim + 200 + 5
    sbj_layer = nn.Linear(so_in_dim, self.obj_dim)
    sbj_layer.weight = torch.nn.init.xavier_normal_(sbj_layer.weight, gain=1.0)
    self.sbj_map = sbj_layer
    obj_layer = nn.Linear(so_in_dim, self.obj_dim)
    obj_layer.weight = torch.nn.init.xavier_normal_(obj_layer.weight, gain=1.0)
    self.obj_map = obj_layer

    # One extra predicate class is added when cfg.MODEL.USE_BG is set.
    self.num_prd_classes = (
        cfg.MODEL.NUM_PRD_CLASSES + 1 if cfg.MODEL.USE_BG
        else cfg.MODEL.NUM_PRD_CLASSES)

    compress_layer = nn.Linear(self.obj_dim, self.num_prd_classes)
    compress_layer.weight = torch.nn.init.xavier_normal_(
        compress_layer.weight, gain=1.0)
    self.rel_compress = compress_layer

    if cfg.MODEL.USE_FREQ_BIAS:
        # Assume we are training/testing on only one dataset: prefer the
        # first training dataset, fall back to the first test dataset.
        bias_dataset = (
            cfg.TRAIN.DATASETS[0] if len(cfg.TRAIN.DATASETS)
            else cfg.TEST.DATASETS[0])
        self.freq_bias = FrequencyBias_Fix(bias_dataset)

    self._init_modules()
def __init__(self):
    """Build the detector with a matching block between backbone and RPN.

    Creates the conv body (``pretrained`` follows ``cfg.LOAD_IMAGENET``), a
    matching network chosen by ``cfg.CO_ATTEN``, the optional RPN, and the
    box / mask / keypoint branches, then calls ``_init_modules()``.
    Afterwards it builds the fold-to-class table and resolves the training
    category list when ``cfg.SEEN == 1``.
    """
    super().__init__()

    # Cache slots, initially empty.
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Feature-extraction backbone.
    pretrained_backbone = bool(cfg.LOAD_IMAGENET)
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)(pretrained=pretrained_backbone)

    # Matching mechanism.
    match_builder = (
        matching.match_block if cfg.CO_ATTEN
        else matching.l1_distance_match_block)
    self.match_net = match_builder(self.Conv_Body.dim_out)

    # Region Proposal Network.
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # RPN and RoI heads must start at the same pyramid level ...
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # ... while the RPN is allowed to reach coarser levels than RoI.
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # Blobs run from the coarsest to the finest level; drop the leading
        # scales that only the RPN uses and keep those for the RoI heads.
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        roi_scales = self.Conv_Body.spatial_scale[-self.num_roi_levels:]
        self.Conv_Body.spatial_scale = roi_scales

    # Box branch.
    if not cfg.MODEL.RPN_ONLY:
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        # The output head depends only on cfg.RELATION_RCNN; the FPN and
        # non-FPN cases select the same constructors.
        outs_builder = (
            fast_rcnn_heads.fast_rcnn_outputs_co if cfg.RELATION_RCNN
            else fast_rcnn_heads.fast_rcnn_outputs_wo_co)
        self.Box_Outs = outs_builder(self.Box_Head.dim_out)

    # Mask branch.
    if cfg.MODEL.MASK_ON:
        self.Mask_Head = get_func(cfg.MRCNN.ROI_MASK_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if getattr(self.Mask_Head, 'SHARE_RES5', False):
            self.Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Mask_Head.dim_out)

    # Keypoints branch.
    if cfg.MODEL.KEYPOINTS_ON:
        self.Keypoint_Head = get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
            self.Keypoint_Head.share_res5_module(self.Box_Head.res5)
        self.Keypoint_Outs = keypoint_rcnn_heads.keypoint_outputs(
            self.Keypoint_Head.dim_out)

    self._init_modules()

    # Fold table: 'all' holds class ids 1..14; each numbered fold is that
    # set minus the contiguous id range listed for it.
    every_class = set(range(1, 15))
    left_out_by_fold = {
        1: range(1, 3),
        2: range(3, 6),
        3: range(6, 9),
        4: range(9, 11),
        5: range(11, 15),
    }
    folds = {'all': every_class}
    for fold_id, left_out in left_out_by_fold.items():
        folds[fold_id] = every_class - set(left_out)

    if cfg.SEEN == 1:
        cat_list = cfg.TRAIN.CATEGORIES
        # A single entry is a group (fold) key; expand it to its class ids.
        # NOTE(review): cat_list is not used further in the code visible here.
        if len(cat_list) == 1:
            cat_list = list(folds[cat_list[0]])
def __init__(self):
    """Build the detector and, after weight init, the adaptation heads.

    Creates the conv body, optional RPN, and the box / mask / keypoint
    branches, then calls ``_init_modules()``. The image-level and RoI-level
    domain discriminators and the distillation attention head are created
    afterwards, each gated by its own ``cfg.TRAIN`` flag.
    """
    super().__init__()

    # Cache slots, initially empty.
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Feature-extraction backbone.
    body_builder = get_func(cfg.MODEL.CONV_BODY)
    self.Conv_Body = body_builder()

    # Region Proposal Network.
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # RPN and RoI heads must start at the same pyramid level ...
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # ... while the RPN is allowed to reach coarser levels than RoI.
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # Blobs run from the coarsest to the finest level; drop the leading
        # scales that only the RPN uses and keep those for the RoI heads.
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        roi_scales = self.Conv_Body.spatial_scale[-self.num_roi_levels:]
        self.Conv_Body.spatial_scale = roi_scales

    # Box branch.
    if not cfg.MODEL.RPN_ONLY:
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # Mask branch.
    if cfg.MODEL.MASK_ON:
        self.Mask_Head = get_func(cfg.MRCNN.ROI_MASK_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if getattr(self.Mask_Head, 'SHARE_RES5', False):
            self.Mask_Head.share_res5_module(self.Box_Head.res5)
        self.Mask_Outs = mask_rcnn_heads.mask_rcnn_outputs(
            self.Mask_Head.dim_out)

    # Keypoints branch.
    if cfg.MODEL.KEYPOINTS_ON:
        self.Keypoint_Head = get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
            self.Keypoint_Head.share_res5_module(self.Box_Head.res5)
        self.Keypoint_Outs = keypoint_rcnn_heads.keypoint_outputs(
            self.Keypoint_Head.dim_out)

    self._init_modules()

    # Domain discriminator branches, created after _init_modules().
    grl_scale = cfg.TRAIN.GRL_SCALER
    if cfg.TRAIN.DOMAIN_ADAPT_IM:
        self.DiscriminatorImage_Head = adversarial_heads.domain_discriminator_im(
            grl_scaler=grl_scale)
    if cfg.TRAIN.DOMAIN_ADAPT_ROI:
        self.DiscriminatorRoi_Head = adversarial_heads.domain_discriminator_roi(
            grl_scaler=grl_scale)

    # Learned attention for the distillation lambda branch.
    if cfg.TRAIN.DISTILL_ATTN:
        self.AttentionRoi_Head = fast_rcnn_heads.distill_attention_head()