def __init__(self):
    """Assemble the backbone, box head and relationship heads.

    Builds, in order: the conv backbone selected by ``cfg.MODEL.CONV_BODY``,
    an optional RPN, the RoI box head (plus box outputs when ground-truth
    boxes are not used), ``Prd_RCNN`` (a deep copy of everything built up
    to that point), the object/predicate word vectors, and the RelPN and
    RelDN heads. Ends by calling ``self._init_modules()``.
    """
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # A temporary setting: while True, neither the RPN nor the box output
    # layers are constructed below.
    self.use_gt_boxes = True

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network
    if not self.use_gt_boxes and cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # BBOX Branch
    self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
        self.Conv_Body.dim_out,
        self.roi_feature_transform,
        self.Conv_Body.spatial_scale)
    if not self.use_gt_boxes:
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # Deep copy of the model as built so far; the copy does not keep the
    # RPN or the box output layers.
    self.Prd_RCNN = copy.deepcopy(self)
    if not self.use_gt_boxes:
        # The RPN is only created when cfg.RPN.RPN_ON is set, so an
        # unconditional `del` would raise AttributeError when it is off.
        if hasattr(self.Prd_RCNN, 'RPN'):
            del self.Prd_RCNN.RPN
        del self.Prd_RCNN.Box_Outs

    # initialize word vectors (first training dataset, else first test one)
    ds_name = (cfg.TRAIN.DATASETS[0] if cfg.TRAIN.DATASETS
               else cfg.TEST.DATASETS[0])
    self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(ds_name)

    # RelPN
    self.RelPN = relpn_heads.generic_relpn_outputs()
    # RelDN; input width is 3x the box-head width (concat of SPO)
    self.RelDN = reldn_heads.reldn_head(
        self.Box_Head.dim_out * 3, self.obj_vecs, self.prd_vecs)

    self._init_modules()
def __init__(self):
    """Assemble the model and load class-frequency statistics.

    Builds the conv backbone, an optional RPN, the RoI box head (plus box
    outputs when ``cfg.TRAIN.USE_GT_BOXES`` is off), ``Prd_RCNN`` (a deep
    copy of everything built up to that point), the word vectors and the
    RelPN / RelDN heads. It then loads per-category frequency JSON files
    for ``cfg.DATASET`` and, for the weighted losses, derives per-class
    weights from them. Ends by calling ``self._init_modules()``.

    Raises:
        NotImplementedError: if ``cfg.MODEL.INPUT_LANG_EMBEDDING_DIM`` is
            neither 300 nor 600, or ``cfg.DATASET`` has no known
            frequency files.
    """
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network
    if not cfg.TRAIN.USE_GT_BOXES and cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # BBOX Branch
    self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
        self.Conv_Body.dim_out,
        4096,
        self.roi_feature_transform,
        self.Conv_Body.spatial_scale)
    if not cfg.TRAIN.USE_GT_BOXES:
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # Deep copy of the model as built so far; the copy does not keep the
    # RPN or the box output layers.
    self.Prd_RCNN = copy.deepcopy(self)
    if not cfg.TRAIN.USE_GT_BOXES:
        # The RPN is only created when cfg.RPN.RPN_ON is set, so an
        # unconditional `del` would raise AttributeError when it is off.
        if hasattr(self.Prd_RCNN, 'RPN'):
            del self.Prd_RCNN.RPN
        del self.Prd_RCNN.Box_Outs

    # initialize word vectors (first training dataset, else first test one)
    ds_name = (cfg.TRAIN.DATASETS[0] if cfg.TRAIN.DATASETS
               else cfg.TEST.DATASETS[0])
    if cfg.MODEL.INPUT_LANG_EMBEDDING_DIM == 300:
        (self.obj_vecs, self.prd_vecs,
         obj_categories, prd_categories) = get_obj_prd_vecs(ds_name)
    elif cfg.MODEL.INPUT_LANG_EMBEDDING_DIM == 600:
        (self.obj_vecs, self.prd_vecs,
         obj_categories, prd_categories) = get_obj_prd_gn_relco_vecs(ds_name)
    else:
        raise NotImplementedError

    # RelPN
    self.RelPN = relpn_heads.generic_relpn_outputs()
    # RelDN; input width is 3x the box-head width (concat of SPO)
    self.RelDN = reldn_heads.reldn_head(
        self.Box_Head.dim_out * 3, self.obj_vecs, self.prd_vecs)

    # Stay None unless one of the weighted losses is selected below.
    self.prd_weights = None
    self.obj_weights = None

    # (predicate, object) frequency files per dataset, relative to
    # cfg.DATA_DIR. '{}' is filled with cfg.RNG_SEED; templates without a
    # placeholder are returned unchanged by str.format.
    freq_file_templates = {
        'gvqa10k': ('/gvqa/reduced_data/10k/seed{}/predicates_freqs.json',
                    '/gvqa/reduced_data/10k/seed{}/objects_freqs.json'),
        'gvqa20k': ('/gvqa/reduced_data/20k/seed{}/predicates_freqs.json',
                    '/gvqa/reduced_data/20k/seed{}/objects_freqs.json'),
        'gvqa': ('/gvqa/seed{}/predicates_freqs.json',
                 '/gvqa/seed{}/objects_freqs.json'),
        'vg80k': ('/vg/predicates_freqs.json',
                  '/vg/objects_freqs.json'),
        'vg8k': ('/vg8k/seed{}/train_predicates_freqs.json',
                 '/vg8k/seed{}/train_objects_freqs.json'),
    }
    if cfg.DATASET not in freq_file_templates:
        raise NotImplementedError
    prd_template, obj_template = freq_file_templates[cfg.DATASET]
    freq_prd_path = cfg.DATA_DIR + prd_template.format(cfg.RNG_SEED)
    freq_obj_path = cfg.DATA_DIR + obj_template.format(cfg.RNG_SEED)

    # Context managers close the files promptly instead of leaking handles.
    with open(freq_prd_path) as prd_file:
        self.prd_freq_dict = json.load(prd_file)
    with open(freq_obj_path) as obj_file:
        self.obj_freq_dict = json.load(obj_file)

    # Drop the first predicate category; the remainder must match the
    # configured number of predicate classes.
    no_bg_prd_categories = prd_categories[1:]
    assert len(no_bg_prd_categories) == cfg.MODEL.NUM_PRD_CLASSES
    self.prd_categories = no_bg_prd_categories
    self.obj_categories = obj_categories

    self.freq_prd = get_freq_from_dict(self.prd_freq_dict, self.prd_categories)
    self.freq_obj = get_freq_from_dict(self.obj_freq_dict, self.obj_categories)

    if cfg.MODEL.LOSS in ('weighted_cross_entropy', 'weighted_focal'):
        logger.info('loading frequencies')
        # Add-one smoothed inverse frequencies, rescaled to have mean 1.
        freq_prd = self.freq_prd + 1
        freq_obj = self.freq_obj + 1
        prd_weights = np.sum(freq_prd) / freq_prd
        obj_weights = np.sum(freq_obj) / freq_obj
        self.prd_weights = (prd_weights / np.mean(prd_weights)).astype(np.float32)
        self.obj_weights = (obj_weights / np.mean(obj_weights)).astype(np.float32)

        # Prepend an entry for index 0 (the category dropped above),
        # weighted like the least-weighted predicate.
        padded = np.zeros(shape=self.prd_weights.shape[0] + 1, dtype=np.float32)
        padded[1:] = self.prd_weights
        padded[0] = min(self.prd_weights)
        self.prd_weights = padded

    self._init_modules()
def __init__(self, category_to_id_map, prd_category_to_id_map, args=None):
    """Assemble the model from explicit category maps.

    Stores the arguments on the instance, loads the object/predicate word
    vectors, then builds the conv backbone, the RPN (when
    ``cfg.RPN.RPN_ON``), the RoI box head and the RelPN / RelDN heads.
    When ``cfg.BINARY_LOSS`` or ``cfg.EVAL_MAP`` is set and the dataset
    name contains ``'vhico'``, it also loads the triplet dictionary for
    ``cfg.EVAL_SUBSET``. Ends by calling ``self._init_modules()``.

    Args:
        category_to_id_map: passed through to ``get_obj_prd_vecs`` and
            kept as ``self.category_to_id_map``.
        prd_category_to_id_map: passed through to ``get_obj_prd_vecs`` and
            kept as ``self.prd_category_to_id_map``.
        args: kept as ``self.args``. Its ``dataset`` attribute is read
            when ``cfg.BINARY_LOSS`` or ``cfg.EVAL_MAP`` is set, so it
            must not be None in that configuration.
    """
    super().__init__()
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None
    self.category_to_id_map = category_to_id_map
    self.prd_category_to_id_map = prd_category_to_id_map
    self.args = args

    # ----------------------------------------------------------------
    # initialize word vectors (first training dataset, else first test one)
    # ----------------------------------------------------------------
    ds_name = (cfg.TRAIN.DATASETS[0] if cfg.TRAIN.DATASETS
               else cfg.TEST.DATASETS[0])
    self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(
        ds_name, self.category_to_id_map, self.prd_category_to_id_map)

    # ----------------------------------------------------------------
    # Backbone for feature extraction
    # ----------------------------------------------------------------
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # ----------------------------------------------------------------
    # Region Proposal Network
    # ----------------------------------------------------------------
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # ----------------------------------------------------------------
    # BBOX Branch
    # ----------------------------------------------------------------
    # NOTE(review): this reads self.RPN.dim_out, and self.RPN is only
    # assigned above when cfg.RPN.RPN_ON is set -- unless a base class
    # provides it, this constructor requires RPN_ON. Confirm.
    self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
        self.RPN.dim_out,
        self.roi_feature_transform,
        self.Conv_Body.spatial_scale)

    # ----------------------------------------------------------------
    # RelPN
    # ----------------------------------------------------------------
    self.RelPN = relpn_heads.generic_relpn_outputs()

    # ----------------------------------------------------------------
    # RelDN
    # ----------------------------------------------------------------
    self.RelDN = reldn_heads.reldn_head(
        self.Box_Head.dim_out, self.obj_vecs, self.prd_vecs)
    self.reldn_heads = reldn_heads

    # ----------------------------------------------------------------
    # triplets
    # ----------------------------------------------------------------
    if cfg.BINARY_LOSS or cfg.EVAL_MAP:
        if 'vhico' in self.args.dataset:
            # Only the file differs per subset; anything other than
            # 'test' / 'unseen' falls back to the training triplets.
            if cfg.EVAL_SUBSET == 'test':
                triplet_path = TRIPLET_TEST
            elif cfg.EVAL_SUBSET == 'unseen':
                triplet_path = TRIPLET_UNSEEN
            else:
                triplet_path = TRIPLET_TRAIN

            # NOTE(review): pickle.load can execute arbitrary code; the
            # triplet files are presumably trusted project artifacts.
            # The context manager closes the file instead of leaking it.
            with open(triplet_path, 'rb') as triplet_file:
                self.video_name_triplet_dict = pickle.load(triplet_file)
            print(
                'there are %d triplets in %s' %
                (len(self.video_name_triplet_dict['triplet_id_frame']),
                 triplet_path))

    # ----------------------------------------------------------------
    # initialize model
    # ----------------------------------------------------------------
    self._init_modules()