def __init__(self):
    """Build the backbone, box head, predicate-branch copy and relation heads.

    Every architectural choice is read from the global ``cfg``.
    ``self.use_gt_boxes`` is hard-coded to ``True`` here, so the RPN and
    box-output heads guarded by it are not created.
    """
    super().__init__()

    # Cache slots, unset until something fills them in
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Temporary setting
    self.use_gt_boxes = True

    # Feature-extraction backbone
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network (only when ground-truth boxes are not used)
    if not self.use_gt_boxes:
        if cfg.RPN.RPN_ON:
            self.RPN = rpn_heads.generic_rpn_outputs(
                self.Conv_Body.dim_out, self.Conv_Body.spatial_scale
            )

    if cfg.FPN.FPN_ON:
        # RPN and RoI heads must start at the same pyramid level ...
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # ... while the RPN may extend to a coarser level than the RoI heads.
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # When the RPN reaches a coarser level than the RoI heads, the
        # leading conv blobs have to be discarded (blobs are ordered from
        # max/coarsest level to min/finest level).
        roi_level_count = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        self.num_roi_levels = roi_level_count
        # Keep only the trailing spatial scales, i.e. the ones the RoI heads
        # use; the extra leading ones serve RPN proposals only.
        all_scales = self.Conv_Body.spatial_scale
        self.Conv_Body.spatial_scale = all_scales[-self.num_roi_levels:]

    # Box branch
    make_box_head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)
    self.Box_Head = make_box_head(
        self.Conv_Body.dim_out,
        self.roi_feature_transform,
        self.Conv_Body.spatial_scale,
    )
    if not self.use_gt_boxes:
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(self.Box_Head.dim_out)

    # Predicate branch: a deep copy of everything built so far, minus the
    # proposal and box-output heads when those exist.
    self.Prd_RCNN = copy.deepcopy(self)
    if not self.use_gt_boxes:
        del self.Prd_RCNN.RPN
        del self.Prd_RCNN.Box_Outs

    # Word vectors, looked up by the first training dataset name, or the
    # first test dataset name when no training dataset is configured.
    if len(cfg.TRAIN.DATASETS):
        ds_name = cfg.TRAIN.DATASETS[0]
    else:
        ds_name = cfg.TEST.DATASETS[0]
    self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(ds_name)

    # RelPN
    self.RelPN = relpn_heads.generic_relpn_outputs()

    # RelDN: input width is three box-head widths (concat of SPO)
    spo_dim = self.Box_Head.dim_out * 3
    self.RelDN = reldn_heads.reldn_head(spo_dim, self.obj_vecs, self.prd_vecs)

    self._init_modules()
def __init__(self):
    """Build the detection/relationship heads and load class-frequency tables.

    Every architectural choice is read from the global ``cfg``. After the
    network parts are created, per-class frequency JSON files for
    ``cfg.DATASET`` are loaded, and, when ``cfg.MODEL.LOSS`` is
    ``'weighted_cross_entropy'``, inverse-frequency class weights are
    derived from them and stored in ``self.prd_weights`` /
    ``self.obj_weights`` (both stay ``None`` otherwise).

    Raises:
        NotImplementedError: if ``cfg.DATASET`` is not a supported dataset.
    """
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network
    if not cfg.TRAIN.USE_GT_BOXES and cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # BBOX Branch
    self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
        self.Conv_Body.dim_out, 4096, self.roi_feature_transform,
        self.Conv_Body.spatial_scale)
    if not cfg.TRAIN.USE_GT_BOXES:
        self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
            self.Box_Head.dim_out)

    # Predicate branch: a deep copy of everything built so far, without the
    # proposal / box-output heads.
    self.Prd_RCNN = copy.deepcopy(self)
    if not cfg.TRAIN.USE_GT_BOXES:
        # RPN is only created when cfg.RPN.RPN_ON is also set, so it may be
        # absent here; deleting it unconditionally would raise.
        if hasattr(self.Prd_RCNN, 'RPN'):
            del self.Prd_RCNN.RPN
        del self.Prd_RCNN.Box_Outs

    # Category lists, looked up by the first training dataset name, or the
    # first test dataset name when no training dataset is configured.
    ds_name = cfg.TRAIN.DATASETS[0] if len(
        cfg.TRAIN.DATASETS) else cfg.TEST.DATASETS[0]
    obj_categories, prd_categories = get_obj_prd_vecs(ds_name)

    # RelPN
    self.RelPN = relpn_heads.generic_relpn_outputs()

    # RelDN
    self.RelDN = reldn_heads.reldn_head(self.Box_Head.dim_out * 3)  # concat of SPO

    self.prd_weights = None
    self.obj_weights = None

    # Location of the frequency files per dataset, relative to cfg.DATA_DIR:
    # (directory template taking the RNG seed, file-name prefix).
    freq_file_layouts = {
        'gvqa10k': ('/gvqa/reduced_data/10k/seed{}/', ''),
        'gvqa20k': ('/gvqa/reduced_data/20k/seed{}/', ''),
        'gvqa': ('/gvqa/seed{}/', ''),
        'vg80k': ('/vg/', ''),
        'vg8k': ('/vg8k/seed{}/', 'train_'),
    }
    layout = freq_file_layouts.get(cfg.DATASET)
    if layout is None:
        raise NotImplementedError(
            'no frequency files known for dataset {!r}'.format(cfg.DATASET))
    dir_template, file_prefix = layout
    freq_dir = cfg.DATA_DIR + dir_template.format(cfg.RNG_SEED)
    freq_prd_path = freq_dir + file_prefix + 'predicates_freqs.json'
    freq_obj_path = freq_dir + file_prefix + 'objects_freqs.json'

    # Context managers so the file handles are closed deterministically.
    with open(freq_prd_path) as prd_file:
        self.prd_freq_dict = json.load(prd_file)
    with open(freq_obj_path) as obj_file:
        self.obj_freq_dict = json.load(obj_file)

    # The first predicate category is dropped (presumably the background
    # class, going by the variable name -- confirm against the data files).
    no_bg_prd_categories = prd_categories[1:]
    assert len(no_bg_prd_categories) == cfg.MODEL.NUM_PRD_CLASSES
    self.prd_categories = no_bg_prd_categories
    self.obj_categories = obj_categories

    self.freq_prd = get_freq_from_dict(self.prd_freq_dict, self.prd_categories)
    self.freq_obj = get_freq_from_dict(self.obj_freq_dict, self.obj_categories)

    if cfg.MODEL.LOSS == 'weighted_cross_entropy':
        logger.info('loading frequencies')

        # +1 keeps the division below finite for zero-count classes.
        freq_prd = self.freq_prd + 1
        freq_obj = self.freq_obj + 1

        # Inverse-frequency weights, rescaled to have mean 1.
        prd_weights = np.sum(freq_prd) / freq_prd
        obj_weights = np.sum(freq_obj) / freq_obj
        prd_weights = (prd_weights / np.mean(prd_weights)).astype(np.float32)
        self.obj_weights = (obj_weights / np.mean(obj_weights)).astype(
            np.float32)

        # Prepend one extra slot at index 0 that receives the smallest
        # predicate weight (presumably for the dropped first category).
        padded = np.zeros(shape=prd_weights.shape[0] + 1, dtype=np.float32)
        padded[1:] = prd_weights
        padded[0] = min(prd_weights)
        self.prd_weights = padded

    self._init_modules()
def __init__(self, category_to_id_map, prd_category_to_id_map, args=None):
    """Build the backbone, RPN, box head and relation heads, then load triplets.

    Args:
        category_to_id_map: object-category mapping, stored on the instance
            and forwarded to ``get_obj_prd_vecs``.
        prd_category_to_id_map: predicate-category mapping, stored on the
            instance and forwarded to ``get_obj_prd_vecs``.
        args: stored as ``self.args``. Its ``dataset`` attribute is read
            when ``cfg.BINARY_LOSS`` or ``cfg.EVAL_MAP`` is set, so it must
            not be ``None`` in that configuration.
    """
    super().__init__()

    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    self.category_to_id_map = category_to_id_map
    self.prd_category_to_id_map = prd_category_to_id_map
    self.args = args

    # ----------------------------------------------------------------------
    # initialize word vectors
    # ----------------------------------------------------------------------
    ds_name = cfg.TRAIN.DATASETS[0] if len(
        cfg.TRAIN.DATASETS) else cfg.TEST.DATASETS[0]
    self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(
        ds_name, self.category_to_id_map, self.prd_category_to_id_map)

    # ----------------------------------------------------------------------
    # Backbone for feature extraction
    # ----------------------------------------------------------------------
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # ----------------------------------------------------------------------
    # Region Proposal Network
    # ----------------------------------------------------------------------
    if cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    if cfg.FPN.FPN_ON:
        # Only supports case when RPN and ROI min levels are the same
        assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
        # RPN max level can be >= to ROI max level
        assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
        # FPN RPN max level might be > FPN ROI max level in which case we
        # need to discard some leading conv blobs (blobs are ordered from
        # max/coarsest level to min/finest level)
        self.num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

        # Retain only the spatial scales that will be used for RoI heads.
        # `Conv_Body.spatial_scale` may include extra scales that are used
        # for RPN proposals, but not for RoI heads.
        self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
            -self.num_roi_levels:]

    # ----------------------------------------------------------------------
    # BBOX Branch
    # ----------------------------------------------------------------------
    # NOTE(review): this reads self.RPN, which only exists when
    # cfg.RPN.RPN_ON is set -- confirm RPN is always enabled for this model.
    self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
        self.RPN.dim_out, self.roi_feature_transform,
        self.Conv_Body.spatial_scale)

    # ----------------------------------------------------------------------
    # RelPN
    # ----------------------------------------------------------------------
    self.RelPN = relpn_heads.generic_relpn_outputs()

    # ----------------------------------------------------------------------
    # RelDN
    # ----------------------------------------------------------------------
    self.RelDN = reldn_heads.reldn_head(self.Box_Head.dim_out, self.obj_vecs,
                                        self.prd_vecs)
    self.reldn_heads = reldn_heads

    # ----------------------------------------------------------------------
    # triplets
    # ----------------------------------------------------------------------
    if cfg.BINARY_LOSS or cfg.EVAL_MAP:
        if 'vhico' in self.args.dataset:
            # Pick the triplet file for the evaluated subset; anything other
            # than 'test' / 'unseen' falls back to the training triplets.
            if cfg.EVAL_SUBSET == 'test':
                triplet_path = TRIPLET_TEST
            elif cfg.EVAL_SUBSET == 'unseen':
                triplet_path = TRIPLET_UNSEEN
            else:
                triplet_path = TRIPLET_TRAIN

            # NOTE: pickle.load can execute arbitrary code from the file, so
            # the triplet files must come from a trusted source.
            # The context manager closes the handle once loading is done.
            with open(triplet_path, 'rb') as triplet_file:
                self.video_name_triplet_dict = pickle.load(triplet_file)
            print(
                'there are %d triplets in %s' %
                (len(self.video_name_triplet_dict['triplet_id_frame']),
                 triplet_path))

    # ----------------------------------------------------------------------
    # initialize model
    # ----------------------------------------------------------------------
    self._init_modules()
def __init__(self):
    """Build the backbone, box head, predicate branch and memory-module classifiers.

    Every architectural choice is read from the global ``cfg``.
    ``cfg.MODEL.MEMORY_MODULE_STAGE`` selects the classifier type: stage 1
    uses ``dot_product_classifier``; stage 2 uses
    ``meta_embedding_classifier`` and additionally creates zero-initialised
    centroids and the centroid feature losses.

    Raises:
        NotImplementedError: if ``cfg.MODEL.MEMORY_MODULE_STAGE`` is neither
            1 nor 2.
    """
    super().__init__()

    # For cache
    self.mapping_to_detectron = None
    self.orphans_in_detectron = None

    # Backbone for feature extraction
    self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

    # Region Proposal Network
    if not cfg.TRAIN.USE_GT_BOXES and cfg.RPN.RPN_ON:
        self.RPN = rpn_heads.generic_rpn_outputs(
            self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

    self.conv5_dim_out = 2048
    # Predicate features are three times as wide as the object features.
    self.prd_dim_out = self.conv5_dim_out * 3

    # BBOX Branch
    self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
        self.Conv_Body.dim_out, self.conv5_dim_out,
        self.roi_feature_transform, self.Conv_Body.spatial_scale)

    # Predicate branch: a deep copy of everything built so far, without the
    # proposal / box-output heads.
    self.Prd_RCNN = copy.deepcopy(self)
    if not cfg.TRAIN.USE_GT_BOXES:
        # Box_Outs is never created by this constructor and RPN only when
        # cfg.RPN.RPN_ON is set, so delete each head only if it is present;
        # an unconditional `del` raises AttributeError here.
        for head_name in ('RPN', 'Box_Outs'):
            if hasattr(self.Prd_RCNN, head_name):
                delattr(self.Prd_RCNN, head_name)

    # RelPN
    self.RelPN = relpn_heads.generic_relpn_outputs()

    # Stage 1 trains a plain dot-product classifier; stage 2 switches to the
    # meta-embedding classifier, which includes the memory module.
    stage = cfg.MODEL.MEMORY_MODULE_STAGE
    if stage == 1:
        stage1_weights = False
        classifier_factory = dot_product_classifier
    elif stage == 2:
        stage1_weights = True
        classifier_factory = meta_embedding_classifier
    else:
        raise NotImplementedError

    num_obj_classes = cfg.MODEL.NUM_CLASSES - 1
    num_prd_classes = cfg.MODEL.NUM_PRD_CLASSES + 1

    # Centroids exist only in stage 2 and start out as zeros.
    self.sbj_obj_centroids = None
    self.prd_centroids = None
    if stage == 2:
        self.sbj_obj_centroids = torch.zeros(num_obj_classes,
                                             self.conv5_dim_out)
        self.prd_centroids = torch.zeros(num_prd_classes, self.prd_dim_out)

    # Positional arguments for create_model, in order:
    # in_dim, num_classes, stage1_weights, dataset, then `not self.training`.
    # NOTE(review): if this class is a torch.nn.Module, `self.training` is
    # presumably still True at construction time, making that flag False
    # -- confirm this is intended.
    self.classifier = classifier_factory.create_model(
        self.conv5_dim_out,
        num_obj_classes,
        stage1_weights,
        cfg.DATASET,
        not self.training,
    )

    # The predicate classifier takes the same arguments plus a trailing True.
    self.prd_classifier = classifier_factory.create_model(
        self.prd_dim_out,
        num_prd_classes,
        stage1_weights,
        cfg.DATASET,
        not self.training,
        True,
    )

    # Centroid feature losses (arguments: feat_dim, num_classes) and their
    # weights are only set up in stage 2.
    self.feature_loss_sbj_obj = None
    self.feature_loss_prd = None
    if stage == 2:
        self.feature_loss_sbj_obj = disc_centroids_loss.create_loss(
            self.conv5_dim_out, num_obj_classes)
        self.feature_loss_weight_sbj_obj = 0.01

        self.feature_loss_prd = disc_centroids_loss.create_loss(
            self.prd_dim_out, num_prd_classes)
        self.feature_loss_weight_prd = 0.01

    self._init_modules()