Exemple #1
0
    def __init__(self):
        """Assemble the backbone, box branch, predicate-branch copy and relation heads.

        The order of the steps is significant: ``Prd_RCNN`` is a deep copy of
        this object taken immediately after the box branch is built, so it only
        contains what has been attached up to that point (word vectors and the
        relationship heads are added to ``self`` afterwards).
        """
        super().__init__()

        # Cache placeholders; both start out empty.
        self.mapping_to_detectron = None
        self.orphans_in_detectron = None

        # Temporary switch: while it is True, no RPN or box-output head is built.
        self.use_gt_boxes = True

        # Backbone used for feature extraction.
        backbone = get_func(cfg.MODEL.CONV_BODY)()
        self.Conv_Body = backbone

        # Region Proposal Network (only when not relying on ground-truth boxes).
        if not self.use_gt_boxes:
            if cfg.RPN.RPN_ON:
                self.RPN = rpn_heads.generic_rpn_outputs(
                    backbone.dim_out, backbone.spatial_scale)

        if cfg.FPN.FPN_ON:
            fpn_cfg = cfg.FPN
            # RPN and RoI heads must share the same minimum pyramid level.
            assert fpn_cfg.RPN_MIN_LEVEL == fpn_cfg.ROI_MIN_LEVEL
            # The RPN may reach a coarser level than the RoI heads, never a finer one.
            assert fpn_cfg.RPN_MAX_LEVEL >= fpn_cfg.ROI_MAX_LEVEL
            # Blobs are ordered coarsest -> finest, so when the RPN goes coarser
            # than the RoI heads the surplus leading entries are discarded.
            roi_level_count = fpn_cfg.ROI_MAX_LEVEL - fpn_cfg.ROI_MIN_LEVEL + 1
            self.num_roi_levels = roi_level_count

            # Keep just the trailing scales consumed by the RoI heads; the
            # backbone may list extra scales that only the RPN uses.
            backbone.spatial_scale = backbone.spatial_scale[-roi_level_count:]

        # Box branch.
        build_box_head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)
        self.Box_Head = build_box_head(
            backbone.dim_out,
            self.roi_feature_transform,
            backbone.spatial_scale,
        )
        if not self.use_gt_boxes:
            self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
                self.Box_Head.dim_out)

        # Snapshot everything built so far as the predicate branch, then strip
        # the proposal/box-output heads from that snapshot.
        prd_branch = copy.deepcopy(self)
        self.Prd_RCNN = prd_branch
        if not self.use_gt_boxes:
            del prd_branch.RPN
            del prd_branch.Box_Outs

        # Word vectors: use the first training dataset, or the first test
        # dataset when no training dataset is configured.
        if len(cfg.TRAIN.DATASETS):
            dataset_name = cfg.TRAIN.DATASETS[0]
        else:
            dataset_name = cfg.TEST.DATASETS[0]
        self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(dataset_name)

        # Relationship proposal head.
        self.RelPN = relpn_heads.generic_relpn_outputs()

        # Relationship detection head; the input width is three box-head
        # outputs side by side (concatenation of subject/predicate/object).
        spo_dim = self.Box_Head.dim_out * 3
        self.RelDN = reldn_heads.reldn_head(
            spo_dim, self.obj_vecs, self.prd_vecs)

        self._init_modules()
    def __init__(self):
        """Build the detection/relationship heads and load class-frequency statistics.

        Steps, in order:

        1. Build the backbone, the optional RPN, the box head and (when ground
           truth boxes are not used) the box-output head.
        2. Deep-copy the object built so far into ``Prd_RCNN`` and remove the
           RPN / box-output heads from that copy when they exist.
        3. Build the RelPN and RelDN heads.
        4. Load per-category frequency JSON files selected by ``cfg.DATASET``
           and derive ``freq_prd`` / ``freq_obj`` via ``get_freq_from_dict``.
        5. When ``cfg.MODEL.LOSS == 'weighted_cross_entropy'``, derive
           mean-normalised inverse-frequency weights; otherwise
           ``prd_weights`` and ``obj_weights`` stay ``None``.

        Raises:
            NotImplementedError: if ``cfg.DATASET`` is not one of ``gvqa10k``,
                ``gvqa20k``, ``gvqa``, ``vg80k`` or ``vg8k``.
            AssertionError: if the FPN level settings are inconsistent, or the
                number of non-background predicate categories differs from
                ``cfg.MODEL.NUM_PRD_CLASSES``.
        """
        super().__init__()

        # For cache
        self.mapping_to_detectron = None
        self.orphans_in_detectron = None

        # Backbone for feature extraction
        self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

        # Region Proposal Network
        if not cfg.TRAIN.USE_GT_BOXES and cfg.RPN.RPN_ON:
            self.RPN = rpn_heads.generic_rpn_outputs(
                self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

        if cfg.FPN.FPN_ON:
            # Only supports case when RPN and ROI min levels are the same
            assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
            # RPN max level can be >= to ROI max level
            assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
            # FPN RPN max level might be > FPN ROI max level in which case we
            # need to discard some leading conv blobs (blobs are ordered from
            # max/coarsest level to min/finest level)
            self.num_roi_levels = (
                cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1)

            # Retain only the spatial scales that will be used for RoI heads.
            # `Conv_Body.spatial_scale` may include extra scales that are used
            # for RPN proposals, but not for RoI heads.
            self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
                -self.num_roi_levels:]

        # BBOX Branch (4096 is passed as the second positional argument of the
        # configured box-head factory).
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.Conv_Body.dim_out, 4096, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if not cfg.TRAIN.USE_GT_BOXES:
            self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
                self.Box_Head.dim_out)

        # Predicate branch: a deep copy of everything built so far, minus the
        # proposal and box-output heads.
        self.Prd_RCNN = copy.deepcopy(self)
        if not cfg.TRAIN.USE_GT_BOXES:
            # RPN only exists when cfg.RPN.RPN_ON was also set, so check
            # before deleting instead of raising AttributeError.
            for head_name in ('RPN', 'Box_Outs'):
                if hasattr(self.Prd_RCNN, head_name):
                    delattr(self.Prd_RCNN, head_name)

        # Category lists come from the first training dataset, or the first
        # test dataset when no training dataset is configured.
        ds_name = cfg.TRAIN.DATASETS[0] if len(
            cfg.TRAIN.DATASETS) else cfg.TEST.DATASETS[0]
        obj_categories, prd_categories = get_obj_prd_vecs(ds_name)

        # RelPN
        self.RelPN = relpn_heads.generic_relpn_outputs()
        # RelDN; input width is three box-head outputs (concat of SPO)
        self.RelDN = reldn_heads.reldn_head(self.Box_Head.dim_out * 3)

        self.prd_weights = None
        self.obj_weights = None

        # Locate the frequency files: every dataset keeps them in one
        # directory under cfg.DATA_DIR; only vg8k prefixes the file names.
        dataset = cfg.DATASET
        file_prefix = ''
        if dataset == 'gvqa10k':
            freq_dir = '/gvqa/reduced_data/10k/seed{}'.format(cfg.RNG_SEED)
        elif dataset == 'gvqa20k':
            freq_dir = '/gvqa/reduced_data/20k/seed{}'.format(cfg.RNG_SEED)
        elif dataset == 'gvqa':
            freq_dir = '/gvqa/seed{}'.format(cfg.RNG_SEED)
        elif dataset == 'vg80k':
            freq_dir = '/vg'
        elif dataset == 'vg8k':
            freq_dir = '/vg8k/seed{}'.format(cfg.RNG_SEED)
            file_prefix = 'train_'
        else:
            raise NotImplementedError(
                'no frequency files known for dataset {!r}'.format(dataset))

        freq_prd_path = (cfg.DATA_DIR + freq_dir + '/' + file_prefix +
                         'predicates_freqs.json')
        freq_obj_path = (cfg.DATA_DIR + freq_dir + '/' + file_prefix +
                         'objects_freqs.json')

        # Use context managers so the file handles are closed deterministically.
        with open(freq_prd_path) as prd_file:
            self.prd_freq_dict = json.load(prd_file)
        with open(freq_obj_path) as obj_file:
            self.obj_freq_dict = json.load(obj_file)

        # The first predicate category is dropped (the variable name marks it
        # as background); the remainder must match the configured class count.
        no_bg_prd_categories = prd_categories[1:]
        assert len(no_bg_prd_categories) == cfg.MODEL.NUM_PRD_CLASSES

        self.prd_categories = no_bg_prd_categories
        self.obj_categories = obj_categories

        self.freq_prd = get_freq_from_dict(self.prd_freq_dict,
                                           self.prd_categories)
        self.freq_obj = get_freq_from_dict(self.obj_freq_dict,
                                           self.obj_categories)

        if cfg.MODEL.LOSS == 'weighted_cross_entropy':
            logger.info('loading frequencies')

            # One is added to every count before inverting -- presumably so a
            # zero count cannot cause a division by zero.
            smoothed_prd = self.freq_prd + 1
            smoothed_obj = self.freq_obj + 1
            inverse_prd = np.sum(smoothed_prd) / smoothed_prd
            inverse_obj = np.sum(smoothed_obj) / smoothed_obj

            # Normalise so the weights average to 1.
            fg_prd_weights = (inverse_prd / np.mean(inverse_prd)).astype(
                np.float32)
            self.obj_weights = (inverse_obj / np.mean(inverse_obj)).astype(
                np.float32)

            # Prepend one extra slot at index 0 that receives the smallest of
            # the predicate weights (index 0 is the entry removed from
            # `prd_categories` above).
            prd_weights = np.zeros(shape=fg_prd_weights.shape[0] + 1,
                                   dtype=np.float32)
            prd_weights[1:] = fg_prd_weights
            prd_weights[0] = min(fg_prd_weights)
            self.prd_weights = prd_weights

        self._init_modules()
    def __init__(self, category_to_id_map, prd_category_to_id_map, args=None):
        """Build the detector, the relationship heads and, optionally, load triplets.

        Args:
            category_to_id_map: stored on the instance and forwarded to
                ``get_obj_prd_vecs`` together with the dataset name.
            prd_category_to_id_map: stored on the instance and forwarded to
                ``get_obj_prd_vecs`` as the predicate counterpart.
            args: stored as ``self.args``. Its ``dataset`` attribute is read
                when ``cfg.BINARY_LOSS`` or ``cfg.EVAL_MAP`` is set.

        When ``cfg.BINARY_LOSS`` or ``cfg.EVAL_MAP`` is set and the dataset
        name contains ``'vhico'``, a pickled triplet dictionary is loaded into
        ``self.video_name_triplet_dict``; the file is chosen by
        ``cfg.EVAL_SUBSET`` (``'test'``, ``'unseen'``, anything else -> train).
        """
        super().__init__()

        self.mapping_to_detectron = None
        self.orphans_in_detectron = None
        self.category_to_id_map = category_to_id_map
        self.prd_category_to_id_map = prd_category_to_id_map
        self.args = args

        # ------------------------------------------------------------------
        # Word vectors, looked up for the first training dataset (or the first
        # test dataset when no training dataset is configured).
        # ------------------------------------------------------------------
        ds_name = cfg.TRAIN.DATASETS[0] if len(
            cfg.TRAIN.DATASETS) else cfg.TEST.DATASETS[0]
        self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(
            ds_name, self.category_to_id_map, self.prd_category_to_id_map)

        # ------------------------------------------------------------------
        # Backbone for feature extraction
        # ------------------------------------------------------------------
        self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

        # ------------------------------------------------------------------
        # Region Proposal Network
        # ------------------------------------------------------------------
        if cfg.RPN.RPN_ON:
            self.RPN = rpn_heads.generic_rpn_outputs(
                self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

        if cfg.FPN.FPN_ON:
            # Only supports case when RPN and ROI min levels are the same
            assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
            # RPN max level can be >= to ROI max level
            assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
            # FPN RPN max level might be > FPN ROI max level in which case we
            # need to discard some leading conv blobs (blobs are ordered from
            # max/coarsest level to min/finest level)
            self.num_roi_levels = (
                cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1)

            # Retain only the spatial scales that will be used for RoI heads.
            # `Conv_Body.spatial_scale` may include extra scales that are used
            # for RPN proposals, but not for RoI heads.
            self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
                -self.num_roi_levels:]

        # ------------------------------------------------------------------
        # BBOX Branch
        # NOTE(review): this reads self.RPN, which is only created when
        # cfg.RPN.RPN_ON is set -- confirm RPN_ON is always enabled here.
        # ------------------------------------------------------------------
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)

        # ------------------------------------------------------------------
        # RelPN
        # ------------------------------------------------------------------
        self.RelPN = relpn_heads.generic_relpn_outputs()

        # ------------------------------------------------------------------
        # RelDN (the `reldn_heads` module itself is also kept on the instance)
        # ------------------------------------------------------------------
        self.RelDN = reldn_heads.reldn_head(self.Box_Head.dim_out,
                                            self.obj_vecs, self.prd_vecs)
        self.reldn_heads = reldn_heads

        # ------------------------------------------------------------------
        # Triplets
        # NOTE(review): `self.args` defaults to None; reaching this branch
        # without args raises AttributeError -- confirm callers always pass it.
        # ------------------------------------------------------------------
        if cfg.BINARY_LOSS or cfg.EVAL_MAP:
            if 'vhico' in self.args.dataset:
                if cfg.EVAL_SUBSET == 'test':
                    triplet_path = TRIPLET_TEST
                elif cfg.EVAL_SUBSET == 'unseen':
                    triplet_path = TRIPLET_UNSEEN
                else:
                    triplet_path = TRIPLET_TRAIN

                # Context manager closes the file handle once loading is done.
                # NOTE: pickle can execute arbitrary code while loading; these
                # files must come from a trusted source.
                with open(triplet_path, 'rb') as triplet_file:
                    self.video_name_triplet_dict = pickle.load(triplet_file)
                print(
                    'there are %d triplets in %s' %
                    (len(self.video_name_triplet_dict['triplet_id_frame']),
                     triplet_path))

        # ------------------------------------------------------------------
        # Initialize model
        # ------------------------------------------------------------------
        self._init_modules()
Exemple #4
0
    def __init__(self):
        """Build the detector plus the stage-dependent classifiers and losses.

        ``cfg.MODEL.MEMORY_MODULE_STAGE`` selects the classifier family:

        * stage 1 -> ``dot_product_classifier``; centroids and feature losses
          stay ``None``.
        * stage 2 -> ``meta_embedding_classifier`` (which, per the original
          comment, includes the memory module); zero-initialised centroid
          tensors and ``disc_centroids_loss`` feature losses (weight 0.01
          each) are created as well.

        Raises:
            NotImplementedError: if the configured stage is neither 1 nor 2.
        """
        super().__init__()

        # For cache
        self.mapping_to_detectron = None
        self.orphans_in_detectron = None

        # Backbone for feature extraction
        self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

        # Region Proposal Network
        if not cfg.TRAIN.USE_GT_BOXES and cfg.RPN.RPN_ON:
            self.RPN = rpn_heads.generic_rpn_outputs(
                self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

        # Feature widths: one box feature, and three of them side by side for
        # the predicate branch.
        self.conv5_dim_out = 2048
        self.prd_dim_out = self.conv5_dim_out * 3

        # BBOX Branch
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.Conv_Body.dim_out, self.conv5_dim_out,
            self.roi_feature_transform, self.Conv_Body.spatial_scale)

        # Predicate branch: a deep copy of everything built so far, minus the
        # proposal / box-output heads.
        self.Prd_RCNN = copy.deepcopy(self)
        if not cfg.TRAIN.USE_GT_BOXES:
            # This constructor never creates `Box_Outs`, and `RPN` exists only
            # when cfg.RPN.RPN_ON is set, so delete only what is present
            # instead of raising AttributeError.
            for head_name in ('RPN', 'Box_Outs'):
                if hasattr(self.Prd_RCNN, head_name):
                    delattr(self.Prd_RCNN, head_name)

        # RelPN
        self.RelPN = relpn_heads.generic_relpn_outputs()

        # Validate the stage once and pick the matching classifier module.
        stage = cfg.MODEL.MEMORY_MODULE_STAGE
        if stage == 1:
            classifier_factory = dot_product_classifier
        elif stage == 2:
            classifier_factory = meta_embedding_classifier
        else:
            raise NotImplementedError(
                'unsupported MEMORY_MODULE_STAGE: {!r}'.format(stage))
        # The `stage1_weights` flag is False for stage 1 and True for stage 2.
        stage1_weights = stage == 2

        num_sbj_obj_classes = cfg.MODEL.NUM_CLASSES - 1
        num_prd_classes = cfg.MODEL.NUM_PRD_CLASSES + 1

        # Centroids: only allocated (as zeros) in stage 2.
        self.sbj_obj_centroids = None
        self.prd_centroids = None
        if stage == 2:
            self.sbj_obj_centroids = torch.zeros(
                num_sbj_obj_classes, self.conv5_dim_out)
            self.prd_centroids = torch.zeros(
                num_prd_classes, self.prd_dim_out)

        # Positional arguments for `create_model`, in order:
        # in_dim, num_classes, stage1_weights, dataset, `not self.training`.
        not_training = not self.training

        # Subject/object classifier.
        self.classifier = classifier_factory.create_model(
            self.conv5_dim_out, num_sbj_obj_classes, stage1_weights,
            cfg.DATASET, not_training)

        # Predicate classifier; it receives one extra trailing `True`
        # (meaning defined by `create_model` -- TODO confirm).
        self.prd_classifier = classifier_factory.create_model(
            self.prd_dim_out, num_prd_classes, stage1_weights,
            cfg.DATASET, not_training, True)

        # Feature losses: only created in stage 2, together with their weights.
        self.feature_loss_sbj_obj = None
        self.feature_loss_prd = None
        if stage == 2:
            # `create_loss` takes (feat_dim, num_classes) positionally.
            self.feature_loss_sbj_obj = disc_centroids_loss.create_loss(
                self.conv5_dim_out, num_sbj_obj_classes)
            self.feature_loss_weight_sbj_obj = 0.01

            self.feature_loss_prd = disc_centroids_loss.create_loss(
                self.prd_dim_out, num_prd_classes)
            self.feature_loss_weight_prd = 0.01

        self._init_modules()