예제 #1
0
    def __init__(self):
        """Build the backbone, box head, predicate-branch copy and relation heads.

        Construction order matters: ``self.Prd_RCNN`` is a deep copy of this
        module taken after the backbone and box head have been created but
        before the word vectors and the RelPN/RelDN heads are attached, so
        the copy only carries the attributes set up to that point.
        """
        super().__init__()

        # For cache
        self.mapping_to_detectron = None
        self.orphans_in_detectron = None

        self.use_gt_boxes = True  # a temporary setting

        # Backbone for feature extraction
        self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

        # Region Proposal Network: only built when ground-truth boxes are not
        # used AND the RPN is enabled in the config. The flag is remembered so
        # the clean-up of the deep copy below mirrors this exact condition.
        has_rpn = not self.use_gt_boxes and cfg.RPN.RPN_ON
        if has_rpn:
            self.RPN = rpn_heads.generic_rpn_outputs(
                self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

        if cfg.FPN.FPN_ON:
            # Only supports case when RPN and ROI min levels are the same
            assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
            # RPN max level can be >= to ROI max level
            assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
            # FPN RPN max level might be > FPN ROI max level in which case we
            # need to discard some leading conv blobs (blobs are ordered from
            # max/coarsest level to min/finest level)
            self.num_roi_levels = (
                cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1)

            # Retain only the spatial scales that will be used for RoI heads.
            # `Conv_Body.spatial_scale` may include extra scales that are used
            # for RPN proposals, but not for RoI heads.
            self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
                -self.num_roi_levels:]

        # BBOX Branch
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.Conv_Body.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if not self.use_gt_boxes:
            self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
                self.Box_Head.dim_out)

        # Snapshot of everything built so far, stored as a sub-module.
        self.Prd_RCNN = copy.deepcopy(self)
        # Strip the detection-only heads from the copy. `RPN` exists only if
        # it was actually created above; deleting it unconditionally would
        # raise AttributeError when GT boxes are off but the RPN is disabled.
        if has_rpn:
            del self.Prd_RCNN.RPN
        if not self.use_gt_boxes:
            del self.Prd_RCNN.Box_Outs

        # initialize word vectors (first training dataset if any is
        # configured, otherwise the first test dataset)
        ds_name = cfg.TRAIN.DATASETS[0] if len(
            cfg.TRAIN.DATASETS) else cfg.TEST.DATASETS[0]
        self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(ds_name)

        # RelPN
        self.RelPN = relpn_heads.generic_relpn_outputs()
        # RelDN
        self.RelDN = reldn_heads.reldn_head(self.Box_Head.dim_out * 3,
                                            self.obj_vecs,
                                            self.prd_vecs)  # concat of SPO

        self._init_modules()
예제 #2
0
    def __init__(self):
        """Build the detection/relationship heads and load class frequencies.

        Steps, in order:

        1. Backbone, optional RPN, box head and (without GT boxes) box outputs.
        2. ``self.Prd_RCNN``: a deep copy of the module as built so far, taken
           before word vectors and relation heads are attached.
        3. Word vectors, chosen by ``cfg.MODEL.INPUT_LANG_EMBEDDING_DIM``.
        4. RelPN / RelDN heads.
        5. Object and predicate frequency tables read from JSON files whose
           location depends on ``cfg.DATASET``.
        6. Optional class weights when a weighted loss is configured.

        Raises:
            NotImplementedError: if the embedding dimension is neither 300 nor
                600, or if ``cfg.DATASET`` is not one of the known datasets.
        """
        super().__init__()

        # For cache
        self.mapping_to_detectron = None
        self.orphans_in_detectron = None

        # Backbone for feature extraction
        self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

        # Region Proposal Network: only built when ground-truth boxes are not
        # used AND the RPN is enabled. The flag is remembered so the clean-up
        # of the deep copy below mirrors this exact condition.
        has_rpn = not cfg.TRAIN.USE_GT_BOXES and cfg.RPN.RPN_ON
        if has_rpn:
            self.RPN = rpn_heads.generic_rpn_outputs(
                self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

        if cfg.FPN.FPN_ON:
            # Only supports case when RPN and ROI min levels are the same
            assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
            # RPN max level can be >= to ROI max level
            assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
            # FPN RPN max level might be > FPN ROI max level in which case we
            # need to discard some leading conv blobs (blobs are ordered from
            # max/coarsest level to min/finest level)
            self.num_roi_levels = (
                cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1)

            # Retain only the spatial scales that will be used for RoI heads.
            # `Conv_Body.spatial_scale` may include extra scales that are used
            # for RPN proposals, but not for RoI heads.
            self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
                -self.num_roi_levels:]

        # BBOX Branch
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.Conv_Body.dim_out, 4096, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)
        if not cfg.TRAIN.USE_GT_BOXES:
            self.Box_Outs = fast_rcnn_heads.fast_rcnn_outputs(
                self.Box_Head.dim_out)

        # Snapshot of everything built so far, stored as a sub-module.
        self.Prd_RCNN = copy.deepcopy(self)
        # Strip the detection-only heads from the copy. `RPN` exists only if
        # it was actually created above; deleting it unconditionally would
        # raise AttributeError when GT boxes are off but the RPN is disabled.
        if has_rpn:
            del self.Prd_RCNN.RPN
        if not cfg.TRAIN.USE_GT_BOXES:
            del self.Prd_RCNN.Box_Outs

        # initialize word vectors (first training dataset if any is
        # configured, otherwise the first test dataset)
        ds_name = cfg.TRAIN.DATASETS[0] if len(
            cfg.TRAIN.DATASETS) else cfg.TEST.DATASETS[0]

        if cfg.MODEL.INPUT_LANG_EMBEDDING_DIM == 300:
            load_vecs = get_obj_prd_vecs
        elif cfg.MODEL.INPUT_LANG_EMBEDDING_DIM == 600:
            load_vecs = get_obj_prd_gn_relco_vecs
        else:
            raise NotImplementedError
        (self.obj_vecs, self.prd_vecs,
         obj_categories, prd_categories) = load_vecs(ds_name)

        # RelPN
        self.RelPN = relpn_heads.generic_relpn_outputs()
        # RelDN
        self.RelDN = reldn_heads.reldn_head(
            self.Box_Head.dim_out * 3, self.obj_vecs,
            self.prd_vecs)  # concat of SPO

        self.prd_weights = None
        self.obj_weights = None

        # Location of the frequency files relative to cfg.DATA_DIR.
        # `{seed}` is filled with cfg.RNG_SEED where present (vg80k has no
        # per-seed directory); `{kind}` is 'predicates' or 'objects'.
        freq_path_templates = {
            'gvqa10k': '/gvqa/reduced_data/10k/seed{seed}/{kind}_freqs.json',
            'gvqa20k': '/gvqa/reduced_data/20k/seed{seed}/{kind}_freqs.json',
            'gvqa': '/gvqa/seed{seed}/{kind}_freqs.json',
            'vg80k': '/vg/{kind}_freqs.json',
            'vg8k': '/vg8k/seed{seed}/train_{kind}_freqs.json',
        }
        if cfg.DATASET not in freq_path_templates:
            raise NotImplementedError
        path_template = freq_path_templates[cfg.DATASET]
        freq_prd_path = cfg.DATA_DIR + path_template.format(
            seed=cfg.RNG_SEED, kind='predicates')
        freq_obj_path = cfg.DATA_DIR + path_template.format(
            seed=cfg.RNG_SEED, kind='objects')

        # Context managers so the file handles are closed deterministically.
        with open(freq_prd_path) as prd_file:
            self.prd_freq_dict = json.load(prd_file)
        with open(freq_obj_path) as obj_file:
            self.obj_freq_dict = json.load(obj_file)

        # Drop the first predicate category (presumably the background class,
        # judging by the original variable name -- confirm against the
        # word-vector loader).
        no_bg_prd_categories = prd_categories[1:]

        assert len(no_bg_prd_categories) == cfg.MODEL.NUM_PRD_CLASSES

        self.prd_categories = no_bg_prd_categories
        self.obj_categories = obj_categories

        self.freq_prd = get_freq_from_dict(
            self.prd_freq_dict, self.prd_categories)
        self.freq_obj = get_freq_from_dict(
            self.obj_freq_dict, self.obj_categories)

        if cfg.MODEL.LOSS in ('weighted_cross_entropy', 'weighted_focal'):
            logger.info('loading frequencies')

            # Inverse-frequency weights on counts shifted by one, then
            # rescaled so that the mean weight is 1.
            smoothed_prd = self.freq_prd + 1
            smoothed_obj = self.freq_obj + 1
            inv_prd = np.sum(smoothed_prd) / smoothed_prd
            inv_obj = np.sum(smoothed_obj) / smoothed_obj

            self.obj_weights = (inv_obj / np.mean(inv_obj)).astype(np.float32)
            prd_weights = (inv_prd / np.mean(inv_prd)).astype(np.float32)

            # Prepend one extra slot at index 0 (matching the category that
            # was dropped above) and give it the smallest predicate weight.
            padded = np.zeros(shape=prd_weights.shape[0] + 1,
                              dtype=np.float32)
            padded[1:] = prd_weights
            padded[0] = min(prd_weights)
            self.prd_weights = padded
        self._init_modules()
    def __init__(self, category_to_id_map, prd_category_to_id_map, args=None):
        """Build the backbone, RPN, box head and relation heads.

        Args:
            category_to_id_map: object-category mapping; stored on the
                instance and forwarded to ``get_obj_prd_vecs``.
            prd_category_to_id_map: predicate-category mapping; stored on the
                instance and forwarded to ``get_obj_prd_vecs``.
            args: optional run arguments. Its ``dataset`` attribute is read
                when ``cfg.BINARY_LOSS`` or ``cfg.EVAL_MAP`` is enabled.
        """
        super().__init__()

        self.mapping_to_detectron = None
        self.orphans_in_detectron = None
        self.category_to_id_map = category_to_id_map
        self.prd_category_to_id_map = prd_category_to_id_map
        self.args = args

        # -------------------------------------------------------------------------------------------------------------------------------
        # initialize word vectors
        # -------------------------------------------------------------------------------------------------------------------------------
        # First training dataset if any is configured, otherwise the first
        # test dataset.
        ds_name = cfg.TRAIN.DATASETS[0] if len(
            cfg.TRAIN.DATASETS) else cfg.TEST.DATASETS[0]
        self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(
            ds_name, self.category_to_id_map, self.prd_category_to_id_map)

        # -------------------------------------------------------------------------------------------------------------------------------
        # Backbone for feature extraction
        # -------------------------------------------------------------------------------------------------------------------------------
        self.Conv_Body = get_func(cfg.MODEL.CONV_BODY)()

        # -------------------------------------------------------------------------------------------------------------------------------
        # Region Proposal Network
        # -------------------------------------------------------------------------------------------------------------------------------
        if cfg.RPN.RPN_ON:
            self.RPN = rpn_heads.generic_rpn_outputs(
                self.Conv_Body.dim_out, self.Conv_Body.spatial_scale)

        if cfg.FPN.FPN_ON:
            # Only supports case when RPN and ROI min levels are the same
            assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
            # RPN max level can be >= to ROI max level
            assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL
            # FPN RPN max level might be > FPN ROI max level in which case we
            # need to discard some leading conv blobs (blobs are ordered from
            # max/coarsest level to min/finest level)
            self.num_roi_levels = (
                cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1)

            # Retain only the spatial scales that will be used for RoI heads.
            # `Conv_Body.spatial_scale` may include extra scales that are used
            # for RPN proposals, but not for RoI heads.
            self.Conv_Body.spatial_scale = self.Conv_Body.spatial_scale[
                -self.num_roi_levels:]

        # -------------------------------------------------------------------------------------------------------------------------------
        # BBOX Branch
        # -------------------------------------------------------------------------------------------------------------------------------
        # NOTE(review): `self.RPN` is only assigned when cfg.RPN.RPN_ON is
        # set, so this line raises AttributeError with the RPN disabled.
        self.Box_Head = get_func(cfg.FAST_RCNN.ROI_BOX_HEAD)(
            self.RPN.dim_out, self.roi_feature_transform,
            self.Conv_Body.spatial_scale)

        # -------------------------------------------------------------------------------------------------------------------------------
        # RelPN
        # -------------------------------------------------------------------------------------------------------------------------------
        self.RelPN = relpn_heads.generic_relpn_outputs()

        # -------------------------------------------------------------------------------------------------------------------------------
        # RelDN
        # -------------------------------------------------------------------------------------------------------------------------------
        self.RelDN = reldn_heads.reldn_head(self.Box_Head.dim_out,
                                            self.obj_vecs, self.prd_vecs)
        self.reldn_heads = reldn_heads

        # -------------------------------------------------------------------------------------------------------------------------------
        # triplets
        # -------------------------------------------------------------------------------------------------------------------------------
        # NOTE(review): `args` defaults to None; in that case the
        # `self.args.dataset` access below fails once this branch is entered.
        if cfg.BINARY_LOSS or cfg.EVAL_MAP:
            if 'vhico' in self.args.dataset:
                # Pick the triplet file for the evaluated subset; anything
                # other than 'test' / 'unseen' falls back to the train file.
                if cfg.EVAL_SUBSET == 'test':
                    triplet_path = TRIPLET_TEST
                elif cfg.EVAL_SUBSET == 'unseen':
                    triplet_path = TRIPLET_UNSEEN
                else:
                    triplet_path = TRIPLET_TRAIN

                # pickle.load can execute arbitrary code embedded in the
                # file: only point these module-level paths at trusted data.
                # The context manager closes the handle deterministically.
                with open(triplet_path, 'rb') as triplet_file:
                    self.video_name_triplet_dict = pickle.load(triplet_file)
                print(
                    'there are %d triplets in %s' %
                    (len(self.video_name_triplet_dict['triplet_id_frame']),
                     triplet_path))

        # -------------------------------------------------------------------------------------------------------------------------------
        # initialize model
        # -------------------------------------------------------------------------------------------------------------------------------
        self._init_modules()