Code Example #1
File: sparse_targets.py  Project: zyong812/het-eccv20
    def __init__(self, dbname='vg', eps=1e-3):
        super(FrequencyBias, self).__init__()
        if dbname == 'vg':
            db = VG
        elif dbname == 'vg200':
            db = VG200
        elif dbname == 'vg200_kr':
            db = VG200_Keyrel
        elif dbname == 'vg200_kr_cap':
            db = VG200_Keyrel_captions
        elif dbname == 'vrd':
            db = VRD

        fg_matrix, bg_matrix = get_counts(train_data=db(
            mode='train', filter_duplicate_rels=False),
                                          must_overlap=True)
        bg_matrix += 1
        fg_matrix[:, :, 0] = bg_matrix

        pred_dist = np.log(fg_matrix / fg_matrix.sum(2)[:, :, None] + eps)

        self.num_objs = pred_dist.shape[0]
        pred_dist = torch.FloatTensor(pred_dist).view(-1, pred_dist.shape[2])

        self.obj_baseline = nn.Embedding(pred_dist.size(0), pred_dist.size(1))
        self.obj_baseline.weight.data = pred_dist
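
The counting step itself does not appear in these snippets. As a rough, minimal sketch of what get_counts produces (not the repositories' actual implementation; the per-image fields gt_classes and relationships are assumptions about the dataset interface):

import numpy as np

def count_predicate_stats(train_data, num_obj_classes, num_rel_classes):
    # fg_matrix[s, o, p] counts how often subject class s and object class o
    # are annotated with predicate p in the training split.
    fg_matrix = np.zeros((num_obj_classes, num_obj_classes, num_rel_classes),
                         dtype=np.int64)
    # bg_matrix[s, o] counts candidate pairs; here every ordered pair of distinct
    # boxes is counted (the real code additionally applies a must_overlap box filter).
    bg_matrix = np.zeros((num_obj_classes, num_obj_classes), dtype=np.int64)
    for example in train_data:
        classes = example['gt_classes']  # object class index per box
        for subj_idx, obj_idx, pred in example['relationships']:
            fg_matrix[classes[subj_idx], classes[obj_idx], pred] += 1
        for s in range(len(classes)):
            for o in range(len(classes)):
                if s != o:
                    bg_matrix[classes[s], classes[o]] += 1
    return fg_matrix, bg_matrix

The FrequencyBias constructors above then overwrite predicate 0 with the smoothed background counts and take the log of the row-normalized matrix, so each embedding row stores log empirical predicate probabilities for one (subject class, object class) pair.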
Code Example #2
    def __init__(self, train_data, eps=1e-3):
        super(FrequencyBias, self).__init__()

        fg_matrix, bg_matrix = get_counts(train_data, must_overlap=True)
        bg_matrix += 1
        fg_matrix[:, :, 0] = bg_matrix

        pred_dist = np.log(fg_matrix / fg_matrix.sum(2)[:, :, None] + eps)

        self.num_objs = pred_dist.shape[0]
        pred_dist = torch.FloatTensor(pred_dist).view(-1, pred_dist.shape[2])

        self.obj_baseline = nn.Embedding(pred_dist.size(0), pred_dist.size(1))
        self.obj_baseline.weight.data = pred_dist
Code Example #3
File: sparse_targets.py  Project: taksau/sglabv1
    def __init__(self, eps=1e-3):
        super(FrequencyBias, self).__init__()

        fg_matrix, bg_matrix = get_counts(must_overlap=True)
        # bg_matrix += 1
        fg_matrix[:, :, 0] = bg_matrix

        pred_dist = fg_matrix / (fg_matrix.sum(2)[:, :, None] + eps)

        self.num_objs = pred_dist.shape[0]
        pred_dist = torch.FloatTensor(pred_dist).view(-1, pred_dist.shape[2])
        pred_dist_log = torch.nn.functional.log_softmax(Variable(pred_dist),
                                                        dim=-1).data

        self.obj_baseline = nn.Embedding(pred_dist.size(0), pred_dist.size(1))
        self.obj_baseline.weight.data = pred_dist_log
        self.obj_baseline_state = torch.nn.functional.softmax(Variable(
            pred_dist.clone()).cuda(),
                                                              dim=-1)
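
In all three constructor variants the (subject class, object class) grid is flattened row-major into the embedding table, so a lookup multiplies the subject class by num_objs and adds the object class. A minimal lookup sketch (the method name index_with_labels and the labels layout are assumptions, following the common neural-motifs-style convention):

    def index_with_labels(self, labels):
        # labels: LongTensor of shape (batch, 2) holding (subject class, object class).
        # Returns one row of the bias table per pair, shape (batch, num_predicates).
        return self.obj_baseline(labels[:, 0] * self.num_objs + labels[:, 1])

The returned rows can then be added to a model's predicate logits as a frequency prior.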
Code Example #4
MUST_OVERLAP = False
train, val, test = VG.splits(num_val_im=conf.val_size,
                             filter_non_overlap=MUST_OVERLAP,
                             filter_duplicate_rels=True,
                             use_proposals=conf.use_proposals)
if conf.test:
    print("test data!")
    val = test
train_loader, val_loader = VGDataLoader.splits(train,
                                               val,
                                               mode='rel',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus)

fg_matrix, bg_matrix = get_counts(train_data=train, must_overlap=MUST_OVERLAP)

detector = ObjectDetector(
    classes=train.ind_to_classes,
    num_gpus=conf.num_gpus,
    mode='rpntrain' if not conf.use_proposals else 'proposals',
    use_resnet=conf.use_resnet,
    nms_filter_duplicates=True,
    thresh=0.01)
detector.eval()
detector.cuda()

classifier = ObjectDetector(classes=train.ind_to_classes,
                            num_gpus=conf.num_gpus,
                            mode='gtbox',
                            use_resnet=conf.use_resnet,
Code Example #5
File: eval.py  Project: bknyaz/sgg
def val_epoch(mode,
              sgg_model,
              loader,
              name,
              triplet_counts,
              triplet2str,
              n_batches=-1,
              is_test=False,
              save_scores=False,
              predicate_weight=0,
              train=None,
              wandb_log=None,
              **kwargs):
    print('\nEvaluate %s %s triplets' %
          (name.upper(), 'test' if is_test else 'val'))
    sgg_model.eval()
    evaluator, all_pred_entries, all_metrics = {}, {}, []

    EVAL_MODES = ['sgdet'] if mode == 'sgdet' else ['predcls', 'sgcls']
    assert mode in EVAL_MODES, (mode, 'other modes not supported')

    predicate_weights = None
    if predicate_weight != 0:
        fg_matrix, bg_matrix = get_counts(train, must_overlap=True)
        fg_matrix[:, :, 0] = bg_matrix + 1
        fg_matrix = fg_matrix + 1
        predicate_weights = fg_matrix.mean(axis=(0, 1))**predicate_weight

    with NO_GRAD():
        for eval_m in EVAL_MODES:
            if eval_m == 'sgdet' and name.find('val_') >= 0:
                continue  # skip for validation, because it takes a lot of time

            print('\nEvaluating %s...' % eval_m.upper())

            evaluator[eval_m] = BasicSceneGraphEvaluator(
                eval_m)  # graph constrained evaluator
            evaluator[eval_m + '_nogc'] = BasicSceneGraphEvaluator(
                eval_m,
                multiple_preds=True,  # graph unconstrained evaluator
                per_triplet=name in all_shot_splits,
                triplet_counts=triplet_counts,
                triplet2str=triplet2str)

            # for calculating recall of each relationship except no relationship
            evaluator_list, evaluator_multiple_preds_list = [], []
            if name not in ['val_zs', 'test_zs'] and name.find('val_') < 0:
                for index, name_s in enumerate(
                        loader.dataset.ind_to_predicates):
                    if index == 0:
                        continue
                    evaluator_list.append(
                        (index, name_s, BasicSceneGraphEvaluator.all_modes()))
                    evaluator_multiple_preds_list.append(
                        (index, name_s,
                         BasicSceneGraphEvaluator.all_modes(
                             multiple_preds=True)))

            set_mode(sgg_model, mode=eval_m, is_train=False, verbose=True)

            # For all val/test batches
            all_pred_entries[eval_m] = []
            for val_b, batch in enumerate(tqdm(loader)):
                pred_entry = val_batch(sgg_model,
                                       val_b,
                                       batch,
                                       evaluator,
                                       eval_m,
                                       loader.dataset,
                                       evaluator_list,
                                       evaluator_multiple_preds_list,
                                       train=train,
                                       predicate_weights=predicate_weights,
                                       **kwargs)
                if save_scores:
                    all_pred_entries[eval_m].extend(pred_entry)

                if n_batches > -1 and val_b + 1 >= n_batches:
                    break

            evaluator[eval_m].print_stats()
            evaluator[eval_m + '_nogc'].print_stats()

            mean_recall = mean_recall_mp = None
            if len(evaluator_list) > 0:
                # Compute Mean Recall Results
                mean_recall = calculate_mR_from_evaluator_list(evaluator_list,
                                                               eval_m,
                                                               save_file=None)
                mean_recall_mp = calculate_mR_from_evaluator_list(
                    evaluator_multiple_preds_list,
                    eval_m,
                    multiple_preds=True,
                    save_file=None)

            if not wandb_log:
                continue

            # Log using WANDB
            eval_gc = evaluator[eval_m].result_dict
            eval_no_gc = evaluator[eval_m + '_nogc'].result_dict
            results_dict = {}
            for eval_, mean_eval, sfx in zip([eval_gc, eval_no_gc],
                                             [mean_recall, mean_recall_mp],
                                             ['GC', 'NOGC']):
                for k, v in eval_[eval_m + '_recall'].items():
                    all_metrics.append(np.mean(v))
                    results_dict['%s/%s_R@%i_%s' %
                                 (eval_m, name, k, sfx)] = np.mean(v)
                if mean_eval:
                    for k, v in mean_eval.items():
                        results_dict['%s/%s_m%s_%s' %
                                     (eval_m, name, k, sfx)] = np.mean(v)

            # Per triplet metrics
            try:
                if name in all_shot_splits:
                    for case in ['', '_norm']:
                        for k, v in eval_no_gc[eval_m + '_recall_triplet' +
                                               case].items():
                            results_dict['%s/%s_R@%i_triplet%s' %
                                         (eval_m, name, k, case)] = v
                        for metric in ['meanrank', 'medianrank'] + (
                            ['medianrankclass'] if case == '' else []):
                            results_dict['%s/%s_%s_triplet%s' % (eval_m, name, metric, case)] = \
                                eval_no_gc[eval_m + ('_%s_triplet' % metric) + case]
            except Exception as e:
                print('error in per triplet eval', e)
Code Example #6
    def __init__(self,
                 vocabs,
                 vocab_size,
                 input_encoding_size,
                 rnn_type='lstm',
                 rnn_size=512,
                 num_layers=1,
                 drop_prob_lm=0.5,
                 seq_length=16,
                 seq_per_img=5,
                 fc_feat_size=4096,
                 att_feat_size=512,
                 num_relation=20,
                 object_classes=None,
                 predicate_classes=None,
                 triplet_embed_dim=-1,
                 embed_triplet=True,
                 freq_bl=False):
        super(RelCaptionModel, self).__init__()
        self.vocabs = vocabs
        self.vocabs['0'] = '__SENTSIGN__'  ## ix
        self.vocabs = {i: self.vocabs[str(i)] for i in range(len(self.vocabs))}
        vocab_list = [self.vocabs[i] for i in range(len(self.vocabs))]
        self.vocab_size = vocab_size + 1  # including all the words and <UNK>, and 0 for <start>/<end>

        self.input_encoding_size = input_encoding_size
        self.rnn_type = rnn_type
        self.rnn_size = rnn_size
        self.num_layers = num_layers
        self.drop_prob_lm = drop_prob_lm
        self.seq_length = seq_length
        self.fc_feat_size = fc_feat_size
        self.ss_prob = 0.0  # Schedule sampling probability
        self.num_relation_per_img = num_relation
        self.seq_per_img = seq_per_img
        self.embed_triplet = embed_triplet
        self.triplet_embed_dim = triplet_embed_dim

        self.freq_bl = freq_bl

        self.linear = nn.Linear(self.fc_feat_size, self.num_layers *
                                self.rnn_size)  # feature to rnn_size
        embed_vec = obj_edge_vectors(vocab_list,
                                     wv_dim=self.input_encoding_size)
        self.embed = nn.Embedding(self.vocab_size, self.input_encoding_size)
        self.embed.weight.data = embed_vec.clone()

        if self.embed_triplet:
            assert object_classes is not None and predicate_classes is not None
            object_embed_vec = obj_edge_vectors(object_classes,
                                                wv_dim=self.triplet_embed_dim)
            predicate_embed_vec = obj_edge_vectors(
                predicate_classes, wv_dim=self.triplet_embed_dim)
            self.object_embed = nn.Embedding(len(object_classes),
                                             self.triplet_embed_dim)
            self.object_embed.weight.data = object_embed_vec.clone()
            self.predicate_embed = nn.Embedding(len(predicate_classes),
                                                self.triplet_embed_dim)
            self.predicate_embed.weight.data = predicate_embed_vec.clone()

        self.logit = nn.Linear(self.rnn_size, self.vocab_size)
        self.dropout = nn.Dropout(self.drop_prob_lm)

        self.core = RelCaptionCore(input_encoding_size, rnn_type, rnn_size,
                                   num_layers, drop_prob_lm, fc_feat_size,
                                   att_feat_size, triplet_embed_dim,
                                   embed_triplet)

        if self.freq_bl:
            self.freq_matrix, _ = get_counts(train_data=VG200(
                mode='train', filter_duplicate_rels=False, num_val_im=1000),
                                             must_overlap=True)
        else:
            self.freq_matrix = None

        self.init_weights()
Code Example #7
    def __init__(self,
                 classes,
                 rel_classes,
                 inputs_dim,
                 hidden_dim,
                 recurrent_dropout_probability=0.2,
                 use_highway=True,
                 use_input_projection_bias=True):
        """Initializes the RNN
        Args:
            classes:
            rel_classes:
            inputs_dim:
            hidden_dim: Hidden dim of the decoder
            recurrent_dropout_probability:
            use_highway:
            use_input_projection_bias:
        """
        # TODO add database bias in this module
        super(MemoryRNN, self).__init__()

        self.classes = classes
        self.rel_classes = rel_classes
        self.hidden_size = hidden_dim
        self.inputs_dim = inputs_dim
        self.nms_thresh = 0.3

        self.rel_mem_h = nn.Embedding(self.num_rels, hidden_dim)
        self.rel_mem_h.weight.data.fill_(0)
        self.rel_mem_c = nn.Embedding(self.num_rels, hidden_dim)
        self.rel_mem_c.weight.data.fill_(0)

        self.recurrent_dropout_probability = recurrent_dropout_probability
        self.use_highway = use_highway

        # We do the projections for all the gates all at once, so if we are
        # using highway layers, we need some extra projections, which is
        # why the sizes of the Linear layers change here depending on this flag.
        if use_highway:
            self.input_linearity = torch.nn.Linear(
                self.inputs_dim,
                6 * self.hidden_size,
                bias=use_input_projection_bias)
            self.state_linearity = torch.nn.Linear(self.hidden_size,
                                                   5 * self.hidden_size,
                                                   bias=True)
        else:
            self.input_linearity = torch.nn.Linear(
                self.inputs_dim,
                4 * self.hidden_size,
                bias=use_input_projection_bias)
            self.state_linearity = torch.nn.Linear(self.hidden_size,
                                                   4 * self.hidden_size,
                                                   bias=True)

        self.out = nn.Linear(self.hidden_size, len(self.rel_classes))
        self.reset_parameters()

        fg_matrix, bg_matrix = get_counts()
        rel_obj_distribution = fg_matrix / (fg_matrix.sum(2)[:, :, None] +
                                            1e-5)
        rel_obj_distribution = torch.FloatTensor(rel_obj_distribution)
        rel_obj_distribution = rel_obj_distribution.view(-1, self.num_rels)

        self.rel_obj_distribution = nn.Embedding(rel_obj_distribution.size(0),
                                                 self.num_rels)
        # (#obj_class * #obj_class, #rel_class)
        self.rel_obj_distribution.weight.data = rel_obj_distribution
Code Example #8
    def __init__(self,
                 classes,
                 rel_classes,
                 embed_dim,
                 obj_dim,
                 inputs_dim,
                 hidden_dim,
                 pooling_dim,
                 recurrent_dropout_probability=0.2,
                 use_highway=True,
                 use_input_projection_bias=True,
                 use_vision=True,
                 use_bias=True,
                 use_tanh=True,
                 limit_vision=True,
                 sl_pretrain=False,
                 num_iter=-1):
        """
        Initializes the RNN
        :param embed_dim: Dimension of the embeddings
        :param encoder_hidden_dim: Hidden dim of the encoder, for attention purposes
        :param hidden_dim: Hidden dim of the decoder
        :param vocab_size: Number of words in the vocab
        :param bos_token: To use during decoding (non teacher forcing mode))
        :param bos: beginning of sentence token
        :param unk: unknown token (not used)
        """
        super(DecoderRNN, self).__init__()

        self.rel_embedding_dim = 100
        self.classes = classes
        self.rel_classes = rel_classes
        embed_vecs = obj_edge_vectors(['start'] + self.classes, wv_dim=100)
        self.obj_embed = nn.Embedding(len(self.classes), embed_dim)
        self.obj_embed.weight.data = embed_vecs

        embed_rels = obj_edge_vectors(self.rel_classes,
                                      wv_dim=self.rel_embedding_dim)
        self.rel_embed = nn.Embedding(len(self.rel_classes),
                                      self.rel_embedding_dim)
        self.rel_embed.weight.data = embed_rels

        self.embed_dim = embed_dim
        self.obj_dim = obj_dim
        self.hidden_size = hidden_dim
        self.inputs_dim = inputs_dim
        self.pooling_dim = pooling_dim
        self.nms_thresh = 0.3

        self.use_vision = use_vision
        self.use_bias = use_bias
        self.use_tanh = use_tanh
        self.limit_vision = limit_vision
        self.sl_pretrain = sl_pretrain
        self.num_iter = num_iter

        self.recurrent_dropout_probability = recurrent_dropout_probability
        self.use_highway = use_highway
        # We do the projections for all the gates all at once, so if we are
        # using highway layers, we need some extra projections, which is
        # why the sizes of the Linear layers change here depending on this flag.
        if use_highway:
            self.input_linearity = torch.nn.Linear(
                self.input_size,
                6 * self.hidden_size,
                bias=use_input_projection_bias)
            self.state_linearity = torch.nn.Linear(self.hidden_size,
                                                   5 * self.hidden_size,
                                                   bias=True)
        else:
            self.input_linearity = torch.nn.Linear(
                self.input_size,
                4 * self.hidden_size,
                bias=use_input_projection_bias)
            self.state_linearity = torch.nn.Linear(self.hidden_size,
                                                   4 * self.hidden_size,
                                                   bias=True)

        # self.obj_in_lin = torch.nn.Linear(self.rel_embedding_dim, self.rel_embedding_dim, bias=True)

        self.out = nn.Linear(self.hidden_size, len(self.classes))
        self.reset_parameters()

        # For relation predication
        embed_vecs2 = obj_edge_vectors(self.classes, wv_dim=embed_dim)
        self.obj_embed2 = nn.Embedding(self.num_classes, embed_dim)
        self.obj_embed2.weight.data = embed_vecs2.clone()

        # self.post_lstm = nn.Linear(self.hidden_dim, self.pooling_dim * 2)
        self.post_lstm = nn.Linear(self.obj_dim + 2 * self.embed_dim + 128,
                                   self.pooling_dim * 2)
        # Initialize to sqrt(1/2n) so that the outputs all have mean 0 and variance 1.
        # (Half contribution comes from LSTM, half from embedding.
        # In practice the pre-lstm stuff tends to have stdev 0.1 so I multiplied this by 10.
        self.post_lstm.weight.data.normal_(
            0, 10.0 * math.sqrt(1.0 / self.hidden_size)
        )  ######## there may need more consideration
        self.post_lstm.bias.data.zero_()

        self.rel_compress = nn.Linear(self.pooling_dim,
                                      self.num_rels,
                                      bias=True)
        self.rel_compress.weight = torch.nn.init.xavier_normal(
            self.rel_compress.weight, gain=1.0)
        if self.use_bias:
            self.freq_bias = FrequencyBias()

            # simple relation model
            from dataloaders.visual_genome import VG
            from lib.get_dataset_counts import get_counts, box_filter
            fg_matrix, bg_matrix = get_counts(train_data=VG.splits(
                num_val_im=5000,
                filter_non_overlap=True,
                filter_duplicate_rels=True,
                use_proposals=False)[0],
                                              must_overlap=True)
            prob_matrix = fg_matrix.astype(np.float32)
            prob_matrix[:, :, 0] = bg_matrix

            # TRYING SOMETHING NEW.
            prob_matrix[:, :, 0] += 1
            prob_matrix /= np.sum(prob_matrix, 2)[:, :, None]
            # prob_matrix /= float(fg_matrix.max())

            prob_matrix[:, :, 0] = 0  # Zero out BG
            self.prob_matrix = prob_matrix
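
With the background column zeroed and the rows normalized, prob_matrix[s, o] is, up to the removed background mass, the empirical distribution over foreground predicates for a subject of class s and an object of class o. An illustrative lookup (the class indices are made-up values):

import numpy as np

subj_classes = np.array([5, 12])   # e.g. predicted class per subject box
obj_classes = np.array([7, 3])     # e.g. predicted class per object box
freq_rel_dists = prob_matrix[subj_classes, obj_classes]
# -> shape (2, num_rel_classes); column 0 (background) is all zeros here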