Example 1
elif conf.model == 'shz_fusion':
    from lib.shz_models.rel_model_fusion import RelModel
elif conf.model == 'shz_fusion_beta':
    from lib.shz_models.rel_model_fusion_beta import RelModel
# --
else:
    raise ValueError()

# -- Create Tensorboard summary writer
writer = SummaryWriter(comment='_run#' + conf.save_dir.split('/')[-1])

# -- Create dataset splits and dataset loader
train, val, _ = VG.splits(
    num_val_im=conf.val_size,
    filter_duplicate_rels=True,
    use_proposals=conf.use_proposals,
    filter_non_overlap=conf.mode == 'sgdet',
    # -- Depth dataset parameters
    use_depth=conf.load_depth,
    three_channels_depth=conf.pretrained_depth)

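# -- Build batched loaders; depth maps are loaded alongside RGB when enabled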
train_loader, val_loader = VGDataLoader.splits(
    train,
    val,
    mode='rel',
    batch_size=conf.batch_size,
    num_workers=conf.num_workers,
    num_gpus=conf.num_gpus,
    # -- Depth dataset parameters
    use_depth=conf.load_depth)

# -- Create the specified Relation-Detection model
Example 2
from lib.evaluation.sg_eval import BasicSceneGraphEvaluator
from tqdm import tqdm
from config import BOX_SCALE, IM_SCALE
import dill as pkl
import os

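# Parse command-line flags into the experiment configuration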
conf = ModelConfig()
if conf.model == 'motifnet':
    from lib.rel_model import RelModel
elif conf.model == 'stanford':
    from lib.rel_model_stanford import RelModelStanford as RelModel
else:
    raise ValueError()

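# Load the VG splits; relations between non-overlapping boxes are filtered only in sgdet mode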
train, val, test = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True,
                             use_proposals=conf.use_proposals,
                             filter_non_overlap=conf.mode == 'sgdet')
if conf.test:
    val = test
if conf.train:
    val = train
train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus)

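# Build the relation detector from the dataset's class and predicate vocabularies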
detector = RelModel(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
                    num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
                    use_resnet=conf.use_resnet, order=conf.order,
                    nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
                    use_proposals=conf.use_proposals,
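Example 3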
"""
SCRIPT TO MAKE MEMES. this was from an old version of the code, so it might require some fixes to get working.

"""
from dataloaders.visual_genome import VG
# import matplotlib
# # matplotlib.use('Agg')
from tqdm import tqdm
import seaborn as sns
import numpy as np
from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps
from collections import defaultdict
train, val, test = VG.splits(filter_non_overlap=False, num_val_im=2000)

count_threshold = 50
pmi_threshold = 10

# Read the coarse type for each object and relation category from
# tab-separated files (column 2 apparently holds names like "dog_1";
# the token before the underscore is the coarse type).
o_type = []
with open("object_types.txt") as f:
    for line in f:
        tabs = line.strip().split("\t")
        o_type.append(tabs[1].split("_")[0])

r_type = []
with open("relation_types.txt") as f:
    for line in f:
        tabs = line.strip().split("\t")
        r_type.append(tabs[1].split("_")[0])
Example 4
def main():
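    # Build a fake argv so ModelConfig parses these flags (argv[0] is a throwaway program name).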
    args = 'X -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -multipred -cache motifnet_predcls1'
    sys.argv = args.split(' ')
    conf = ModelConfig()

    if conf.model == 'motifnet':
        from lib.rel_model import RelModel
    elif conf.model == 'stanford':
        from lib.rel_model_stanford import RelModelStanford as RelModel
    else:
        raise ValueError()

    train, val, test = VG.splits(
        num_val_im=conf.val_size, filter_duplicate_rels=True,
        use_proposals=conf.use_proposals,
        filter_non_overlap=conf.mode == 'sgdet',
    )
    if conf.test:
        val = test
    train_loader, val_loader = VGDataLoader.splits(
        train, val, mode='rel', batch_size=conf.batch_size,
        num_workers=conf.num_workers, num_gpus=conf.num_gpus
    )

    detector = RelModel(
        classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
        num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
        use_resnet=conf.use_resnet, order=conf.order,
        nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
        use_proposals=conf.use_proposals,
        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
        pooling_dim=conf.pooling_dim,
        rec_dropout=conf.rec_dropout,
        use_bias=conf.use_bias,
        use_tanh=conf.use_tanh,
        limit_vision=conf.limit_vision
    )


    detector.cuda()
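    # Restore all checkpoint parameters whose names and shapes match the model.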
    ckpt = torch.load(conf.ckpt)

    optimistic_restore(detector, ckpt['state_dict'])

    evaluator = BasicSceneGraphEvaluator.all_modes(
        multiple_preds=conf.multi_pred)

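    # 'recs' appears to hold cached per-image recall values from an earlier evaluation run.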
    mode, N = 'test.multi_pred', 20
    recs = pkl.load(open('{}.{}.pkl'.format(mode, N), 'rb'))

    np.random.seed(0)
    # sorted_idxs = np.argsort(recs)
    selected_idxs = np.random.choice(range(len(recs)), size=100, replace=False)
    sorted_idxs = selected_idxs[np.argsort(np.array(recs)[selected_idxs])]
    print('Sorted idxs: {}'.format(sorted_idxs.tolist()))

    save_dir = '/nethome/bamos/2018-intel/data/2018-07-31/sgs.multi'

    for idx in selected_idxs:
        gt_entry = {
            'gt_classes': val.gt_classes[idx].copy(),
            'gt_relations': val.relationships[idx].copy(),
            'gt_boxes': val.gt_boxes[idx].copy(),
        }

        detector.eval()
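        # Indexing the detector with a collated batch runs a forward pass;
        # the model appears to override __getitem__ for multi-GPU dispatch.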
        det_res = detector[vg_collate([test[idx]], num_gpus=1)]

        boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res
        pred_entry = {
            'pred_boxes': boxes_i * BOX_SCALE/IM_SCALE,
            'pred_classes': objs_i,
            'pred_rel_inds': rels_i,
            'obj_scores': obj_scores_i,
            'rel_scores': pred_scores_i,
        }


        unique_cnames = get_unique_cnames(gt_entry, test)
        save_img(idx, recs, test, gt_entry, det_res, unique_cnames, save_dir)
        save_gt_graph(idx, test, gt_entry, det_res, unique_cnames, save_dir)
        save_pred_graph(idx, test, pred_entry, det_res,
                        unique_cnames, save_dir,
                        multi_pred=conf.multi_pred, n_pred=20)
Example 5
import pandas as pd
import time
import os
from config import ModelConfig, FG_FRACTION, RPN_FG_FRACTION, IM_SCALE, BOX_SCALE
from torch.nn import functional as F
from lib.fpn.box_utils import bbox_loss
import torch.backends.cudnn as cudnn
from pycocotools.cocoeval import COCOeval
from lib.pytorch_misc import optimistic_restore, clip_grad_norm
from torch.optim.lr_scheduler import ReduceLROnPlateau

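# Let cuDNN pick the fastest convolution algorithms (inputs here have fixed size).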
cudnn.benchmark = True
conf = ModelConfig()

train, val, _ = VG.splits(num_val_im=conf.val_size,
                          filter_non_overlap=False,
                          filter_empty_rels=False,
                          use_proposals=conf.use_proposals)
train_loader, val_loader = VGDataLoader.splits(train,
                                               val,
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus)

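# Stand-alone object-detector training: train the RPN unless precomputed proposals are supplied.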
detector = ObjectDetector(
    classes=train.ind_to_classes,
    num_gpus=conf.num_gpus,
    mode='rpntrain' if not conf.use_proposals else 'proposals',
    use_resnet=conf.use_resnet)
detector.cuda()

# Note: if you're doing the stanford setup, you'll need to change this to freeze the lower layers
Example 6
                    type=str,
                    default='caches/kern_sgcls.pkl')

args = parser.parse_args()
os.makedirs(args.save_dir, exist_ok=True)
image_dir = os.path.join(args.save_dir, 'images')
graph_dir = os.path.join(args.save_dir, 'graphs')
os.makedirs(image_dir, exist_ok=True)
os.makedirs(graph_dir, exist_ok=True)
mode = 'sgcls'  # this code is only for the sgcls task

# train, val, test = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True,
#                         use_proposals=conf.use_proposals,
#                         filter_non_overlap=conf.mode == 'sgdet')
train, val, test = VG.splits(num_val_im=5000,
                             filter_duplicate_rels=True,
                             use_proposals=False,
                             filter_non_overlap=False)
val = test
# train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel',
#                                             batch_size=conf.batch_size,
#                                             num_workers=conf.num_workers,
#                                             num_gpus=conf.num_gpus)
train_loader, val_loader = VGDataLoader.splits(train,
                                               val,
                                               mode='rel',
                                               batch_size=1,
                                               num_workers=1,
                                               num_gpus=1)
ind_to_predicates = train.ind_to_predicates
ind_to_classes = train.ind_to_classes
Example 7
import pandas as pd
import time
from tqdm import tqdm
from torch.nn.functional import cross_entropy as CE
from lib.pytorch_misc import *
from lib.evaluation.sg_eval import BasicSceneGraphEvaluator, calculate_mR_from_evaluator_list, eval_entry
import pickle
from lib.rel_model_stanford import RelModelStanford

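# sgdet is evaluated on its own; otherwise both predcls and sgcls are evaluated.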
EVAL_MODES = ['sgdet'] if conf.mode == 'sgdet' else ['predcls', 'sgcls']
assert conf.mode in EVAL_MODES, (conf.mode, 'other modes not supported')

train, val_splits = VG.splits(data_dir=conf.data,
                              num_val_im=conf.val_size,
                              min_graph_size=conf.min_graph_size,
                              max_graph_size=conf.max_graph_size,
                              mrcnn=conf.detector == 'mrcnn',
                              filter_non_overlap=conf.mode == 'sgdet',
                              exclude_left_right=conf.exclude_left_right)

train_loader, val_loaders = VGDataLoader.splits(train,
                                                val_splits,
                                                mode='rel',
                                                batch_size=conf.batch_size,
                                                num_workers=conf.num_workers,
                                                num_gpus=conf.num_gpus)
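# Four validation loaders; the '_zs' pair presumably covers zero-shot subsets.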
val_loader, val_loader_zs, test_loader, test_loader_zs = val_loaders

detector = RelModelStanford(train_data=train,
                            num_gpus=conf.num_gpus,
                            mode=conf.mode,
Example 8
import numpy as np
import torch

from config import ModelConfig
from lib.pytorch_misc import optimistic_restore
from lib.evaluation.sg_eval import BasicSceneGraphEvaluator
from tqdm import tqdm
from config import BOX_SCALE, IM_SCALE
from lib.fpn.box_utils import bbox_overlaps
from collections import defaultdict
from PIL import Image, ImageDraw, ImageFont
import os
from functools import reduce

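# Qualitative-visualization setup (note the PIL drawing imports above).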
conf = ModelConfig()
train, val, test = VG.splits(num_val_im=conf.val_size)
if conf.test:
    val = test

train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus)

detector = RelModel(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
                    num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
                    use_resnet=conf.use_resnet, order=conf.order,
                    nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
                    use_proposals=conf.use_proposals,
                    pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
                    pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
Example 9
    def __init__(self,
                 classes,
                 rel_classes,
                 embed_dim,
                 obj_dim,
                 inputs_dim,
                 hidden_dim,
                 pooling_dim,
                 recurrent_dropout_probability=0.2,
                 use_highway=True,
                 use_input_projection_bias=True,
                 use_vision=True,
                 use_bias=True,
                 use_tanh=True,
                 limit_vision=True,
                 sl_pretrain=False,
                 num_iter=-1):
        """
        Initializes the RNN
        :param embed_dim: Dimension of the embeddings
        :param encoder_hidden_dim: Hidden dim of the encoder, for attention purposes
        :param hidden_dim: Hidden dim of the decoder
        :param vocab_size: Number of words in the vocab
        :param bos_token: To use during decoding (non teacher forcing mode))
        :param bos: beginning of sentence token
        :param unk: unknown token (not used)
        """
        super(DecoderRNN, self).__init__()

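        # Embedding tables for object classes (plus a 'start' token) and predicates,
        # initialized from pretrained word vectors.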
        self.rel_embedding_dim = 100
        self.classes = classes
        self.rel_classes = rel_classes
        # One extra embedding row for the 'start' token; vector width must match embed_dim.
        embed_vecs = obj_edge_vectors(['start'] + self.classes, wv_dim=embed_dim)
        self.obj_embed = nn.Embedding(len(self.classes) + 1, embed_dim)
        self.obj_embed.weight.data = embed_vecs

        embed_rels = obj_edge_vectors(self.rel_classes,
                                      wv_dim=self.rel_embedding_dim)
        self.rel_embed = nn.Embedding(len(self.rel_classes),
                                      self.rel_embedding_dim)
        self.rel_embed.weight.data = embed_rels

        self.embed_dim = embed_dim
        self.obj_dim = obj_dim
        self.hidden_size = hidden_dim
        self.inputs_dim = inputs_dim
        self.pooling_dim = pooling_dim
        self.nms_thresh = 0.3

        self.use_vision = use_vision
        self.use_bias = use_bias
        self.use_tanh = use_tanh
        self.limit_vision = limit_vision
        self.sl_pretrain = sl_pretrain
        self.num_iter = num_iter

        self.recurrent_dropout_probability = recurrent_dropout_probability
        self.use_highway = use_highway
        # We do the projections for all the gates all at once, so if we are
        # using highway layers, we need some extra projections, which is
        # why the sizes of the Linear layers change here depending on this flag.
        if use_highway:
            self.input_linearity = torch.nn.Linear(
                self.input_size,
                6 * self.hidden_size,
                bias=use_input_projection_bias)
            self.state_linearity = torch.nn.Linear(self.hidden_size,
                                                   5 * self.hidden_size,
                                                   bias=True)
        else:
            self.input_linearity = torch.nn.Linear(
                self.input_size,
                4 * self.hidden_size,
                bias=use_input_projection_bias)
            self.state_linearity = torch.nn.Linear(self.hidden_size,
                                                   4 * self.hidden_size,
                                                   bias=True)
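        # With highway layers, the input projection emits 6 * hidden_size values
        # (4 LSTM gates + highway gate + highway input) and the state projection 5;
        # without them, only the standard 4 LSTM gates are needed.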

        # self.obj_in_lin = torch.nn.Linear(self.rel_embedding_dim, self.rel_embedding_dim, bias=True)

        self.out = nn.Linear(self.hidden_size, len(self.classes))
        self.reset_parameters()

        # For relation predication
        embed_vecs2 = obj_edge_vectors(self.classes, wv_dim=embed_dim)
        self.obj_embed2 = nn.Embedding(self.num_classes, embed_dim)
        self.obj_embed2.weight.data = embed_vecs2.clone()

        # self.post_lstm = nn.Linear(self.hidden_dim, self.pooling_dim * 2)
        self.post_lstm = nn.Linear(self.obj_dim + 2 * self.embed_dim + 128,
                                   self.pooling_dim * 2)
        # Initialize to sqrt(1/2n) so that the outputs all have mean 0 and variance 1
        # (half the contribution comes from the LSTM, half from the embedding).
        # In practice the pre-LSTM features tend to have a stdev of ~0.1, hence the extra factor of 10.
        self.post_lstm.weight.data.normal_(
            0, 10.0 * math.sqrt(1.0 / self.hidden_size))
        # NOTE: this scaling may need more consideration.
        self.post_lstm.bias.data.zero_()

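        # Map the pooled pair representation to per-predicate scores.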
        self.rel_compress = nn.Linear(self.pooling_dim,
                                      self.num_rels,
                                      bias=True)
        torch.nn.init.xavier_normal_(self.rel_compress.weight, gain=1.0)
        if self.use_bias:
            self.freq_bias = FrequencyBias()

            # simple relation model
            from dataloaders.visual_genome import VG
            from lib.get_dataset_counts import get_counts, box_filter
            fg_matrix, bg_matrix = get_counts(
                train_data=VG.splits(num_val_im=5000,
                                     filter_non_overlap=True,
                                     filter_duplicate_rels=True,
                                     use_proposals=False)[0],
                must_overlap=True)
            prob_matrix = fg_matrix.astype(np.float32)
            prob_matrix[:, :, 0] = bg_matrix

            # TRYING SOMETHING NEW.
            prob_matrix[:, :, 0] += 1
            prob_matrix /= np.sum(prob_matrix, 2)[:, :, None]
            # prob_matrix /= float(fg_matrix.max())

            prob_matrix[:, :, 0] = 0  # Zero out BG
            self.prob_matrix = prob_matrix
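
# ---------------------------------------------------------------------------
# A minimal usage sketch (an assumption, not part of the original snippet):
# a frequency prior shaped like `prob_matrix` above, i.e.
# (num_classes, num_classes, num_rels), can bias relation scores by adding
# its log to the visual logits of each (subject, object) class pair.
import numpy as np

def apply_freq_bias(rel_logits, prob_matrix, subj_classes, obj_classes, eps=1e-8):
    """rel_logits: (num_pairs, num_rels); subj_classes/obj_classes: (num_pairs,) int arrays."""
    prior = prob_matrix[subj_classes, obj_classes]  # fancy indexing -> (num_pairs, num_rels)
    return rel_logits + np.log(prior + eps)

# e.g. apply_freq_bias(logits, prob_matrix, subj_cls, obj_cls) in the scoring path,
# where subj_cls/obj_cls are the predicted class indices of each candidate pair.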