Example #1
0
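# Training script for a scene-graph relationship model (neural-motifs style).
# Assumed imports below; get_optim, train_epoch and val_epoch are defined
# elsewhere in the original script:
import os

import torch

from config import ModelConfig
from dataloaders.visual_genome import VGDataLoader, VG
from lib.pytorch_misc import optimistic_restore, print_para
from lib.rel_model import RelModel

conf = ModelConfig()
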
def main():
    fname = os.path.join(conf.save_dir, 'train_losses.csv')
    train_f = open(fname, 'w')
    train_f.write(
        'iter,class_loss,rel_loss,total,recall20,recall50,recall100,recall20_con,recall50_con,recall100_con\n'
    )
    train_f.flush()

    fname = os.path.join(conf.save_dir, 'val_losses.csv')
    val_f = open(fname, 'w')
    val_f.write(
        'recall20,recall50,recall100,recall20_con,recall50_con,recall100_con\n'
    )
    val_f.flush()

    train, val, _ = VG.splits(num_val_im=conf.val_size,
                              filter_duplicate_rels=True,
                              use_proposals=conf.use_proposals,
                              filter_non_overlap=conf.mode == 'sgdet')
    train_loader, val_loader = VGDataLoader.splits(
        train,
        val,
        mode='rel',
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus)

    detector = RelModel(
        classes=train.ind_to_classes,
        rel_classes=train.ind_to_predicates,
        num_gpus=conf.num_gpus,
        mode=conf.mode,
        require_overlap_det=True,
        use_resnet=conf.use_resnet,
        order=conf.order,
        nl_edge=conf.nl_edge,
        nl_obj=conf.nl_obj,
        hidden_dim=conf.hidden_dim,
        use_proposals=conf.use_proposals,
        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
        pooling_dim=conf.pooling_dim,
        rec_dropout=conf.rec_dropout,
        use_bias=conf.use_bias,
        use_tanh=conf.use_tanh,
        limit_vision=conf.limit_vision,
        lml_topk=conf.lml_topk,
        lml_softmax=conf.lml_softmax,
        entr_topk=conf.entr_topk,
        ml_loss=conf.ml_loss)

    # Freeze the detector
    for param in detector.detector.parameters():
        param.requires_grad = False

    print(print_para(detector), flush=True)

    ckpt = torch.load(conf.ckpt)
    # Checkpoint filenames like ".../vgrel-<epoch>.tar" indicate a full
    # relationship-model checkpoint to resume from.
    if conf.ckpt.split('-')[-2].split('/')[-1] == 'vgrel':
        print("Loading EVERYTHING")
        start_epoch = ckpt['epoch']

        if not optimistic_restore(detector, ckpt['state_dict']):
            start_epoch = -1
            # optimistic_restore(
            #     detector.detector,
            #     torch.load('checkpoints/vgdet/vg-28.tar')['state_dict']
            # )
    else:
        start_epoch = -1
        optimistic_restore(detector.detector, ckpt['state_dict'])
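
        # Initialize both ROI feature heads (union-box roi_fmap and per-object
        # roi_fmap_obj) with the detector checkpoint's fc weights.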

        detector.roi_fmap[1][0].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.0.weight'])
        detector.roi_fmap[1][3].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.3.weight'])
        detector.roi_fmap[1][0].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.0.bias'])
        detector.roi_fmap[1][3].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.3.bias'])

        detector.roi_fmap_obj[0].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.0.weight'])
        detector.roi_fmap_obj[3].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.3.weight'])
        detector.roi_fmap_obj[0].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.0.bias'])
        detector.roi_fmap_obj[3].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.3.bias'])

    detector.cuda()

    print("Training starts now!")
    optimizer, scheduler = get_optim(detector,
                                     conf.lr * conf.num_gpus * conf.batch_size)
    best_eval = None
    for epoch in range(start_epoch + 1, start_epoch + 1 + conf.num_epochs):
        rez = train_epoch(epoch, detector, train, train_loader, optimizer,
                          conf, train_f)
        print("overall{:2d}: ({:.3f})\n{}".format(epoch,
                                                  rez.mean(1)['total'],
                                                  rez.mean(1)),
              flush=True)

        mAp = val_epoch(detector, val, val_loader, val_f)
        scheduler.step(mAp)

        if conf.save_dir is not None:
            if best_eval is None or mAp > best_eval:
                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict': detector.state_dict(),
                        # 'optimizer': optimizer.state_dict(),
                    },
                    os.path.join(conf.save_dir, 'best-val.tar'))
                best_eval = mAp
Example #2
0
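# Evaluation/visualization setup: pick the train/val/test split to evaluate on
# and build the corresponding data loaders.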
import torch

from config import ModelConfig
from lib.pytorch_misc import optimistic_restore
from tqdm import tqdm
from config import BOX_SCALE, IM_SCALE
from lib.fpn.box_utils import bbox_overlaps
from collections import defaultdict
from PIL import Image, ImageDraw, ImageFont
import os
from functools import reduce
import json
from collections import OrderedDict

# assumed repo-local imports for the code below
import numpy as np
from dataloaders.visual_genome import VG, VGDataLoader

conf = ModelConfig()
train, val, test = VG.splits(num_val_im=conf.val_size, filter_non_overlap=False) 

set_name = ''
if conf.test:
	val = test
	set_name = 'test'
elif conf.val:
	set_name = 'val'
else:
	val = train
	set_name = 'train'

train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel',
											   batch_size=conf.batch_size,
											   num_workers=conf.num_workers,
											   num_gpus=conf.num_gpus)
Example #3
0
# IoU of one ground-truth box against an array of predicted boxes, in
# inclusive pixel coordinates. The imports, function name and signature are
# reconstructions.
import numpy as np
from tqdm import trange

from dataloaders.visual_genome import VG


def iou_one_to_many(gt_box, pred_boxes):
    # intersection
    ixmin = np.maximum(gt_box[0], pred_boxes[:, 0])
    iymin = np.maximum(gt_box[1], pred_boxes[:, 1])
    ixmax = np.minimum(gt_box[2], pred_boxes[:, 2])
    iymax = np.minimum(gt_box[3], pred_boxes[:, 3])
    iw = np.maximum(ixmax - ixmin + 1., 0.)
    ih = np.maximum(iymax - iymin + 1., 0.)
    inters = iw * ih

    # union
    uni = ((gt_box[2] - gt_box[0] + 1.) * (gt_box[3] - gt_box[1] + 1.) +
           (pred_boxes[:, 2] - pred_boxes[:, 0] + 1.) *
           (pred_boxes[:, 3] - pred_boxes[:, 1] + 1.) - inters)

    overlaps = inters / uni
    return overlaps


train, val, test = VG.splits()

result_dict_mine = {'sgdet_recall': {20: [], 50: [], 100: []}}
result_dict_theirs = {'sgdet_recall': {20: [], 50: [], 100: []}}

for img_i in trange(len(val)):
    gt_entry = {
        'gt_classes': val.gt_classes[img_i].copy(),
        'gt_relations': val.relationships[img_i].copy(),
        'gt_boxes': val.gt_boxes[img_i].copy(),
    }

    # Use shuffled GT boxes
    gt_indices = np.arange(
        gt_entry['gt_boxes'].shape[0]
    )  #np.random.choice(gt_entry['gt_boxes'].shape[0], 20)
Example #4
0
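# Evaluation setup for KERN: build VG splits and data loaders, then construct
# the KERN relationship detector.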
from config import ModelConfig
from lib.pytorch_misc import optimistic_restore
from lib.evaluation.sg_eval import BasicSceneGraphEvaluator, calculate_mR_from_evaluator_list, eval_entry
from tqdm import tqdm
from config import BOX_SCALE, IM_SCALE
import dill as pkl
import os
from lib.kern_model import KERN

# assumed repo-local imports
from dataloaders.visual_genome import VG, VGDataLoader


conf = ModelConfig()


train, val, test = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True,
                             use_proposals=conf.use_proposals,
                             filter_non_overlap=conf.mode == 'sgdet')
ind_to_predicates = train.ind_to_predicates # ind_to_predicates[0] means no relationship
if conf.test:
    val = test
train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus)

detector = KERN(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
                num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
                use_resnet=conf.use_resnet, use_proposals=conf.use_proposals,
                use_ggnn_obj=conf.use_ggnn_obj, ggnn_obj_time_step_num=conf.ggnn_obj_time_step_num,
                ggnn_obj_hidden_dim=conf.ggnn_obj_hidden_dim, ggnn_obj_output_dim=conf.ggnn_obj_output_dim,
                use_obj_knowledge=conf.use_obj_knowledge, obj_knowledge=conf.obj_knowledge,
Example #5
0
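# sg2im-style GAN training on top of a frozen object detector. Assumed
# imports below; repo helpers (timeit, LossManager, add_loss, get_gan_losses,
# calculate_model_losses, check_model, neural_motifs_sg2im_model, check_args)
# are defined elsewhere in the original project:
import math
import os
from os.path import exists

import torch
from torch import optim
import torch.nn.functional as F
from tensorboardX import SummaryWriter
from tqdm import tqdm

from dataloaders.mscoco import CocoDetection, CocoDataLoader
from dataloaders.visual_genome import VG, VGDataLoader
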
def main(args):
    print(args)
    check_args(args)
    if not exists(args.output_dir):
        os.makedirs(args.output_dir)
    summary_writer = SummaryWriter(args.output_dir)

    if args.coco:
        train, val = CocoDetection.splits()
        val.ids = val.ids[:args.val_size]
        train_loader, val_loader = CocoDataLoader.splits(train, val, batch_size=args.batch_size,
                                                         num_workers=args.num_workers,
                                                         num_gpus=args.num_gpus)
    else:
        train, val, _ = VG.splits(num_val_im=args.val_size, filter_non_overlap=False,
                                  filter_empty_rels=False, use_proposals=args.use_proposals)
        train_loader, val_loader = VGDataLoader.splits(train, val, batch_size=args.batch_size,
                                                       num_workers=args.num_workers,
                                                       num_gpus=args.num_gpus)
    # Debugging aid, disabled: printing the class list and exiting here would
    # make the rest of main() unreachable.
    # print(train.ind_to_classes); os._exit(0)

    all_in_one_model = neural_motifs_sg2im_model(args, train.ind_to_classes)
    # Freeze the detector
    for n, param in all_in_one_model.detector.named_parameters():
        param.requires_grad = False
    all_in_one_model.cuda()
    gan_g_loss, gan_d_loss = get_gan_losses(args.gan_loss_type)

    t, epoch, checkpoint = all_in_one_model.t, all_in_one_model.epoch, all_in_one_model.checkpoint
    while True:
        if t >= args.num_iterations:
            break
        epoch += 1
        print('Starting epoch %d' % epoch)

        for step, batch in enumerate(tqdm(train_loader, desc='Training Epoch %d' % epoch, total=len(train_loader))):
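            # After eval_mode_after iterations the generator is switched to
            # eval mode (fixing batch-norm statistics) and its optimizer is
            # re-created.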
            if t == args.eval_mode_after:
                print('switching to eval mode')
                all_in_one_model.model.eval()
                all_in_one_model.optimizer = optim.Adam(all_in_one_model.parameters(), lr=args.learning_rate)
            t += 1

            with timeit('forward', args.timing):
                result = all_in_one_model[batch]
                imgs, imgs_pred, objs = result.imgs, result.imgs_pred, result.objs
                g_scores_fake_crop = result.g_scores_fake_crop
                g_obj_scores_fake_crop = result.g_obj_scores_fake_crop
                g_scores_fake_img = result.g_scores_fake_img
                d_scores_fake_crop = result.d_scores_fake_crop
                d_obj_scores_fake_crop = result.d_obj_scores_fake_crop
                d_scores_real_crop = result.d_scores_real_crop
                d_obj_scores_real_crop = result.d_obj_scores_real_crop
                d_scores_fake_img = result.d_scores_fake_img
                d_scores_real_img = result.d_scores_real_img

            with timeit('loss', args.timing):
                total_loss, losses = calculate_model_losses(
                    args, imgs, imgs_pred)

                if all_in_one_model.obj_discriminator is not None:
                    total_loss = add_loss(total_loss, F.cross_entropy(g_obj_scores_fake_crop, objs), losses, 'ac_loss',
                                          args.ac_loss_weight)
                    weight = args.discriminator_loss_weight * args.d_obj_weight
                    total_loss = add_loss(total_loss, gan_g_loss(g_scores_fake_crop), losses,
                                          'g_gan_obj_loss', weight)

                if all_in_one_model.img_discriminator is not None:
                    weight = args.discriminator_loss_weight * args.d_img_weight
                    total_loss = add_loss(total_loss, gan_g_loss(g_scores_fake_img), losses,
                                          'g_gan_img_loss', weight)

            losses['total_loss'] = total_loss.item()
            if not math.isfinite(losses['total_loss']):
                print('WARNING: got a non-finite loss, not backpropping')
                continue

            with timeit('backward', args.timing):
                all_in_one_model.optimizer.zero_grad()
                total_loss.backward()
                all_in_one_model.optimizer.step()


            if all_in_one_model.obj_discriminator is not None:
                with timeit('d_obj loss', args.timing):
                    d_obj_losses = LossManager()
                    d_obj_gan_loss = gan_d_loss(d_scores_real_crop, d_scores_fake_crop)
                    d_obj_losses.add_loss(d_obj_gan_loss, 'd_obj_gan_loss')
                    d_obj_losses.add_loss(F.cross_entropy(d_obj_scores_real_crop, objs), 'd_ac_loss_real')
                    d_obj_losses.add_loss(F.cross_entropy(d_obj_scores_fake_crop, objs), 'd_ac_loss_fake')

                with timeit('d_obj backward', args.timing):
                    all_in_one_model.optimizer_d_obj.zero_grad()
                    d_obj_losses.total_loss.backward()
                    all_in_one_model.optimizer_d_obj.step()

            if all_in_one_model.img_discriminator is not None:
                with timeit('d_img loss', args.timing):
                    d_img_losses = LossManager()
                    d_img_gan_loss = gan_d_loss(d_scores_real_img, d_scores_fake_img)
                    d_img_losses.add_loss(d_img_gan_loss, 'd_img_gan_loss')

                with timeit('d_img backward', args.timing):
                    all_in_one_model.optimizer_d_img.zero_grad()
                    d_img_losses.total_loss.backward()
                    all_in_one_model.optimizer_d_img.step()

            if t % args.print_every == 0:
                print('t = %d / %d' % (t, args.num_iterations))
                G_loss_list = []
                for name, val in losses.items():
                    G_loss_list.append('[%s]: %.4f' % (name, val))
                    checkpoint['losses'][name].append(val)
                    summary_writer.add_scalar("G_%s" % name, val, t)
                print("G: %s" % ", ".join(G_loss_list))
                checkpoint['losses_ts'].append(t)

                if all_in_one_model.obj_discriminator is not None:
                    D_obj_loss_list = []
                    for name, val in d_obj_losses.items():
                        D_obj_loss_list.append('[%s]: %.4f' % (name, val))
                        checkpoint['d_losses'][name].append(val)
                        summary_writer.add_scalar("D_obj_%s" % name, val, t)
                    print("D_obj: %s" % ", ".join(D_obj_loss_list))

                if all_in_one_model.img_discriminator is not None:
                    D_img_loss_list = []
                    for name, val in d_img_losses.items():
                        D_img_loss_list.append('[%s]: %.4f' % (name, val))
                        checkpoint['d_losses'][name].append(val)
                        summary_writer.add_scalar("D_img_%s" % name, val, t)
                    print("D_img: %s" % ", ".join(D_img_loss_list))

            if t % args.checkpoint_every == 0:
                print('checking on train')
                train_results = check_model(args, train_loader, all_in_one_model)
                t_losses, t_samples = train_results

                checkpoint['train_samples'].append(t_samples)
                checkpoint['checkpoint_ts'].append(t)
                for name, images in t_samples.items():
                    summary_writer.add_image("train_%s" % name, images, t)

                print('checking on val')
                val_results = check_model(args, val_loader, all_in_one_model)
                val_losses, val_samples = val_results
                checkpoint['val_samples'].append(val_samples)
                for name, images in val_samples.items():
                    summary_writer.add_image("val_%s" % name, images, t)

                for k, v in val_losses.items():
                    checkpoint['val_losses'][k].append(v)
                    summary_writer.add_scalar("val_%s" % k, v, t)
                checkpoint['model_state'] = all_in_one_model.model.state_dict()

                if all_in_one_model.obj_discriminator is not None:
                    checkpoint['d_obj_state'] = all_in_one_model.obj_discriminator.state_dict()
                    checkpoint['d_obj_optim_state'] = all_in_one_model.optimizer_d_obj.state_dict()

                if all_in_one_model.img_discriminator is not None:
                    checkpoint['d_img_state'] = all_in_one_model.img_discriminator.state_dict()
                    checkpoint['d_img_optim_state'] = all_in_one_model.optimizer_d_img.state_dict()

                checkpoint['optim_state'] = all_in_one_model.optimizer.state_dict()
                checkpoint['counters']['t'] = t
                checkpoint['counters']['epoch'] = epoch
                checkpoint_path = os.path.join(args.output_dir,
                                               '%s_with_model.pt' % args.checkpoint_name)
                print('Saving checkpoint to ', checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

                # Save another checkpoint without any model or optim state
                checkpoint_path = os.path.join(args.output_dir,
                                               '%s_no_model.pt' % args.checkpoint_name)
                key_blacklist = ['model_state', 'optim_state', 'model_best_state',
                                 'd_obj_state', 'd_obj_optim_state', 'd_obj_best_state',
                                 'd_img_state', 'd_img_optim_state', 'd_img_best_state']
                small_checkpoint = {}
                for k, v in checkpoint.items():
                    if k not in key_blacklist:
                        small_checkpoint[k] = v
                torch.save(small_checkpoint, checkpoint_path)
Example #6
0
# assumed imports (neural-motifs layout)
import torch.backends.cudnn as cudnn
from config import ModelConfig
from dataloaders.mscoco import CocoDetection, CocoDataLoader
from dataloaders.visual_genome import VG, VGDataLoader
from lib.object_detector import ObjectDetector

cudnn.benchmark = True
conf = ModelConfig()

if conf.coco:
    train, val = CocoDetection.splits()
    val.ids = val.ids[:conf.val_size]
    train_loader, val_loader = CocoDataLoader.splits(
        train,
        val,
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus)
else:
    train, val, _ = VG.splits(num_val_im=conf.val_size,
                              filter_non_overlap=False,
                              filter_empty_rels=False,
                              use_proposals=conf.use_proposals)
    train_loader, val_loader = VGDataLoader.splits(
        train,
        val,
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus)

detector = ObjectDetector(
    classes=train.ind_to_classes,
    num_gpus=conf.num_gpus,
    mode='rpntrain' if not conf.use_proposals else 'proposals',
    use_resnet=conf.use_resnet)
detector.cuda()
Example #7
0
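# Qualitative-evaluation setup: load VG splits and build a RelModel for
# visualizing predicted scene graphs.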
import numpy as np
import torch

from config import ModelConfig
from lib.pytorch_misc import optimistic_restore
from lib.evaluation.sg_eval_visual import BasicSceneGraphEvaluator
from tqdm import tqdm
from config import BOX_SCALE, IM_SCALE
from lib.fpn.box_utils import bbox_overlaps
from collections import defaultdict
from PIL import Image, ImageDraw, ImageFont
import os
from functools import reduce

# assumed repo-local imports
from dataloaders.visual_genome import VG, VGDataLoader
from lib.rel_model import RelModel

conf = ModelConfig()
train, val, test = VG.splits(num_val_im=conf.val_size)
if conf.test:
    val = test

train_loader, val_loader = VGDataLoader.splits(train,
                                               val,
                                               mode='rel',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus)

detector = RelModel(
    classes=train.ind_to_classes,
    rel_classes=train.ind_to_predicates,
    num_gpus=conf.num_gpus,
    mode=conf.mode,
Example #8
0
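# Visualization of cached predictions. `parser` (an argparse.ArgumentParser),
# `conf`, VCRDataset, vg_collate, VG and DataLoader are assumed to be defined
# or imported earlier in the original script.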
parser.add_argument('-cache_dir',
                    dest='cache_dir',
                    help='dir to load cache predicted results',
                    type=str,
                    default='caches/kern_sgcls.pkl')

args = parser.parse_args()
os.makedirs(args.save_dir, exist_ok=True)
image_dir = os.path.join(args.save_dir, 'images')
graph_dir = os.path.join(args.save_dir, 'graphs')
os.makedirs(image_dir, exist_ok=True)
os.makedirs(graph_dir, exist_ok=True)
mode = 'sgcls'  # this code is only for the sgcls task

train, _, _ = VG.splits(num_val_im=5000,
                        filter_duplicate_rels=True,
                        use_proposals=False,
                        filter_non_overlap=False)
'''

train,_, _ = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True,
                         use_proposals=conf.use_proposals,
                         filter_non_overlap=conf.mode == 'sgdet')
'''
vcrdata = VCRDataset()
vcrdataloader = DataLoader(
    vcrdata,
    batch_size=1,
    shuffle=True,
    batch_sampler=None,
    num_workers=1,
    collate_fn=lambda x: vg_collate(x, mode='rel', num_gpus=1, is_train=False),
Example #9
0
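# Depth-fusion relationship-model training. The snippet continues an if/elif
# chain that selects the RelModel implementation based on conf.model.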
    from lib.shz_models.rel_model_depth_union import RelModel
elif conf.model == 'shz_fusion':
    from lib.shz_models.rel_model_fusion import RelModel
elif conf.model == 'shz_fusion_beta':
    from lib.shz_models.rel_model_fusion_beta import RelModel
# --
else:
    raise ValueError()

# -- Create Tensorboard summary writer
writer = SummaryWriter(comment='_run#' + conf.save_dir.split('/')[-1])

# -- Create dataset splits and dataset loader
train, val, _ = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True,
                          use_proposals=conf.use_proposals,
                          filter_non_overlap=conf.mode == 'sgdet',
                          # -- Depth dataset parameters
                          use_depth=conf.load_depth,
                          three_channels_depth=conf.pretrained_depth)

train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus,
                                               # -- Depth dataset parameters
                                               use_depth=conf.load_depth)

# -- Create the specified Relation-Detection model
detector = RelModel(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
                    num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
                    use_resnet=conf.use_resnet, order=conf.order,
                    nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
Example #10
0
from lib.get_dataset_counts import get_counts, box_filter
# assumed repo-local imports
from dataloaders.visual_genome import VG, VGDataLoader
from lib.object_detector import ObjectDetector

from config import ModelConfig, FG_FRACTION, RPN_FG_FRACTION, DATA_PATH, BOX_SCALE, IM_SCALE, PROPOSAL_FN
import torch.backends.cudnn as cudnn
from lib.pytorch_misc import optimistic_restore, nonintersecting_2d_inds
from lib.evaluation.sg_eval import BasicSceneGraphEvaluator
from tqdm import tqdm
from copy import deepcopy
import dill as pkl

cudnn.benchmark = True
conf = ModelConfig()

MUST_OVERLAP = False
train, val, test = VG.splits(num_val_im=conf.val_size,
                             filter_non_overlap=MUST_OVERLAP,
                             filter_duplicate_rels=True,
                             use_proposals=conf.use_proposals)
if conf.test:
    print("test data!")
    val = test
train_loader, val_loader = VGDataLoader.splits(train,
                                               val,
                                               mode='rel',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus)
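
# fg_matrix counts (subject class, object class, predicate) triples in the
# training split; bg_matrix counts object pairs that co-occur without a
# relation.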

fg_matrix, bg_matrix = get_counts(train_data=train, must_overlap=MUST_OVERLAP)

detector = ObjectDetector(
    classes=train.ind_to_classes,
Example #11
0
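# Depth auto-encoder training setup (depth-aware scene-graph pipeline).
# ModelConfig, set_random_seed, SummaryWriter, VG, VGDataLoader, AEModel and
# print_para are assumed to be imported earlier in the original script.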
# -- Get model configuration
conf = ModelConfig()

# -- Set random seed
if conf.rnd_seed is not None:
    set_random_seed(conf.rnd_seed)

# -- Create Tensorboard summary writer
writer = SummaryWriter(comment='_run#' + conf.save_dir.split('/')[-1])

# -- Create dataset splits and dataset loader
train, val, _ = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True,
                          use_proposals=False,
                          filter_non_overlap=False,
                          # -- (ADDED) add depth related parameters
                          use_depth=True,
                          three_channels_depth=False)

train_loader, val_loader = VGDataLoader.splits(train, val, mode='det',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus,
                                               use_depth=True)
# -- Create Auto-Encoder model
detector = AEModel(num_gpus=conf.num_gpus, depth_model=conf.depth_model)

# -- Print model parameters
print(print_para(detector), flush=True)

# -- Load the specified checkpoint
Example #12
0
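# Renders ground-truth and predicted scene graphs for a sample of images.
# Assumed imports: sys, numpy as np, torch, dill as pkl, plus repo modules
# (ModelConfig, VG, vg_collate, BOX_SCALE, IM_SCALE, BasicSceneGraphEvaluator,
# optimistic_restore) and the local save_img / save_gt_graph / save_pred_graph
# helpers.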
def main():
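    # Hard-coded CLI flags; the leading 'X' stands in for sys.argv[0].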
    args = 'X -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -multipred -cache motifnet_predcls1'
    sys.argv = args.split(' ')
    conf = ModelConfig()

    if conf.model == 'motifnet':
        from lib.rel_model import RelModel
    elif conf.model == 'stanford':
        from lib.rel_model_stanford import RelModelStanford as RelModel
    else:
        raise ValueError()

    train, val, test = VG.splits(
        num_val_im=conf.val_size, filter_duplicate_rels=True,
        use_proposals=conf.use_proposals,
        filter_non_overlap=conf.mode == 'sgdet',
    )
    if conf.test:
        val = test
    train_loader, val_loader = VGDataLoader.splits(
        train, val, mode='rel', batch_size=conf.batch_size,
        num_workers=conf.num_workers, num_gpus=conf.num_gpus
    )

    detector = RelModel(
        classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
        num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
        use_resnet=conf.use_resnet, order=conf.order,
        nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
        use_proposals=conf.use_proposals,
        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
        pooling_dim=conf.pooling_dim,
        rec_dropout=conf.rec_dropout,
        use_bias=conf.use_bias,
        use_tanh=conf.use_tanh,
        limit_vision=conf.limit_vision
    )


    detector.cuda()
    ckpt = torch.load(conf.ckpt)

    optimistic_restore(detector, ckpt['state_dict'])

    evaluator = BasicSceneGraphEvaluator.all_modes(
        multiple_preds=conf.multi_pred)

    mode, N = 'test.multi_pred', 20
    recs = pkl.load(open('{}.{}.pkl'.format(mode, N), 'rb'))

    np.random.seed(0)
    # sorted_idxs = np.argsort(recs)
    selected_idxs = np.random.choice(range(len(recs)), size=100, replace=False)
    sorted_idxs = selected_idxs[np.argsort(np.array(recs)[selected_idxs])]
    print('Sorted idxs: {}'.format(sorted_idxs.tolist()))

    save_dir = '/nethome/bamos/2018-intel/data/2018-07-31/sgs.multi'

    for idx in selected_idxs:
        gt_entry = {
            'gt_classes': val.gt_classes[idx].copy(),
            'gt_relations': val.relationships[idx].copy(),
            'gt_boxes': val.gt_boxes[idx].copy(),
        }

        detector.eval()
        det_res = detector[vg_collate([test[idx]], num_gpus=1)]

        boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res
        pred_entry = {
            'pred_boxes': boxes_i * BOX_SCALE/IM_SCALE,
            'pred_classes': objs_i,
            'pred_rel_inds': rels_i,
            'obj_scores': obj_scores_i,
            'rel_scores': pred_scores_i,
        }


        unique_cnames = get_unique_cnames(gt_entry, test)
        save_img(idx, recs, test, gt_entry, det_res, unique_cnames, save_dir)
        save_gt_graph(idx, test, gt_entry, det_res, unique_cnames, save_dir)
        save_pred_graph(idx, test, pred_entry, det_res,
                        unique_cnames, save_dir,
                        multi_pred=conf.multi_pred, n_pred=20)
"""
SCRIPT TO MAKE MEMES. this was from an old version of the code, so it might require some fixes to get working.

"""
from dataloaders.visual_genome import VG
# import matplotlib
# matplotlib.use('Agg')
from tqdm import tqdm
import seaborn as sns
import numpy as np
from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps
from collections import defaultdict
train, val, test = VG.splits(filter_non_overlap=False, num_val_im=2000)

count_threshold = 50
pmi_threshold = 10

# For each line of the *_types.txt files, take the second tab-separated field
# and keep the text before the first underscore as the coarse type label.
o_type = []
with open("object_types.txt") as f:
    for line in f.readlines():
        tabs = line.strip().split("\t")
        t = tabs[1].split("_")[0]
        o_type.append(t)

r_type = []
with open("relation_types.txt") as f:
    for line in f.readlines():
        tabs = line.strip().split("\t")
        t = tabs[1].split("_")[0]
        r_type.append(t)
Example #14
0
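# Stanford/IMP-style relationship model evaluation setup. `conf` (a
# ModelConfig) and the VG / VGDataLoader imports are assumed to appear earlier
# in the original script.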
import pandas as pd
import time
from tqdm import tqdm
from torch.nn.functional import cross_entropy as CE
from lib.pytorch_misc import *
from lib.evaluation.sg_eval import BasicSceneGraphEvaluator, calculate_mR_from_evaluator_list, eval_entry
import pickle
from lib.rel_model_stanford import RelModelStanford

EVAL_MODES = ['sgdet'] if conf.mode == 'sgdet' else ['predcls', 'sgcls']
assert conf.mode in EVAL_MODES, (conf.mode, 'other modes not supported')

train, val_splits = VG.splits(data_dir=conf.data,
                              num_val_im=conf.val_size,
                              min_graph_size=conf.min_graph_size,
                              max_graph_size=conf.max_graph_size,
                              mrcnn=conf.detector == 'mrcnn',
                              filter_non_overlap=conf.mode == 'sgdet',
                              exclude_left_right=conf.exclude_left_right)

train_loader, val_loaders = VGDataLoader.splits(train,
                                                val_splits,
                                                mode='rel',
                                                batch_size=conf.batch_size,
                                                num_workers=conf.num_workers,
                                                num_gpus=conf.num_gpus)
val_loader, val_loader_zs, test_loader, test_loader_zs = val_loaders

detector = RelModelStanford(train_data=train,
                            num_gpus=conf.num_gpus,
                            mode=conf.mode,
Example #15
0
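# __init__ of a DecoderRNN-style module (the enclosing class definition is not
# shown). Assumed imports: math, numpy as np, torch, torch.nn as nn, plus
# obj_edge_vectors and FrequencyBias from the surrounding repo.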
    def __init__(self,
                 classes,
                 rel_classes,
                 embed_dim,
                 obj_dim,
                 inputs_dim,
                 hidden_dim,
                 pooling_dim,
                 recurrent_dropout_probability=0.2,
                 use_highway=True,
                 use_input_projection_bias=True,
                 use_vision=True,
                 use_bias=True,
                 use_tanh=True,
                 limit_vision=True,
                 sl_pretrain=False,
                 num_iter=-1):
        """
        Initializes the RNN
        :param embed_dim: Dimension of the embeddings
        :param encoder_hidden_dim: Hidden dim of the encoder, for attention purposes
        :param hidden_dim: Hidden dim of the decoder
        :param vocab_size: Number of words in the vocab
        :param bos_token: To use during decoding (non teacher forcing mode))
        :param bos: beginning of sentence token
        :param unk: unknown token (not used)
        """
        super(DecoderRNN, self).__init__()

        self.rel_embedding_dim = 100
        self.classes = classes
        self.rel_classes = rel_classes
        embed_vecs = obj_edge_vectors(['start'] + self.classes, wv_dim=100)
        # one extra embedding row for the 'start' token prepended above
        self.obj_embed = nn.Embedding(len(self.classes) + 1, embed_dim)
        self.obj_embed.weight.data = embed_vecs

        embed_rels = obj_edge_vectors(self.rel_classes,
                                      wv_dim=self.rel_embedding_dim)
        self.rel_embed = nn.Embedding(len(self.rel_classes),
                                      self.rel_embedding_dim)
        self.rel_embed.weight.data = embed_rels

        self.embed_dim = embed_dim
        self.obj_dim = obj_dim
        self.hidden_size = hidden_dim
        self.inputs_dim = inputs_dim
        self.pooling_dim = pooling_dim
        self.nms_thresh = 0.3

        self.use_vision = use_vision
        self.use_bias = use_bias
        self.use_tanh = use_tanh
        self.limit_vision = limit_vision
        self.sl_pretrain = sl_pretrain
        self.num_iter = num_iter

        self.recurrent_dropout_probability = recurrent_dropout_probability
        self.use_highway = use_highway
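        # NOTE: self.input_size, self.num_classes and self.num_rels are assumed
        # to be properties defined elsewhere on this class.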
        # We do the projections for all the gates all at once, so if we are
        # using highway layers, we need some extra projections, which is
        # why the sizes of the Linear layers change here depending on this flag.
        if use_highway:
            self.input_linearity = torch.nn.Linear(
                self.input_size,
                6 * self.hidden_size,
                bias=use_input_projection_bias)
            self.state_linearity = torch.nn.Linear(self.hidden_size,
                                                   5 * self.hidden_size,
                                                   bias=True)
        else:
            self.input_linearity = torch.nn.Linear(
                self.input_size,
                4 * self.hidden_size,
                bias=use_input_projection_bias)
            self.state_linearity = torch.nn.Linear(self.hidden_size,
                                                   4 * self.hidden_size,
                                                   bias=True)

        # self.obj_in_lin = torch.nn.Linear(self.rel_embedding_dim, self.rel_embedding_dim, bias=True)

        self.out = nn.Linear(self.hidden_size, len(self.classes))
        self.reset_parameters()

        # For relation prediction
        embed_vecs2 = obj_edge_vectors(self.classes, wv_dim=embed_dim)
        self.obj_embed2 = nn.Embedding(self.num_classes, embed_dim)
        self.obj_embed2.weight.data = embed_vecs2.clone()

        # self.post_lstm = nn.Linear(self.hidden_dim, self.pooling_dim * 2)
        self.post_lstm = nn.Linear(self.obj_dim + 2 * self.embed_dim + 128,
                                   self.pooling_dim * 2)
        # Initialize to sqrt(1/2n) so that the outputs all have mean 0 and variance 1.
        # (Half of the contribution comes from the LSTM, half from the embedding;
        # in practice the pre-LSTM features have stdev ~0.1, hence the extra factor of 10.)
        # NOTE: this scaling may warrant further tuning.
        self.post_lstm.weight.data.normal_(
            0, 10.0 * math.sqrt(1.0 / self.hidden_size))
        self.post_lstm.bias.data.zero_()

        self.rel_compress = nn.Linear(self.pooling_dim,
                                      self.num_rels,
                                      bias=True)
        torch.nn.init.xavier_normal_(self.rel_compress.weight, gain=1.0)
        if self.use_bias:
            self.freq_bias = FrequencyBias()

            # simple relation model
            from dataloaders.visual_genome import VG
            from lib.get_dataset_counts import get_counts, box_filter
            fg_matrix, bg_matrix = get_counts(
                train_data=VG.splits(num_val_im=5000,
                                     filter_non_overlap=True,
                                     filter_duplicate_rels=True,
                                     use_proposals=False)[0],
                must_overlap=True)
            prob_matrix = fg_matrix.astype(np.float32)
            prob_matrix[:, :, 0] = bg_matrix

            # TRYING SOMETHING NEW.
            prob_matrix[:, :, 0] += 1
            prob_matrix /= np.sum(prob_matrix, 2)[:, :, None]
            # prob_matrix /= float(fg_matrix.max())

            prob_matrix[:, :, 0] = 0  # Zero out BG
            self.prob_matrix = prob_matrix