def main():
    """Train the scene-graph relation model and checkpoint the best val mAp.

    Relies on module-level globals set up elsewhere in this script: ``conf``
    (parsed CLI options), the dataset/model classes (``VG``, ``VGDataLoader``,
    ``RelModel``) and the helpers ``get_optim``/``train_epoch``/``val_epoch``.
    """
    # Per-iteration training log, written as CSV.
    # NOTE(review): these handles are opened here and never closed; they live
    # for the whole run and rely on periodic flush() calls for durability.
    fname = os.path.join(conf.save_dir, 'train_losses.csv')
    train_f = open(fname, 'w')
    train_f.write(
        'iter,class_loss,rel_loss,total,recall20,recall50,recall100,recall20_con,recall50_con,recall100_con\n'
    )
    train_f.flush()
    # Per-epoch validation log.
    fname = os.path.join(conf.save_dir, 'val_losses.csv')
    val_f = open(fname, 'w')
    val_f.write(
        'recall20,recall50,recall100,recall20_con,recall50_con,recall100_con\n'
    )
    val_f.flush()

    # Dataset splits; boxes without overlap are filtered only in 'sgdet' mode.
    train, val, _ = VG.splits(num_val_im=conf.val_size,
                              filter_duplicate_rels=True,
                              use_proposals=conf.use_proposals,
                              filter_non_overlap=conf.mode == 'sgdet')
    train_loader, val_loader = VGDataLoader.splits(
        train, val, mode='rel',
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus)

    # Relation model; every architecture knob comes straight from the config.
    detector = RelModel(
        classes=train.ind_to_classes,
        rel_classes=train.ind_to_predicates,
        num_gpus=conf.num_gpus,
        mode=conf.mode,
        require_overlap_det=True,
        use_resnet=conf.use_resnet,
        order=conf.order,
        nl_edge=conf.nl_edge,
        nl_obj=conf.nl_obj,
        hidden_dim=conf.hidden_dim,
        use_proposals=conf.use_proposals,
        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
        pooling_dim=conf.pooling_dim,
        rec_dropout=conf.rec_dropout,
        use_bias=conf.use_bias,
        use_tanh=conf.use_tanh,
        limit_vision=conf.limit_vision,
        lml_topk=conf.lml_topk,
        lml_softmax=conf.lml_softmax,
        entr_topk=conf.entr_topk,
        ml_loss=conf.ml_loss)

    # Freeze the detector: only the relation head is trained here.
    for n, param in detector.detector.named_parameters():
        param.requires_grad = False

    print(print_para(detector), flush=True)

    ckpt = torch.load(conf.ckpt)
    # Checkpoints named 'vgrel-*' hold the full relation model; anything else
    # is treated as a detector-only checkpoint needing partial restoration.
    if conf.ckpt.split('-')[-2].split('/')[-1] == 'vgrel':
        print("Loading EVERYTHING")
        start_epoch = ckpt['epoch']

        if not optimistic_restore(detector, ckpt['state_dict']):
            # Restore failed: restart epoch numbering from scratch.
            start_epoch = -1
            # optimistic_restore(
            #     detector.detector,
            #     torch.load('checkpoints/vgdet/vg-28.tar')['state_dict']
            # )
    else:
        start_epoch = -1
        optimistic_restore(detector.detector, ckpt['state_dict'])

        # Seed both ROI heads (union-box head and object head) with the
        # detector checkpoint's fc layers ('roi_fmap.0' / 'roi_fmap.3').
        detector.roi_fmap[1][0].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.0.weight'])
        detector.roi_fmap[1][3].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.3.weight'])
        detector.roi_fmap[1][0].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.0.bias'])
        detector.roi_fmap[1][3].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.3.bias'])
        detector.roi_fmap_obj[0].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.0.weight'])
        detector.roi_fmap_obj[3].weight.data.copy_(
            ckpt['state_dict']['roi_fmap.3.weight'])
        detector.roi_fmap_obj[0].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.0.bias'])
        detector.roi_fmap_obj[3].bias.data.copy_(
            ckpt['state_dict']['roi_fmap.3.bias'])

    detector.cuda()

    print("Training starts now!")
    # Learning rate is scaled linearly with the effective batch size.
    optimizer, scheduler = get_optim(detector,
                                     conf.lr * conf.num_gpus * conf.batch_size)
    best_eval = None
    for epoch in range(start_epoch + 1, start_epoch + 1 + conf.num_epochs):
        rez = train_epoch(epoch, detector, train, train_loader, optimizer,
                          conf, train_f)
        print("overall{:2d}: ({:.3f})\n{}".format(epoch, rez.mean(1)['total'],
                                                  rez.mean(1)),
              flush=True)
        mAp = val_epoch(detector, val, val_loader, val_f)
        # Steps on the validation metric — presumably a ReduceLROnPlateau-style
        # scheduler; confirm against get_optim.
        scheduler.step(mAp)

        if conf.save_dir is not None:
            # Keep only the single best-by-validation-mAp checkpoint.
            if best_eval is None or mAp > best_eval:
                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict': detector.state_dict(),
                        # 'optimizer': optimizer.state_dict(),
                    }, os.path.join(conf.save_dir, 'best-val.tar'))
                best_eval = mAp
import torch from config import ModelConfig from lib.pytorch_misc import optimistic_restore from tqdm import tqdm from config import BOX_SCALE, IM_SCALE from lib.fpn.box_utils import bbox_overlaps from collections import defaultdict from PIL import Image, ImageDraw, ImageFont import os from functools import reduce import json from collections import OrderedDict conf = ModelConfig() train, val, test = VG.splits(num_val_im=conf.val_size, filter_non_overlap=False) set_name = '' if conf.test: val = test set_name = 'test' elif conf.val: val = val set_name = 'val' else: val = train set_name = 'train' train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel', batch_size=conf.batch_size, num_workers=conf.num_workers,
ixmax = np.minimum(gt_box[2], pred_boxes[:, 2]) iymax = np.minimum(gt_box[3], pred_boxes[:, 3]) iw = np.maximum(ixmax - ixmin + 1., 0.) ih = np.maximum(iymax - iymin + 1., 0.) inters = iw * ih # union uni = ((gt_box[2] - gt_box[0] + 1.) * (gt_box[3] - gt_box[1] + 1.) + (pred_boxes[:, 2] - pred_boxes[:, 0] + 1.) * (pred_boxes[:, 3] - pred_boxes[:, 1] + 1.) - inters) overlaps = inters / uni return overlaps train, val, test = VG.splits() result_dict_mine = {'sgdet_recall': {20: [], 50: [], 100: []}} result_dict_theirs = {'sgdet_recall': {20: [], 50: [], 100: []}} for img_i in trange(len(val)): gt_entry = { 'gt_classes': val.gt_classes[img_i].copy(), 'gt_relations': val.relationships[img_i].copy(), 'gt_boxes': val.gt_boxes[img_i].copy(), } # Use shuffled GT boxes gt_indices = np.arange( gt_entry['gt_boxes'].shape[0] ) #np.random.choice(gt_entry['gt_boxes'].shape[0], 20)
from config import ModelConfig from lib.pytorch_misc import optimistic_restore from lib.evaluation.sg_eval import BasicSceneGraphEvaluator, calculate_mR_from_evaluator_list, eval_entry from tqdm import tqdm from config import BOX_SCALE, IM_SCALE import dill as pkl import os from lib.kern_model import KERN conf = ModelConfig() train, val, test = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True, use_proposals=conf.use_proposals, filter_non_overlap=conf.mode == 'sgdet') ind_to_predicates = train.ind_to_predicates # ind_to_predicates[0] means no relationship if conf.test: val = test train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel', batch_size=conf.batch_size, num_workers=conf.num_workers, num_gpus=conf.num_gpus) detector = KERN(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates, num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True, use_resnet=conf.use_resnet, use_proposals=conf.use_proposals, use_ggnn_obj=conf.use_ggnn_obj, ggnn_obj_time_step_num=conf.ggnn_obj_time_step_num, ggnn_obj_hidden_dim=conf.ggnn_obj_hidden_dim, ggnn_obj_output_dim=conf.ggnn_obj_output_dim, use_obj_knowledge=conf.use_obj_knowledge, obj_knowledge=conf.obj_knowledge,
def main(args):
    """Train the neural-motifs + sg2im GAN pipeline (generator + optional
    object-crop and whole-image discriminators).

    ``args`` is the parsed command-line namespace. Datasets, the combined
    model wrapper and the loss helpers (``check_args``, ``get_gan_losses``,
    ``calculate_model_losses``, ``add_loss``, ``LossManager``, ``check_model``,
    ``timeit``) are defined elsewhere in this file's imports.
    """
    print(args)
    check_args(args)
    if not exists(args.output_dir):
        os.makedirs(args.output_dir)
    summary_writer = SummaryWriter(args.output_dir)

    # Dataset: COCO detection or Visual Genome, each with its own loader.
    if args.coco:
        train, val = CocoDetection.splits()
        val.ids = val.ids[:args.val_size]
        train.ids = train.ids
        train_loader, val_loader = CocoDataLoader.splits(train, val, batch_size=args.batch_size,
                                                         num_workers=args.num_workers,
                                                         num_gpus=args.num_gpus)
    else:
        train, val, _ = VG.splits(num_val_im=args.val_size, filter_non_overlap=False,
                                  filter_empty_rels=False, use_proposals=args.use_proposals)
        train_loader, val_loader = VGDataLoader.splits(train, val, batch_size=args.batch_size,
                                                       num_workers=args.num_workers,
                                                       num_gpus=args.num_gpus)
    print(train.ind_to_classes)
    # FIXME(review): debug leftover — os._exit(0) terminates the process right
    # here, making everything below unreachable. Remove it to actually train.
    os._exit(0)

    all_in_one_model = neural_motifs_sg2im_model(args, train.ind_to_classes)
    # Freeze the detector
    for n, param in all_in_one_model.detector.named_parameters():
        param.requires_grad = False
    all_in_one_model.cuda()
    gan_g_loss, gan_d_loss = get_gan_losses(args.gan_loss_type)

    # Resume counters/checkpoint state carried by the model wrapper.
    t, epoch, checkpoint = all_in_one_model.t, all_in_one_model.epoch, all_in_one_model.checkpoint
    while True:
        if t >= args.num_iterations:
            break
        epoch += 1
        print('Starting epoch %d' % epoch)

        for step, batch in enumerate(tqdm(train_loader, desc='Training Epoch %d' % epoch,
                                          total=len(train_loader))):
            # After eval_mode_after iterations, switch the generator to eval
            # mode (freezes BN stats) and rebuild its optimizer.
            if t == args.eval_mode_after:
                print('switching to eval mode')
                all_in_one_model.model.eval()
                all_in_one_model.optimizer = optim.Adam(all_in_one_model.parameters(),
                                                        lr=args.learning_rate)
            t += 1

            with timeit('forward', args.timing):
                result = all_in_one_model[batch]
            # Unpack every generator/discriminator score from the result struct.
            imgs, imgs_pred, objs, g_scores_fake_crop, g_obj_scores_fake_crop, g_scores_fake_img, \
                d_scores_fake_crop, d_obj_scores_fake_crop, d_scores_real_crop, d_obj_scores_real_crop, \
                d_scores_fake_img, d_scores_real_img = result.imgs, result.imgs_pred, result.objs, \
                result.g_scores_fake_crop, result.g_obj_scores_fake_crop, result.g_scores_fake_img, \
                result.d_scores_fake_crop, result.d_obj_scores_fake_crop, result.d_scores_real_crop, \
                result.d_obj_scores_real_crop, result.d_scores_fake_img, result.d_scores_real_img

            # Generator loss: reconstruction + (optional) adversarial terms.
            with timeit('loss', args.timing):
                total_loss, losses = calculate_model_losses(
                    args, imgs, imgs_pred)
                if all_in_one_model.obj_discriminator is not None:
                    # Auxiliary-classifier loss + obj-crop GAN loss.
                    total_loss = add_loss(total_loss,
                                          F.cross_entropy(g_obj_scores_fake_crop, objs),
                                          losses, 'ac_loss',
                                          args.ac_loss_weight)
                    weight = args.discriminator_loss_weight * args.d_obj_weight
                    total_loss = add_loss(total_loss,
                                          gan_g_loss(g_scores_fake_crop),
                                          losses, 'g_gan_obj_loss', weight)

                if all_in_one_model.img_discriminator is not None:
                    weight = args.discriminator_loss_weight * args.d_img_weight
                    total_loss = add_loss(total_loss,
                                          gan_g_loss(g_scores_fake_img),
                                          losses, 'g_gan_img_loss', weight)

            losses['total_loss'] = total_loss.item()
            # Skip the update entirely on non-finite loss rather than poisoning
            # the weights.
            if not math.isfinite(losses['total_loss']):
                print('WARNING: Got loss = NaN, not backpropping')
                continue

            # Generator update.
            with timeit('backward', args.timing):
                all_in_one_model.optimizer.zero_grad()
                total_loss.backward()
                all_in_one_model.optimizer.step()

            # Object-crop discriminator: GAN loss + real/fake aux classification.
            if all_in_one_model.obj_discriminator is not None:
                with timeit('d_obj loss', args.timing):
                    d_obj_losses = LossManager()
                    d_obj_gan_loss = gan_d_loss(d_scores_real_crop,
                                                d_scores_fake_crop)
                    d_obj_losses.add_loss(d_obj_gan_loss, 'd_obj_gan_loss')
                    d_obj_losses.add_loss(F.cross_entropy(d_obj_scores_real_crop, objs),
                                          'd_ac_loss_real')
                    d_obj_losses.add_loss(F.cross_entropy(d_obj_scores_fake_crop, objs),
                                          'd_ac_loss_fake')
                with timeit('d_obj backward', args.timing):
                    all_in_one_model.optimizer_d_obj.zero_grad()
                    d_obj_losses.total_loss.backward()
                    all_in_one_model.optimizer_d_obj.step()

            # Whole-image discriminator update.
            if all_in_one_model.img_discriminator is not None:
                with timeit('d_img loss', args.timing):
                    d_img_losses = LossManager()
                    d_img_gan_loss = gan_d_loss(d_scores_real_img,
                                                d_scores_fake_img)
                    d_img_losses.add_loss(d_img_gan_loss, 'd_img_gan_loss')
                with timeit('d_img backward', args.timing):
                    all_in_one_model.optimizer_d_img.zero_grad()
                    d_img_losses.total_loss.backward()
                    all_in_one_model.optimizer_d_img.step()

            # Periodic console + TensorBoard logging of all loss groups.
            if t % args.print_every == 0:
                print('t = %d / %d' % (t, args.num_iterations))
                G_loss_list = []
                for name, val in losses.items():
                    G_loss_list.append('[%s]: %.4f' % (name, val))
                    checkpoint['losses'][name].append(val)
                    summary_writer.add_scalar("G_%s" % name, val, t)
                print("G: %s" % ", ".join(G_loss_list))
                checkpoint['losses_ts'].append(t)

                if all_in_one_model.obj_discriminator is not None:
                    D_obj_loss_list = []
                    for name, val in d_obj_losses.items():
                        D_obj_loss_list.append('[%s]: %.4f' % (name, val))
                        checkpoint['d_losses'][name].append(val)
                        summary_writer.add_scalar("D_obj_%s" % name, val, t)
                    print("D_obj: %s" % ", ".join(D_obj_loss_list))

                if all_in_one_model.img_discriminator is not None:
                    D_img_loss_list = []
                    for name, val in d_img_losses.items():
                        D_img_loss_list.append('[%s]: %.4f' % (name, val))
                        checkpoint['d_losses'][name].append(val)
                        summary_writer.add_scalar("D_img_%s" % name, val, t)
                    print("D_img: %s" % ", ".join(D_img_loss_list))

            # Periodic evaluation + two checkpoints (with and without weights).
            # NOTE(review): the `name, val` loop variables above shadow the
            # dataset `val` from the top of main — harmless here but fragile.
            if t % args.checkpoint_every == 0:
                print('checking on train')
                train_results = check_model(args, train_loader, all_in_one_model)
                t_losses, t_samples = train_results
                checkpoint['train_samples'].append(t_samples)
                checkpoint['checkpoint_ts'].append(t)
                for name, images in t_samples.items():
                    summary_writer.add_image("train_%s" % name, images, t)

                print('checking on val')
                val_results = check_model(args, val_loader, all_in_one_model)
                val_losses, val_samples = val_results
                checkpoint['val_samples'].append(val_samples)
                for name, images in val_samples.items():
                    summary_writer.add_image("val_%s" % name, images, t)
                for k, v in val_losses.items():
                    checkpoint['val_losses'][k].append(v)
                    summary_writer.add_scalar("val_%s" % k, v, t)

                checkpoint['model_state'] = all_in_one_model.model.state_dict()

                if all_in_one_model.obj_discriminator is not None:
                    checkpoint['d_obj_state'] = all_in_one_model.obj_discriminator.state_dict()
                    checkpoint['d_obj_optim_state'] = all_in_one_model.optimizer_d_obj.state_dict()

                if all_in_one_model.img_discriminator is not None:
                    checkpoint['d_img_state'] = all_in_one_model.img_discriminator.state_dict()
                    checkpoint['d_img_optim_state'] = all_in_one_model.optimizer_d_img.state_dict()

                checkpoint['optim_state'] = all_in_one_model.optimizer.state_dict()
                checkpoint['counters']['t'] = t
                checkpoint['counters']['epoch'] = epoch
                checkpoint_path = os.path.join(args.output_dir,
                                               '%s_with_model.pt' % args.checkpoint_name)
                print('Saving checkpoint to ', checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

                # Save another checkpoint without any model or optim state
                checkpoint_path = os.path.join(args.output_dir,
                                               '%s_no_model.pt' % args.checkpoint_name)
                key_blacklist = ['model_state', 'optim_state', 'model_best_state',
                                 'd_obj_state', 'd_obj_optim_state', 'd_obj_best_state',
                                 'd_img_state', 'd_img_optim_state', 'd_img_best_state']
                small_checkpoint = {}
                for k, v in checkpoint.items():
                    if k not in key_blacklist:
                        small_checkpoint[k] = v
                torch.save(small_checkpoint, checkpoint_path)
# Detector pre-training setup: build dataset loaders and the object detector
# later reused by the scene-graph models.
cudnn.benchmark = True
conf = ModelConfig()

if conf.coco:
    # COCO detection splits; truncate val to the configured number of images.
    train, val = CocoDetection.splits()
    val.ids = val.ids[:conf.val_size]
    train.ids = train.ids
    train_loader, val_loader = CocoDataLoader.splits(
        train,
        val,
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus)
else:
    # Visual Genome: keep everything (no overlap / empty-relation filtering),
    # since plain detection does not need relationship annotations.
    train, val, _ = VG.splits(num_val_im=conf.val_size,
                              filter_non_overlap=False,
                              filter_empty_rels=False,
                              use_proposals=conf.use_proposals)
    train_loader, val_loader = VGDataLoader.splits(
        train,
        val,
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus)

# 'rpntrain' learns the RPN from scratch; 'proposals' consumes precomputed boxes.
detector = ObjectDetector(
    classes=train.ind_to_classes,
    num_gpus=conf.num_gpus,
    mode='rpntrain' if not conf.use_proposals else 'proposals',
    use_resnet=conf.use_resnet)
detector.cuda()
import numpy as np import torch from config import ModelConfig from lib.pytorch_misc import optimistic_restore from lib.evaluation.sg_eval_visual import BasicSceneGraphEvaluator from tqdm import tqdm from config import BOX_SCALE, IM_SCALE from lib.fpn.box_utils import bbox_overlaps from collections import defaultdict from PIL import Image, ImageDraw, ImageFont import os from functools import reduce conf = ModelConfig() train, val, test = VG.splits(num_val_im=conf.val_size) if conf.test: val = test train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel', batch_size=conf.batch_size, num_workers=conf.num_workers, num_gpus=conf.num_gpus) detector = RelModel( classes=train.ind_to_classes, rel_classes=train.ind_to_predicates, num_gpus=conf.num_gpus, mode=conf.mode,
parser.add_argument('-cache_dir', dest='cache_dir', help='dir to load cache predicted results', type=str, default='caches/kern_sgcls.pkl') args = parser.parse_args() os.makedirs(args.save_dir, exist_ok=True) image_dir = os.path.join(args.save_dir, 'images') graph_dir = os.path.join(args.save_dir, 'graphs') os.makedirs(image_dir, exist_ok=True) os.makedirs(graph_dir, exist_ok=True) mode = 'sgdet' # this code is only for sgcls task train, _, _ = VG.splits(num_val_im=5000, filter_duplicate_rels=True, use_proposals=False, filter_non_overlap=False) ''' train,_, _ = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True, use_proposals=conf.use_proposals, filter_non_overlap=conf.mode == 'sgdet') ''' vcrdata = VCRDataset() vcrdataloader = DataLoader( vcrdata, batch_size=1, shuffle=True, batch_sampler=None, num_workers=1, collate_fn=lambda x: vg_collate(x, mode='rel', num_gpus=1, is_train=False),
from lib.shz_models.rel_model_depth_union import RelModel elif conf.model == 'shz_fusion': from lib.shz_models.rel_model_fusion import RelModel elif conf.model == 'shz_fusion_beta': from lib.shz_models.rel_model_fusion_beta import RelModel # -- else: raise ValueError() # -- Create Tensorboard summary writer writer = SummaryWriter(comment='_run#'+ conf.save_dir.split('/')[-1]) # -- Create dataset splits and dataset loader train, val, _ = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True, use_proposals=conf.use_proposals, filter_non_overlap=conf.mode == 'sgdet', # -- Depth dataset parameters use_depth=conf.load_depth, three_channels_depth=conf.pretrained_depth) train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel', batch_size=conf.batch_size, num_workers=conf.num_workers, num_gpus=conf.num_gpus, # -- Depth dataset parameters use_depth=conf.load_depth) # -- Create the specified Relation-Detection model detector = RelModel(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates, num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True, use_resnet=conf.use_resnet, order=conf.order, nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
from lib.get_dataset_counts import get_counts, box_filter from config import ModelConfig, FG_FRACTION, RPN_FG_FRACTION, DATA_PATH, BOX_SCALE, IM_SCALE, PROPOSAL_FN import torch.backends.cudnn as cudnn from lib.pytorch_misc import optimistic_restore, nonintersecting_2d_inds from lib.evaluation.sg_eval import BasicSceneGraphEvaluator from tqdm import tqdm from copy import deepcopy import dill as pkl cudnn.benchmark = True conf = ModelConfig() MUST_OVERLAP = False train, val, test = VG.splits(num_val_im=conf.val_size, filter_non_overlap=MUST_OVERLAP, filter_duplicate_rels=True, use_proposals=conf.use_proposals) if conf.test: print("test data!") val = test train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel', batch_size=conf.batch_size, num_workers=conf.num_workers, num_gpus=conf.num_gpus) fg_matrix, bg_matrix = get_counts(train_data=train, must_overlap=MUST_OVERLAP) detector = ObjectDetector( classes=train.ind_to_classes,
# -- Get model configuration (parses the CLI arguments exactly once)
conf = ModelConfig()

# -- Set random seed for reproducible runs
if conf.rnd_seed is not None:
    set_random_seed(conf.rnd_seed)

# NOTE: a second, redundant `conf = ModelConfig()` used to re-parse the CLI
# arguments right here; it has been removed — `conf` above is already final.

# -- Create Tensorboard summary writer
writer = SummaryWriter(comment='_run#' + conf.save_dir.split('/')[-1])

# -- Create dataset splits and dataset loader
train, val, _ = VG.splits(num_val_im=conf.val_size,
                          filter_duplicate_rels=True,
                          use_proposals=False,
                          filter_non_overlap=False,
                          # -- (ADDED) add depth related parameters
                          use_depth=True,
                          three_channels_depth=False)

train_loader, val_loader = VGDataLoader.splits(train, val,
                                               mode='det',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus,
                                               use_depth=True)

# -- Create Auto-Encoder model
detector = AEModel(num_gpus=conf.num_gpus,
                   depth_model=conf.depth_model)

# -- Print model parameters
print(print_para(detector), flush=True)

# -- Load the specified checkpoint
def main():
    """Dump ground-truth and predicted scene graphs for 100 sampled images.

    NOTE: this function overwrites ``sys.argv`` with a hard-coded option
    string, so any real command-line arguments are ignored.
    """
    args = 'X -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -multipred -cache motifnet_predcls1'
    sys.argv = args.split(' ')
    conf = ModelConfig()

    # Pick the model implementation named on the (synthetic) command line.
    if conf.model == 'motifnet':
        from lib.rel_model import RelModel
    elif conf.model == 'stanford':
        from lib.rel_model_stanford import RelModelStanford as RelModel
    else:
        raise ValueError()

    train, val, test = VG.splits(
        num_val_im=conf.val_size,
        filter_duplicate_rels=True,
        use_proposals=conf.use_proposals,
        filter_non_overlap=conf.mode == 'sgdet',
    )
    # With -test on the synthetic command line, 'val' aliases the test split.
    if conf.test:
        val = test
    train_loader, val_loader = VGDataLoader.splits(
        train, val, mode='rel',
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus
    )
    detector = RelModel(
        classes=train.ind_to_classes,
        rel_classes=train.ind_to_predicates,
        num_gpus=conf.num_gpus,
        mode=conf.mode,
        require_overlap_det=True,
        use_resnet=conf.use_resnet,
        order=conf.order,
        nl_edge=conf.nl_edge,
        nl_obj=conf.nl_obj,
        hidden_dim=conf.hidden_dim,
        use_proposals=conf.use_proposals,
        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
        pooling_dim=conf.pooling_dim,
        rec_dropout=conf.rec_dropout,
        use_bias=conf.use_bias,
        use_tanh=conf.use_tanh,
        limit_vision=conf.limit_vision
    )

    detector.cuda()
    ckpt = torch.load(conf.ckpt)
    optimistic_restore(detector, ckpt['state_dict'])

    # NOTE(review): `evaluator` is constructed but never used below.
    evaluator = BasicSceneGraphEvaluator.all_modes(
        multiple_preds=conf.multi_pred)

    # Previously computed per-image recalls; used for sampling/annotation only.
    mode, N = 'test.multi_pred', 20
    recs = pkl.load(open('{}.{}.pkl'.format(mode, N), 'rb'))
    np.random.seed(0)
    # sorted_idxs = np.argsort(recs)
    selected_idxs = np.random.choice(range(len(recs)), size=100, replace=False)
    sorted_idxs = selected_idxs[np.argsort(np.array(recs)[selected_idxs])]
    print('Sorted idxs: {}'.format(sorted_idxs.tolist()))

    save_dir = '/nethome/bamos/2018-intel/data/2018-07-31/sgs.multi'

    # NOTE(review): iterates the unsorted selection; `sorted_idxs` is only
    # printed above — confirm that is intentional.
    for idx in selected_idxs:
        gt_entry = {
            'gt_classes': val.gt_classes[idx].copy(),
            'gt_relations': val.relationships[idx].copy(),
            'gt_boxes': val.gt_boxes[idx].copy(),
        }

        detector.eval()
        det_res = detector[vg_collate([test[idx]], num_gpus=1)]

        boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res
        pred_entry = {
            # Boxes are rescaled from network resolution back to BOX_SCALE.
            'pred_boxes': boxes_i * BOX_SCALE / IM_SCALE,
            'pred_classes': objs_i,
            'pred_rel_inds': rels_i,
            'obj_scores': obj_scores_i,
            'rel_scores': pred_scores_i,
        }

        unique_cnames = get_unique_cnames(gt_entry, test)
        save_img(idx, recs, test, gt_entry, det_res, unique_cnames, save_dir)
        save_gt_graph(idx, test, gt_entry, det_res, unique_cnames, save_dir)
        save_pred_graph(idx, test, pred_entry, det_res, unique_cnames,
                        save_dir,
                        multi_pred=conf.multi_pred,
                        n_pred=20)
""" SCRIPT TO MAKE MEMES. this was from an old version of the code, so it might require some fixes to get working. """ from dataloaders.visual_genome import VG # import matplotlib # # matplotlib.use('Agg') from tqdm import tqdm import seaborn as sns import numpy as np from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps from collections import defaultdict train, val, test = VG.splits(filter_non_overlap=False, num_val_im=2000) count_threshold = 50 pmi_threshold = 10 o_type = [] f = open("object_types.txt") for line in f.readlines(): tabs = line.strip().split("\t") t = tabs[1].split("_")[0] o_type.append(t) r_type = [] f = open("relation_types.txt") for line in f.readlines(): tabs = line.strip().split("\t") t = tabs[1].split("_")[0] r_type.append(t)
import pandas as pd import time from tqdm import tqdm from torch.nn.functional import cross_entropy as CE from lib.pytorch_misc import * from lib.evaluation.sg_eval import BasicSceneGraphEvaluator, calculate_mR_from_evaluator_list, eval_entry import pickle from lib.rel_model_stanford import RelModelStanford EVAL_MODES = ['sgdet'] if conf.mode == 'sgdet' else ['predcls', 'sgcls'] assert conf.mode in EVAL_MODES, (conf.mode, 'other modes not supported') train, val_splits = VG.splits(data_dir=conf.data, num_val_im=conf.val_size, min_graph_size=conf.min_graph_size, max_graph_size=conf.max_graph_size, mrcnn=conf.detector == 'mrcnn', filter_non_overlap=conf.mode == 'sgdet', exclude_left_right=conf.exclude_left_right) train_loader, val_loaders = VGDataLoader.splits(train, val_splits, mode='rel', batch_size=conf.batch_size, num_workers=conf.num_workers, num_gpus=conf.num_gpus) val_loader, val_loader_zs, test_loader, test_loader_zs = val_loaders detector = RelModelStanford(train_data=train, num_gpus=conf.num_gpus, mode=conf.mode,
    def __init__(self,
                 classes,
                 rel_classes,
                 embed_dim,
                 obj_dim,
                 inputs_dim,
                 hidden_dim,
                 pooling_dim,
                 recurrent_dropout_probability=0.2,
                 use_highway=True,
                 use_input_projection_bias=True,
                 use_vision=True,
                 use_tanh=True,
                 use_bias=True,
                 limit_vision=True,
                 sl_pretrain=False,
                 num_iter=-1):
        """
        Initializes the RNN

        :param classes: list of object class names (the decoder vocabulary)
        :param rel_classes: list of relation (predicate) names
        :param embed_dim: Dimension of the embeddings
        :param obj_dim: dimension of the per-object visual feature
        :param inputs_dim: dimension of the per-timestep input
        :param hidden_dim: Hidden dim of the decoder
        :param pooling_dim: dimension of the pooled union-box feature
        :param recurrent_dropout_probability: dropout on the recurrent state
        :param use_highway: add highway gates to the LSTM cell
        :param use_input_projection_bias: bias on the input projection
        :param use_vision / use_bias / use_tanh / limit_vision: feature flags
            forwarded from the enclosing relation model
        :param sl_pretrain: supervised-learning pretraining flag
        :param num_iter: iteration count used by the caller (-1 = disabled)
        """
        super(DecoderRNN, self).__init__()

        self.rel_embedding_dim = 100
        self.classes = classes
        self.rel_classes = rel_classes

        # Object-class word embeddings, with an extra 'start' token prepended.
        embed_vecs = obj_edge_vectors(['start'] + self.classes, wv_dim=100)
        self.obj_embed = nn.Embedding(len(self.classes), embed_dim)
        self.obj_embed.weight.data = embed_vecs

        # Relation (predicate) word embeddings.
        embed_rels = obj_edge_vectors(self.rel_classes,
                                      wv_dim=self.rel_embedding_dim)
        self.rel_embed = nn.Embedding(len(self.rel_classes),
                                      self.rel_embedding_dim)
        self.rel_embed.weight.data = embed_rels

        self.embed_dim = embed_dim
        self.obj_dim = obj_dim
        self.hidden_size = hidden_dim
        self.inputs_dim = inputs_dim
        self.pooling_dim = pooling_dim
        self.nms_thresh = 0.3  # IoU threshold applied when decoding labels

        self.use_vision = use_vision
        self.use_bias = use_bias
        self.use_tanh = use_tanh
        self.limit_vision = limit_vision
        self.sl_pretrain = sl_pretrain
        self.num_iter = num_iter

        self.recurrent_dropout_probability = recurrent_dropout_probability
        self.use_highway = use_highway
        # We do the projections for all the gates all at once, so if we are
        # using highway layers, we need some extra projections, which is
        # why the sizes of the Linear layers change here depending on this flag.
        # NOTE(review): `self.input_size` (and `self.num_classes`/`self.num_rels`
        # below) are not assigned in this method — presumably properties
        # defined elsewhere on DecoderRNN; confirm.
        if use_highway:
            # 6 input blocks: 4 LSTM gates + 2 highway gates.
            self.input_linearity = torch.nn.Linear(
                self.input_size,
                6 * self.hidden_size,
                bias=use_input_projection_bias)
            self.state_linearity = torch.nn.Linear(self.hidden_size,
                                                   5 * self.hidden_size,
                                                   bias=True)
        else:
            self.input_linearity = torch.nn.Linear(
                self.input_size,
                4 * self.hidden_size,
                bias=use_input_projection_bias)
            self.state_linearity = torch.nn.Linear(self.hidden_size,
                                                   4 * self.hidden_size,
                                                   bias=True)

        # self.obj_in_lin = torch.nn.Linear(self.rel_embedding_dim, self.rel_embedding_dim, bias=True)

        # Projects the hidden state onto object-class logits.
        self.out = nn.Linear(self.hidden_size, len(self.classes))
        self.reset_parameters()

        # For relation predication
        embed_vecs2 = obj_edge_vectors(self.classes, wv_dim=embed_dim)
        self.obj_embed2 = nn.Embedding(self.num_classes, embed_dim)
        self.obj_embed2.weight.data = embed_vecs2.clone()

        # self.post_lstm = nn.Linear(self.hidden_dim, self.pooling_dim * 2)
        # Maps [obj feature | two class embeddings | 128-d position feature]
        # to subject/object halves of the relation representation.
        self.post_lstm = nn.Linear(self.obj_dim + 2 * self.embed_dim + 128,
                                   self.pooling_dim * 2)

        # Initialize to sqrt(1/2n) so that the outputs all have mean 0 and variance 1.
        # (Half contribution comes from LSTM, half from embedding.
        # In practice the pre-lstm stuff tends to have stdev 0.1 so I multiplied this by 10.
        self.post_lstm.weight.data.normal_(
            0, 10.0 * math.sqrt(1.0 / self.hidden_size)
        )  ######## there may need more consideration
        self.post_lstm.bias.data.zero_()

        self.rel_compress = nn.Linear(self.pooling_dim, self.num_rels, bias=True)
        # NOTE(review): torch.nn.init.xavier_normal is deprecated in favour of
        # xavier_normal_; it mutates in place and returns its argument, so the
        # re-assignment is redundant but works on the old PyTorch this targets.
        self.rel_compress.weight = torch.nn.init.xavier_normal(
            self.rel_compress.weight, gain=1.0)
        if self.use_bias:
            self.freq_bias = FrequencyBias()

        # simple relation model
        # NOTE(review): these function-scope imports load the full VG training
        # split at construction time just to compute co-occurrence counts —
        # expensive; consider caching the statistics.
        from dataloaders.visual_genome import VG
        from lib.get_dataset_counts import get_counts, box_filter
        fg_matrix, bg_matrix = get_counts(train_data=VG.splits(
            num_val_im=5000,
            filter_non_overlap=True,
            filter_duplicate_rels=True,
            use_proposals=False)[0],
                                          must_overlap=True)
        prob_matrix = fg_matrix.astype(np.float32)
        prob_matrix[:, :, 0] = bg_matrix

        # TRYING SOMETHING NEW.
        prob_matrix[:, :, 0] += 1  # smooth the background bin before normalizing
        prob_matrix /= np.sum(prob_matrix, 2)[:, :, None]
        # prob_matrix /= float(fg_matrix.max())

        prob_matrix[:, :, 0] = 0  # Zero out BG
        self.prob_matrix = prob_matrix