# Note: if you're doing the stanford setup, you'll need to change this to
# freeze the lower layers.
optimizer = optim.SGD([p for p in detector.parameters() if p.requires_grad],
                      weight_decay=conf.l2,
                      lr=conf.lr * conf.num_gpus * conf.batch_size,
                      momentum=0.9)
criterion = nn.CrossEntropyLoss()

for name in detector.state_dict():
    print(name)

start_epoch = -1
if conf.ckpt is not None:
    ckpt = torch.load(conf.ckpt)
    if optimistic_restore(detector, ckpt['state_dict']):
        start_epoch = ckpt['epoch']


def train_epoch(epoch_num):
    detector.train()
    tr = []
    start = time.time()
    for b, batch in tqdm(enumerate(train_loader)):
        tr.append(train_batch(batch))

        if b % conf.print_interval == 0 and b >= conf.print_interval:
            mn = pd.concat(tr[-conf.print_interval:], axis=1).mean(1)
            time_per_batch = (time.time() - start) / conf.print_interval
            print("\ne{:2d}b{:5d}/{:5d} {:.3f}s/batch, {:.1f}m/epoch".format(
                epoch_num, b, len(train_loader), time_per_batch,
                time_per_batch * len(train_loader) / 60))
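# ---------------------------------------------------------------------------
# Hedged sketch (assumption, not this repo's implementation): the snippets in
# this file lean on optimistic_restore(model, state_dict) to load checkpoints
# whose keys only partially match the model. A minimal version that copies
# every tensor whose name and shape match, skips the rest, and reports
# whether the restore was clean (so callers can trust ckpt['epoch']) could
# look like this; the name optimistic_restore_sketch is hypothetical.
def optimistic_restore_sketch(model, state_dict):
    own_state = model.state_dict()
    clean = True
    for name, param in state_dict.items():
        if name in own_state and own_state[name].shape == param.shape:
            own_state[name].copy_(param)  # in-place copy keeps device/dtype
        else:
            clean = False  # unexpected key or shape mismatch
    # Keys present in the model but absent from the checkpoint also make the
    # restore "unclean".
    clean = clean and not (set(own_state) - set(state_dict))
    return clean
# ---------------------------------------------------------------------------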
# lang_stats = language_eval(json.load(open(conf.save_dir+'_print_predictions.json')), test.coco_ids[:2000], conf.save_dir+'_cache.json')

### load the caption generator ##################################################
captionGenerator = GCNLSTMModel(train.ix_to_word, train.vocab_size,
                                input_encoding_size=300,
                                Dconv=4096,
                                num_predicate=len(train.ind_to_predicates),
                                rnn_type='lstm',
                                rnn_size=512,
                                num_layers=1,
                                drop_prob_lm=0.5,
                                seq_length=16,
                                seq_per_img=5,
                                att_feat_size=512,
                                num_relation=conf.num_relation,
                                freq_bl=conf.freq_bl)
captionGenerator.cuda()
print(print_para(captionGenerator), flush=True)

if conf.caption_ckpt is not None:
    caption_ckpt = torch.load(conf.caption_ckpt)
    start_epoch = caption_ckpt['epoch']
    if not optimistic_restore(captionGenerator, caption_ckpt['state_dict']):
        start_epoch = -1
else:
    start_epoch = -1

###### now load the relation detector and set it to test mode!!! ###################################
detector = RelModel(classes=train.ind_to_classes,
                    rel_classes=train.ind_to_predicates,
                    num_gpus=conf.num_gpus,
                    mode=conf.mode,
                    require_overlap_det=True,
                    use_resnet=conf.use_resnet,
                    order=conf.order,
                    pick_parent=conf.pick_parent,
                    nl_edge=conf.nl_edge,
                    nl_obj=conf.nl_obj,
                    hidden_dim=conf.hidden_dim,
                    use_proposals=conf.use_proposals,
                    pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
                    pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
                    pooling_dim=conf.pooling_dim,
                    rec_dropout=conf.rec_dropout,
                    use_bias=conf.use_bias,
                    use_tanh=conf.use_tanh,
                    limit_vision=conf.limit_vision)
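# ---------------------------------------------------------------------------
# Hedged sketch (assumption): the banner above says the relation detector is
# only used in test mode while the caption generator trains, so the usual
# follow-up is to freeze its parameters and switch it to eval so dropout and
# recurrent-dropout behave deterministically:
for p in detector.parameters():
    p.requires_grad = False
detector.eval()
detector.cuda()
# ---------------------------------------------------------------------------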
def main():
    fname = os.path.join(conf.save_dir, 'train_losses.csv')
    train_f = open(fname, 'w')
    train_f.write('iter,class_loss,rel_loss,total,recall20,recall50,recall100,'
                  'recall20_con,recall50_con,recall100_con\n')
    train_f.flush()

    fname = os.path.join(conf.save_dir, 'val_losses.csv')
    val_f = open(fname, 'w')
    val_f.write('recall20,recall50,recall100,recall20_con,recall50_con,recall100_con\n')
    val_f.flush()

    train, val, _ = VG.splits(num_val_im=conf.val_size,
                              filter_duplicate_rels=True,
                              use_proposals=conf.use_proposals,
                              filter_non_overlap=conf.mode == 'sgdet')
    train_loader, val_loader = VGDataLoader.splits(train, val,
                                                   mode='rel',
                                                   batch_size=conf.batch_size,
                                                   num_workers=conf.num_workers,
                                                   num_gpus=conf.num_gpus)

    detector = RelModel(classes=train.ind_to_classes,
                        rel_classes=train.ind_to_predicates,
                        num_gpus=conf.num_gpus,
                        mode=conf.mode,
                        require_overlap_det=True,
                        use_resnet=conf.use_resnet,
                        order=conf.order,
                        nl_edge=conf.nl_edge,
                        nl_obj=conf.nl_obj,
                        hidden_dim=conf.hidden_dim,
                        use_proposals=conf.use_proposals,
                        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
                        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
                        pooling_dim=conf.pooling_dim,
                        rec_dropout=conf.rec_dropout,
                        use_bias=conf.use_bias,
                        use_tanh=conf.use_tanh,
                        limit_vision=conf.limit_vision,
                        lml_topk=conf.lml_topk,
                        lml_softmax=conf.lml_softmax,
                        entr_topk=conf.entr_topk,
                        ml_loss=conf.ml_loss)

    # Freeze the detector: only the relation head is trained.
    for n, param in detector.detector.named_parameters():
        param.requires_grad = False
    print(print_para(detector), flush=True)

    ckpt = torch.load(conf.ckpt)
    if conf.ckpt.split('-')[-2].split('/')[-1] == 'vgrel':
        print("Loading EVERYTHING")
        start_epoch = ckpt['epoch']
        if not optimistic_restore(detector, ckpt['state_dict']):
            start_epoch = -1
        # optimistic_restore(detector.detector,
        #                    torch.load('checkpoints/vgdet/vg-28.tar')['state_dict'])
    else:
        start_epoch = -1
        optimistic_restore(detector.detector, ckpt['state_dict'])

        # Copy the pretrained VGG fc layers into both ROI feature heads.
        detector.roi_fmap[1][0].weight.data.copy_(ckpt['state_dict']['roi_fmap.0.weight'])
        detector.roi_fmap[1][3].weight.data.copy_(ckpt['state_dict']['roi_fmap.3.weight'])
        detector.roi_fmap[1][0].bias.data.copy_(ckpt['state_dict']['roi_fmap.0.bias'])
        detector.roi_fmap[1][3].bias.data.copy_(ckpt['state_dict']['roi_fmap.3.bias'])
        detector.roi_fmap_obj[0].weight.data.copy_(ckpt['state_dict']['roi_fmap.0.weight'])
        detector.roi_fmap_obj[3].weight.data.copy_(ckpt['state_dict']['roi_fmap.3.weight'])
        detector.roi_fmap_obj[0].bias.data.copy_(ckpt['state_dict']['roi_fmap.0.bias'])
        detector.roi_fmap_obj[3].bias.data.copy_(ckpt['state_dict']['roi_fmap.3.bias'])

    detector.cuda()
    print("Training starts now!")
    optimizer, scheduler = get_optim(detector, conf.lr * conf.num_gpus * conf.batch_size)

    best_eval = None
    for epoch in range(start_epoch + 1, start_epoch + 1 + conf.num_epochs):
        rez = train_epoch(epoch, detector, train, train_loader, optimizer, conf, train_f)
        print("overall{:2d}: ({:.3f})\n{}".format(epoch, rez.mean(1)['total'], rez.mean(1)),
              flush=True)

        mAp = val_epoch(detector, val, val_loader, val_f)
        scheduler.step(mAp)

        if conf.save_dir is not None:
            if best_eval is None or mAp > best_eval:
                torch.save({
                    'epoch': epoch,
                    'state_dict': detector.state_dict(),
                    # 'optimizer': optimizer.state_dict(),
                }, os.path.join(conf.save_dir, 'best-val.tar'))
                best_eval = mAp
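# ---------------------------------------------------------------------------
# Hedged sketch (assumption): get_optim is not shown in full in this excerpt.
# main() expects get_optim(detector, lr) -> (optimizer, scheduler), and the
# scheduler fragments elsewhere in this file show a ReduceLROnPlateau in
# 'max' mode (it is stepped with validation mAP). One shape consistent with
# those call sites; get_optim_sketch is a hypothetical name, and conf is the
# script-level ModelConfig these files already use:
from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

def get_optim_sketch(detector, lr):
    params = [p for p in detector.parameters() if p.requires_grad]
    optimizer = optim.SGD(params, weight_decay=conf.l2, lr=lr, momentum=0.9)
    scheduler = ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.1,
                                  verbose=True, threshold=0.0001,
                                  threshold_mode='abs', cooldown=1)
    return optimizer, scheduler
# ---------------------------------------------------------------------------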
    scheduler = ReduceLROnPlateau(optimizer, 'max',
                                  patience=3,
                                  factor=0.1,
                                  verbose=True,
                                  threshold=0.0001,
                                  threshold_mode='abs',
                                  cooldown=1)
    return optimizer, scheduler


# ii_rel = FocalLoss(train.num_predicates, PREDICATES_WEIGHTS, CURRICULUM, size_average=False)
# ii_obj = FocalLoss(train.num_classes, size_average=True)

ckpt = torch.load(conf.ckpt)
if conf.ckpt.split('-')[-2].split('/')[-1] == 'vgrel':
    logger.info("Loading EVERYTHING")
    start_epoch = ckpt['epoch']
    if not optimistic_restore(detector, ckpt['state_dict']):
        start_epoch = -1
    # optimistic_restore(detector.detector, torch.load('checkpoints/vgdet/vg-28.tar')['state_dict'])
else:
    start_epoch = -1
    optimistic_restore(detector.detector, ckpt['state_dict'])
    detector.roi_fmap[1][0].weight.data.copy_(ckpt['state_dict']['roi_fmap.0.weight'])
    detector.roi_fmap[1][3].weight.data.copy_(ckpt['state_dict']['roi_fmap.3.weight'])
    detector.roi_fmap[1][0].bias.data.copy_(ckpt['state_dict']['roi_fmap.0.bias'])
    detector.roi_fmap[1][3].bias.data.copy_(ckpt['state_dict']['roi_fmap.3.bias'])
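# Worked example (assumption: checkpoints are named like 'vgrel-<epoch>.tar'):
# the 'vgrel' test above keys off the checkpoint path. For
# conf.ckpt = 'checkpoints/baseline/vgrel-11.tar':
#   conf.ckpt.split('-')   -> ['checkpoints/baseline/vgrel', '11.tar']
#   [-2]                   -> 'checkpoints/baseline/vgrel'
#   .split('/')[-1]        -> 'vgrel'  => load the full relation model
# whereas a detector checkpoint like 'checkpoints/vgdet/vg-28.tar' yields
# 'vg' and falls into the else branch that warm-starts only detector.detector.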
def __init__(self, args, ind_to_classes):
    super(neural_motifs_sg2im_model, self).__init__()
    self.args = args

    # Define and initialize the detector.
    self.detector = ObjectDetector(
        classes=ind_to_classes,
        num_gpus=args.num_gpus,
        mode='refinerels' if not args.use_proposals else 'proposals',
        use_resnet=args.use_resnet)
    if args.ckpt is not None:
        ckpt = torch.load(args.ckpt)
        optimistic_restore(self.detector, ckpt['state_dict'])
    self.detector.eval()

    # Define and initialize the generator, image discriminator, object
    # discriminator, and their optimizers.
    vocab = {
        'object_idx_to_name': ind_to_classes,
    }
    self.model, model_kwargs = build_model(args)
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=args.learning_rate)

    self.obj_discriminator, d_obj_kwargs = build_obj_discriminator(args, vocab)
    self.img_discriminator, d_img_kwargs = build_img_discriminator(args)

    if self.obj_discriminator is not None:
        self.obj_discriminator.train()
        self.optimizer_d_obj = torch.optim.Adam(
            self.obj_discriminator.parameters(), lr=args.learning_rate)
    if self.img_discriminator is not None:
        self.img_discriminator.train()
        self.optimizer_d_img = torch.optim.Adam(
            self.img_discriminator.parameters(), lr=args.learning_rate)

    restore_path = None
    if args.restore_from_checkpoint:
        restore_path = '%s_with_model.pt' % args.checkpoint_name
        restore_path = os.path.join(args.output_dir, restore_path)
    if restore_path is not None and os.path.isfile(restore_path):
        print('Restoring from checkpoint:')
        print(restore_path)
        checkpoint = torch.load(restore_path)
        self.model.load_state_dict(checkpoint['model_state'])
        self.optimizer.load_state_dict(checkpoint['optim_state'])

        if self.obj_discriminator is not None:
            self.obj_discriminator.load_state_dict(checkpoint['d_obj_state'])
            self.optimizer_d_obj.load_state_dict(checkpoint['d_obj_optim_state'])

        if self.img_discriminator is not None:
            self.img_discriminator.load_state_dict(checkpoint['d_img_state'])
            self.optimizer_d_img.load_state_dict(checkpoint['d_img_optim_state'])

        t = checkpoint['counters']['t']
        if 0 <= args.eval_mode_after <= t:
            self.model.eval()
        else:
            self.model.train()
        epoch = checkpoint['counters']['epoch']
    else:
        t, epoch = 0, 0
        checkpoint = {
            'vocab': vocab,
            'model_kwargs': model_kwargs,
            'd_obj_kwargs': d_obj_kwargs,
            'd_img_kwargs': d_img_kwargs,
            'losses_ts': [],
            'losses': defaultdict(list),
            'd_losses': defaultdict(list),
            'checkpoint_ts': [],
            'train_batch_data': [],
            'train_samples': [],
            'train_iou': [],
            'val_batch_data': [],
            'val_samples': [],
            'val_losses': defaultdict(list),
            'val_iou': [],
            'norm_d': [],
            'norm_g': [],
            'counters': {
                't': None,
                'epoch': None,
            },
            'model_state': None,
            'model_best_state': None,
            'optim_state': None,
            'd_obj_state': None,
            'd_obj_best_state': None,
            'd_obj_optim_state': None,
            'd_img_state': None,
            'd_img_best_state': None,
            'd_img_optim_state': None,
            'best_t': [],
        }

    self.t, self.epoch, self.checkpoint = t, epoch, checkpoint
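# ---------------------------------------------------------------------------
# Hedged usage sketch (assumption): the constructor above reads only the args
# fields listed below directly; build_model and the discriminator builders
# will need fields of their own on top of these. Illustrative values, not the
# project's defaults:
from argparse import Namespace

args = Namespace(num_gpus=1, use_resnet=False, use_proposals=False,
                 ckpt='checkpoints/vgdet/vg-24.tar', learning_rate=1e-4,
                 restore_from_checkpoint=False, checkpoint_name='sg2im',
                 output_dir='output', eval_mode_after=100000)
model = neural_motifs_sg2im_model(args,
                                  ind_to_classes=['__background__',
                                                  'person', 'dog'])
# model.t, model.epoch, and model.checkpoint are now initialized as above.
# ---------------------------------------------------------------------------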
    scheduler = ReduceLROnPlateau(optimizer, 'max',
                                  patience=3,
                                  factor=0.1,
                                  verbose=True,
                                  threshold=0.0001,
                                  threshold_mode='abs',
                                  cooldown=1)
    return optimizer, scheduler


ckpt = torch.load(conf.ckpt)
if conf.ckpt.split('-')[-2].split('/')[-1] == 'vgrel':
    print("Loading EVERYTHING")
    start_epoch = ckpt['epoch']
    if not optimistic_restore(detector, ckpt['state_dict']):
        start_epoch = -1
    # optimistic_restore(detector.detector, torch.load('checkpoints/vgdet/vg-28.tar')['state_dict'])
else:
    start_epoch = -1
    optimistic_restore(detector.detector, ckpt['state_dict'])  # restore the object detector's parameters
    # Two Linear layers, taken from the pretrained VGG16 layers.
    detector.roi_fmap[1][0].weight.data.copy_(ckpt['state_dict']['roi_fmap.0.weight'])
    detector.roi_fmap[1][3].weight.data.copy_(ckpt['state_dict']['roi_fmap.3.weight'])
    detector.roi_fmap[1][0].bias.data.copy_(ckpt['state_dict']['roi_fmap.0.bias'])
    detector.roi_fmap[1][3].bias.data.copy_(ckpt['state_dict']['roi_fmap.3.bias'])
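# Note (assumption about the torchvision VGG16 layout): classifier =
# Sequential(Linear, ReLU, Dropout, Linear, ReLU, Dropout, Linear), so the
# checkpoint keys 'roi_fmap.0' / 'roi_fmap.3' above are its two 4096-d fc
# layers, and roi_fmap[1][0] / roi_fmap[1][3] index those same two Linear
# modules inside the detector's ROI head.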
                             nms_filter_duplicates=True,
                             thresh=0.01)
detector.eval()
detector.cuda()

classifier = ObjectDetector(classes=train.ind_to_classes,
                            num_gpus=conf.num_gpus,
                            mode='gtbox',
                            use_resnet=conf.use_resnet,
                            nms_filter_duplicates=True,
                            thresh=0.01)
classifier.eval()
classifier.cuda()

ckpt = torch.load(conf.ckpt)
mismatch = optimistic_restore(detector, ckpt['state_dict'])
mismatch = optimistic_restore(classifier, ckpt['state_dict'])

MOST_COMMON_MODE = True
if MOST_COMMON_MODE:
    prob_matrix = fg_matrix.astype(np.float32)
    prob_matrix[:, :, 0] = bg_matrix

    # TRYING SOMETHING NEW.
    prob_matrix[:, :, 0] += 1
    prob_matrix /= np.sum(prob_matrix, 2)[:, :, None]
    # prob_matrix /= float(fg_matrix.max())

    np.save(os.path.join(DATA_PATH, 'pred_stats.npy'), prob_matrix)
    prob_matrix[:, :, 0] = 0  # Zero out BG
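# ---------------------------------------------------------------------------
# Hedged usage sketch (assumption): prob_matrix[s, o, p] above approximates
# P(predicate p | subject class s, object class o), with the background
# predicate at index 0. A frequency-baseline lookup over the saved file;
# most_common_predicate is a hypothetical helper, and DATA_PATH is the same
# script-level constant used above:
import os
import numpy as np

stats = np.load(os.path.join(DATA_PATH, 'pred_stats.npy'))
stats[:, :, 0] = 0  # ignore background, as the script does after saving

def most_common_predicate(subj_class, obj_class):
    # argmax over the predicate axis for this (subject, object) class pair
    return stats[subj_class, obj_class].argmax()
# ---------------------------------------------------------------------------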
def main():
    args = 'X -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -multipred -cache motifnet_predcls1'
    sys.argv = args.split(' ')
    conf = ModelConfig()

    if conf.model == 'motifnet':
        from lib.rel_model import RelModel
    elif conf.model == 'stanford':
        from lib.rel_model_stanford import RelModelStanford as RelModel
    else:
        raise ValueError()

    train, val, test = VG.splits(num_val_im=conf.val_size,
                                 filter_duplicate_rels=True,
                                 use_proposals=conf.use_proposals,
                                 filter_non_overlap=conf.mode == 'sgdet')
    if conf.test:
        val = test
    train_loader, val_loader = VGDataLoader.splits(train, val,
                                                   mode='rel',
                                                   batch_size=conf.batch_size,
                                                   num_workers=conf.num_workers,
                                                   num_gpus=conf.num_gpus)

    detector = RelModel(classes=train.ind_to_classes,
                        rel_classes=train.ind_to_predicates,
                        num_gpus=conf.num_gpus,
                        mode=conf.mode,
                        require_overlap_det=True,
                        use_resnet=conf.use_resnet,
                        order=conf.order,
                        nl_edge=conf.nl_edge,
                        nl_obj=conf.nl_obj,
                        hidden_dim=conf.hidden_dim,
                        use_proposals=conf.use_proposals,
                        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
                        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
                        pooling_dim=conf.pooling_dim,
                        rec_dropout=conf.rec_dropout,
                        use_bias=conf.use_bias,
                        use_tanh=conf.use_tanh,
                        limit_vision=conf.limit_vision)
    detector.cuda()
    ckpt = torch.load(conf.ckpt)
    optimistic_restore(detector, ckpt['state_dict'])

    evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=conf.multi_pred)

    mode, N = 'test.multi_pred', 20
    recs = pkl.load(open('{}.{}.pkl'.format(mode, N), 'rb'))
    np.random.seed(0)
    # sorted_idxs = np.argsort(recs)
    selected_idxs = np.random.choice(range(len(recs)), size=100, replace=False)
    sorted_idxs = selected_idxs[np.argsort(np.array(recs)[selected_idxs])]
    print('Sorted idxs: {}'.format(sorted_idxs.tolist()))

    save_dir = '/nethome/bamos/2018-intel/data/2018-07-31/sgs.multi'
    for idx in selected_idxs:
        gt_entry = {
            'gt_classes': val.gt_classes[idx].copy(),
            'gt_relations': val.relationships[idx].copy(),
            'gt_boxes': val.gt_boxes[idx].copy(),
        }

        detector.eval()
        det_res = detector[vg_collate([test[idx]], num_gpus=1)]
        boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res

        pred_entry = {
            'pred_boxes': boxes_i * BOX_SCALE / IM_SCALE,
            'pred_classes': objs_i,
            'pred_rel_inds': rels_i,
            'obj_scores': obj_scores_i,
            'rel_scores': pred_scores_i,
        }

        unique_cnames = get_unique_cnames(gt_entry, test)
        save_img(idx, recs, test, gt_entry, det_res, unique_cnames, save_dir)
        save_gt_graph(idx, test, gt_entry, det_res, unique_cnames, save_dir)
        save_pred_graph(idx, test, pred_entry, det_res, unique_cnames, save_dir,
                        multi_pred=conf.multi_pred, n_pred=20)
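# ---------------------------------------------------------------------------
# Hedged sketch (assumption): the evaluator built above goes unused in this
# excerpt; in the companion eval scripts each (gt_entry, pred_entry) pair is
# scored along these lines:
#   evaluator[conf.mode].evaluate_scene_graph_entry(gt_entry, pred_entry)
#   evaluator[conf.mode].print_stats()   # reports R@20 / R@50 / R@100
# ---------------------------------------------------------------------------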