def _is_full_relmodel_ckpt(ckpt_path):
    """Return True when *ckpt_path* names a full relation-model checkpoint.

    The path is split on '-'; the piece just before the last '-' is taken,
    and its final '/'-separated component must equal ``'vgrel'``
    (e.g. ``'checkpoints/vgrel-10.tar'``).  A path with no '-' at all is
    reported as False instead of raising ``IndexError``.
    """
    pieces = ckpt_path.split('-')
    return len(pieces) >= 2 and pieces[-2].split('/')[-1] == 'vgrel'


def _restore_weights(detector, ckpt, ckpt_path):
    """Copy checkpoint weights into *detector* and return the start epoch.

    For a full relation-model checkpoint the whole model is restored and the
    stored epoch is returned, unless ``optimistic_restore`` returns a falsy
    value, in which case -1 is returned.  For any other checkpoint only
    ``detector.detector`` is restored, the ``roi_fmap`` layers 0 and 3 are
    copied into both ``detector.roi_fmap[1]`` and ``detector.roi_fmap_obj``,
    and -1 is returned.
    """
    state = ckpt['state_dict']

    if _is_full_relmodel_ckpt(ckpt_path):
        print("Loading EVERYTHING")
        start_epoch = ckpt['epoch']
        if not optimistic_restore(detector, state):
            start_epoch = -1
        return start_epoch

    optimistic_restore(detector.detector, state)
    # The same checkpoint tensors seed both the relation branch
    # (roi_fmap[1]) and the object branch (roi_fmap_obj).
    for layer in (0, 3):
        for kind in ('weight', 'bias'):
            source = state['roi_fmap.{}.{}'.format(layer, kind)]
            getattr(detector.roi_fmap[1][layer], kind).data.copy_(source)
            getattr(detector.roi_fmap_obj[layer], kind).data.copy_(source)
    return -1


def _run_training(train_f, val_f):
    """Build the data loaders and model, restore weights, and train.

    Args:
        train_f: writable text file handed to ``train_epoch``.
        val_f: writable text file handed to ``val_epoch``.
    """
    train, val, _ = VG.splits(num_val_im=conf.val_size,
                              filter_duplicate_rels=True,
                              use_proposals=conf.use_proposals,
                              filter_non_overlap=conf.mode == 'sgdet')
    train_loader, val_loader = VGDataLoader.splits(
        train, val,
        mode='rel',
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus)

    detector = RelModel(
        classes=train.ind_to_classes,
        rel_classes=train.ind_to_predicates,
        num_gpus=conf.num_gpus,
        mode=conf.mode,
        require_overlap_det=True,
        use_resnet=conf.use_resnet,
        order=conf.order,
        nl_edge=conf.nl_edge,
        nl_obj=conf.nl_obj,
        hidden_dim=conf.hidden_dim,
        use_proposals=conf.use_proposals,
        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
        pooling_dim=conf.pooling_dim,
        rec_dropout=conf.rec_dropout,
        use_bias=conf.use_bias,
        use_tanh=conf.use_tanh,
        limit_vision=conf.limit_vision,
        lml_topk=conf.lml_topk,
        lml_softmax=conf.lml_softmax,
        entr_topk=conf.entr_topk,
        ml_loss=conf.ml_loss)

    # Freeze the detector
    for _, param in detector.detector.named_parameters():
        param.requires_grad = False

    print(print_para(detector), flush=True)

    # NOTE: torch.load unpickles the file; only load trusted checkpoints.
    ckpt = torch.load(conf.ckpt)
    start_epoch = _restore_weights(detector, ckpt, conf.ckpt)

    detector.cuda()

    print("Training starts now!")
    # The base learning rate is scaled by the number of GPUs and batch size.
    optimizer, scheduler = get_optim(
        detector, conf.lr * conf.num_gpus * conf.batch_size)

    best_eval = None
    first_epoch = start_epoch + 1
    for epoch in range(first_epoch, first_epoch + conf.num_epochs):
        rez = train_epoch(epoch, detector, train, train_loader, optimizer,
                          conf, train_f)
        epoch_mean = rez.mean(1)
        print("overall{:2d}: ({:.3f})\n{}".format(epoch, epoch_mean['total'],
                                                  epoch_mean),
              flush=True)

        mAp = val_epoch(detector, val, val_loader, val_f)
        scheduler.step(mAp)

        if conf.save_dir is not None:
            if best_eval is None or mAp > best_eval:
                # Only the epoch and model weights are stored; the optimizer
                # state is not part of the checkpoint.
                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict': detector.state_dict(),
                    },
                    os.path.join(conf.save_dir, 'best-val.tar'))
                best_eval = mAp


def main():
    """Train the relation model, logging losses and recalls to CSV files.

    Creates ``train_losses.csv`` and ``val_losses.csv`` in ``conf.save_dir``,
    writes their header rows, and runs the training loop.  Both files are
    closed when training finishes or raises, so buffered rows are not lost.
    After each epoch the model is saved to ``best-val.tar`` whenever the value
    returned by ``val_epoch`` improves on the best one seen so far.
    """
    train_log = os.path.join(conf.save_dir, 'train_losses.csv')
    val_log = os.path.join(conf.save_dir, 'val_losses.csv')

    with open(train_log, 'w') as train_f, open(val_log, 'w') as val_f:
        train_f.write(
            'iter,class_loss,rel_loss,total,recall20,recall50,recall100,recall20_con,recall50_con,recall100_con\n'
        )
        train_f.flush()

        val_f.write(
            'recall20,recall50,recall100,recall20_con,recall50_con,recall100_con\n'
        )
        val_f.flush()

        _run_training(train_f, val_f)
nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim, use_proposals=conf.use_proposals, pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder, pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge, pooling_dim=conf.pooling_dim, rec_dropout=conf.rec_dropout, use_bias=conf.use_bias, use_tanh=conf.use_tanh, limit_vision=conf.limit_vision) # Freeze the detector for n, param in detector.detector.named_parameters(): param.requires_grad = False logger.info(print_para(detector)) def get_optim(lr): # Lower the learning rate on the VGG fully connected layers by 1/10th. It's a hack, but it helps # stabilize the models. fc_params = [ p for n, p in detector.named_parameters() if n.startswith('roi_fmap') and p.requires_grad ] non_fc_params = [ p for n, p in detector.named_parameters() if not n.startswith('roi_fmap') and p.requires_grad ] params = [{ 'params': fc_params,
with open(cache_path, 'w') as outfile: json.dump({'overall': out, 'imgToEval': imgToEval}, outfile) return out # lang_stats = language_eval(json.load(open(conf.save_dir+'_print_predictions.json')), test.coco_ids[:2000], conf.save_dir+'_cache.json') ### load the caption generator ################################################## captionGenerator = GCNLSTMModel(train.ix_to_word, train.vocab_size, input_encoding_size=300, Dconv=4096, num_predicate=len(train.ind_to_predicates), rnn_type='lstm', rnn_size=512, num_layers=1, drop_prob_lm=0.5, seq_length=16, seq_per_img=5, att_feat_size=512, num_relation=conf.num_relation, freq_bl=conf.freq_bl) captionGenerator.cuda() print(print_para(captionGenerator), flush=True) if conf.caption_ckpt is not None: caption_ckpt = torch.load(conf.caption_ckpt) start_epoch = caption_ckpt['epoch'] if not optimistic_restore(captionGenerator, caption_ckpt['state_dict']): start_epoch = -1 else: start_epoch = -1 ###### now load the relation detector and set it to test mode!!! ################################### detector = RelModel(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates, num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True, use_resnet=conf.use_resnet, order=conf.order, pick_parent=conf.pick_parent, nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim, use_proposals=conf.use_proposals,
for n, param in detector.detector.named_parameters(): param.requires_grad = False """ if n.startswith('score'): param.requires_grad = False elif n.startswith('bbox'): param.requires_grad = False if n.startswith('roi'): param.requires_grad = False elif n.startswith('features'): param.requires_grad = False else: continue """ print(print_para(detector), flush=True) # optimizer def get_optim(lr): # Lower the learning rate on the VGG fully connected layers by 1/10th. It's a hack, but it helps stabilize the models. # p.requires_grad == True if it's not Faster RCNN param; == False if it's Faster RCNN param # original: add all 'roi_fmap' in Relmodel into fc_params; add all not 'roi_fmap' in Relmodel into non_fc_params; params in faster rcnn, continue fc_params = [ p for n, p in detector.named_parameters() if n.startswith('roi_fmap') and p.requires_grad ] non_fc_params = [ p for n, p in detector.named_parameters() if not n.startswith('roi_fmap') and p.requires_grad ]