def __init__(self, args, trained=True, bn=False, backbone_type='res101'):
    super(Vrd_Graph_GA_v4, self).__init__()
    self.n_obj = args.num_classes
    self.n_rel = args.num_relations

    global res101
    res101 = resnet.__dict__['resnet101'](pretrained=False,
                                          norm_layer=FrozenBatchNorm2d)
    if trained and backbone_type == 'res101':
        weight_path = "../models/resnet101-5d3b4d8f.pth"
        state = torch.load(weight_path)
        res101.load_state_dict(state)

    # Keep the ResNet-101 trunk up to (and including) layer3; layer4 is
    # applied later, on the RoI-pooled features.
    layers_res = OrderedDict()
    for k, v in res101.named_children():
        if k in ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']:
            layers_res[k] = v
    self.features = nn.Sequential(layers_res)  # 1024 channels at stride 16
    network.set_trainable(self.features, requires_grad=False)

    self.roi_pool = RoIPool((14, 14), 1.0 / 16)
    self.inter_layer = res101.layer4  # 1024 -> 2048 channels
    network.set_trainable(self.inter_layer, requires_grad=False)
    self.pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

    self.fc6 = nn.Linear(2048, 256)
    self.fc_obj = nn.Linear(2048, self.n_obj)
    self.gat_conv_rel1 = GatedGraphConv(out_channels=256, num_layers=2)
    self.rel_1 = nn.Linear(768, 256)
    self.rel_2 = nn.Linear(256, self.n_rel)
    self.fc_lov = nn.Linear(8, 256)
    self.fc_sub_obj = nn.Linear(2 * 300, 256)
    self.initialize_param()
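# --- Sketch (illustrative, not part of the model) ----------------------------
# Confirms the channel/stride arithmetic assumed above: truncating
# torchvision's resnet101 at layer3 yields a 1024-channel, stride-16 feature
# map, and layer4 lifts it to 2048 channels. Input size 224 is arbitrary.
import torch
from collections import OrderedDict
from torch import nn
from torchvision.models import resnet101

trunk = resnet101(pretrained=False)
keep = ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']
backbone = nn.Sequential(OrderedDict(
    (k, v) for k, v in trunk.named_children() if k in keep))
feat = backbone(torch.randn(1, 3, 224, 224))
print(feat.shape)                # torch.Size([1, 1024, 14, 14])
print(trunk.layer4(feat).shape)  # torch.Size([1, 2048, 7, 7])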
    test_set = getattr(datasets, options['data']['dataset'])(
        data_opts, 'test',
        dataset_option=options['data'].get('dataset_option', None),
        use_region=options['data'].get('use_region', False))
    print("Done")

    # Model declaration
    model = getattr(models, options['model']['arch'])(test_set,
                                                      opts=options['model'])
    print("Done.")

    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=options['data']['batch_size'],
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=getattr(datasets, options['data']['dataset']).collate)

    network.set_trainable(model, False)
    print('Loading pretrained model: {}'.format(args.pretrained_model))
    args.train_all = True
    network.load_net(args.pretrained_model, model)

    # Set the state of the model for inference
    model.cuda()
    model.eval()
    print('--------------------------------------------------------------------------')
    print('3D-Scene-Graph-Generator Demo: Object detection and Scene Graph Generation')
    print('--------------------------------------------------------------------------')
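# --- Sketch (illustrative) ----------------------------------------------------
# `network.set_trainable` is used throughout these scripts to freeze
# submodules. A minimal stand-in with the behavior the call sites assume
# (the repo's own helper may differ in details) is simply:
def set_trainable_sketch(module, requires_grad):
    # Toggle gradient computation for every parameter of the module.
    for param in module.parameters():
        param.requires_grad = requires_grad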
def main():
    global args, is_best, best_recall, best_recall_pred, best_recall_phrase

    # Set options (the model name is derived automatically)
    options = {
        'logs': {
            'model_name': args.model_name,
            'dir_logs': args.dir_logs,
        },
        'data': {
            'dataset_option': args.dataset_option,
            'batch_size': torch.cuda.device_count(),
        },
        'optim': {
            'lr': args.learning_rate,
            'epochs': args.epochs,
            'lr_decay_epoch': args.step_size,
            'optimizer': args.optimizer,
            'clip_gradient': args.clip_gradient,
        },
        'model': {
            'MPS_iter': args.MPS_iter,
            'dropout': args.dropout,
            'use_loss_weight': args.loss_weight,
        },
    }
    if args.path_opt is not None:
        with open(args.path_opt, 'r') as handle:
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        options = utils.update_values(options, options_yaml)
        with open(options['data']['opts'], 'r') as f:
            data_opts = yaml.load(f, Loader=yaml.FullLoader)
        options['data']['dataset_version'] = data_opts.get('dataset_version', None)
        options['opts'] = data_opts

    print('## args')
    pprint(vars(args))
    print('## options')
    pprint(options)
    lr = options['optim']['lr']
    options = get_model_name(options)
    print('Checkpoints are saved to: {}'.format(options['logs']['dir_logs']))

    # Set the random seed
    random.seed(args.seed)
    torch.manual_seed(args.seed + 1)
    torch.cuda.manual_seed(args.seed + 2)

    print("Loading training set and testing set...")
    train_set = getattr(datasets, options['data']['dataset'])(
        data_opts, 'train',
        dataset_option=options['data'].get('dataset_option', None),
        use_region=options['data'].get('use_region', False),
    )
    test_set = getattr(datasets, options['data']['dataset'])(
        data_opts, 'test',
        dataset_option=options['data'].get('dataset_option', None),
        use_region=options['data'].get('use_region', False))
    print("Done")

    # Model declaration
    model = getattr(models, options['model']['arch'])(train_set,
                                                      opts=options['model'])
    # Pass enough information for anchor target generation
    train_set._feat_stride = model.rpn._feat_stride
    train_set._rpn_opts = model.rpn.opts
    print("Done.")

    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=options['data']['batch_size'],
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=getattr(datasets, options['data']['dataset']).collate,
        drop_last=True,
    )
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=1,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=getattr(datasets, options['data']['dataset']).collate)

    # Group up the features
    vgg_features_fix, vgg_features_var, rpn_features, hdn_features, mps_features = \
        group_features(model, has_RPN=True)
    network.set_trainable(model, False)
    exp_logger = None

    # 1. only optimize MPS
    if args.optimize_MPS:
        print('Optimize the MPS part ONLY.')
        assert args.pretrained_model, 'Please specify the [pretrained_model]'
        print('Loading pretrained model: {}'.format(args.pretrained_model))
        network.load_net(args.pretrained_model, model)
        args.train_all = False
        optimizer = get_optimizer(lr, 3, options, vgg_features_var,
                                  rpn_features, hdn_features, mps_features)
    # 2. resume training
    elif args.resume is not None:
        print('Loading saved model: {}'.format(
            os.path.join(options['logs']['dir_logs'], args.resume)))
        args.train_all = True
        optimizer = get_optimizer(lr, 2, options, vgg_features_var,
                                  rpn_features, hdn_features, mps_features)
        args.start_epoch, best_recall[0], exp_logger = load_checkpoint(
            model, optimizer,
            os.path.join(options['logs']['dir_logs'], args.resume))
    else:
        if os.path.isdir(options['logs']['dir_logs']):
            if click.confirm(
                    'Logs directory already exists in {}. Erase?'.format(
                        options['logs']['dir_logs']),
                    default=False):
                os.system('rm -r ' + options['logs']['dir_logs'])
            else:
                return
        os.system('mkdir -p ' + options['logs']['dir_logs'])
        path_new_opt = os.path.join(options['logs']['dir_logs'],
                                    os.path.basename(args.path_opt))
        path_args = os.path.join(options['logs']['dir_logs'], 'args.yaml')
        with open(path_new_opt, 'w') as f:
            yaml.dump(options, f, default_flow_style=False)
        with open(path_args, 'w') as f:
            yaml.dump(vars(args), f, default_flow_style=False)

        # 3. If we have some initialization points
        if args.pretrained_model is not None:
            print('Loading pretrained model: {}'.format(args.pretrained_model))
            args.train_all = True
            network.load_net(args.pretrained_model, model)
            optimizer = get_optimizer(lr, 2, options, vgg_features_var,
                                      rpn_features, hdn_features, mps_features)
        # 4. training with a pretrained RPN
        elif args.rpn is not None:
            print('Loading pretrained RPN: {}'.format(args.rpn))
            args.train_all = False
            network.load_net(args.rpn, model.rpn)
            optimizer = get_optimizer(lr, 2, options, vgg_features_var,
                                      rpn_features, hdn_features, mps_features)
        # 5. train end-to-end: no RPN given
        else:
            print('\n*** End-to-end Training ***\n')
            args.train_all = True
            optimizer = get_optimizer(lr, 0, options, vgg_features_var,
                                      rpn_features, hdn_features, mps_features)
            if args.warm_iters < 0:
                args.warm_iters = options['optim']['lr_decay_epoch']
            assert args.start_epoch == 0, \
                'Set [start_epoch] to 0, or something unexpected will happen.'
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=options['optim']['lr_decay_epoch'],
        gamma=options['optim']['lr_decay'])

    # Set the state of the training model
    model = DataParallel(model)
    model.cuda()
    model.train()

    # Set loggers
    if exp_logger is None:
        exp_name = os.path.basename(options['logs']['dir_logs'])  # add timestamp
        exp_logger = logger.Experiment(exp_name, options)
        exp_logger.add_meters('train', make_meters())
        exp_logger.add_meters('test', make_meters())
        exp_logger.info['model_params'] = utils.params_count(model)
        print('Model has {} parameters'.format(exp_logger.info['model_params']))

    top_Ns = [50, 100]

    if args.evaluate:
        recall, result = model.module.engines.test(
            test_loader, model, top_Ns,
            nms=args.nms,
            triplet_nms=args.triplet_nms,
            use_gt_boxes=args.use_gt_boxes)
        print('======= Testing Result =======')
        for idx, top_N in enumerate(top_Ns):
            print('Top-%d Recall'
                  '\t[Pred]: %2.3f%%'
                  '\t[Phr]: %2.3f%%'
                  '\t[Rel]: %2.3f%%' % (
                      top_N,
                      float(recall[2][idx]) * 100,
                      float(recall[1][idx]) * 100,
                      float(recall[0][idx]) * 100))
        print('============ Done ============')
        save_results(result, None, options['logs']['dir_logs'], is_testing=True)
        return

    if args.predict:
        predict_loader = torch.utils.data.DataLoader(
            COCO_loader.CocoLoader(),
            batch_size=1,
            shuffle=False,
            num_workers=args.workers)
        relationship_image_map = {}
        image_path = "/home/tusharkumar91/WS/MAttNet/data/images/mscoco/images/train2014/"
        min_score = 0.01
        tot_time = 0
        with torch.no_grad():
            for idx, sample in enumerate(predict_loader):
                path = image_path + sample['item'][0]
                tic = time.time()
                result, subject_inds, object_inds = model.module.engines.predict(
                    path, train_set, model, [100],
                    nms=args.nms,
                    triplet_nms=args.triplet_nms,
                    use_gt_boxes=args.use_gt_boxes)
                rel_list = []
                for relationship in result['relationships']:
                    subject = train_set._object_classes[
                        result['objects']['class'][relationship[0]]]
                    predicate = train_set._predicate_classes[relationship[2]]
                    obj = train_set._object_classes[
                        result['objects']['class'][relationship[1]]]
                    score = relationship[3]
                    rel_list += [(subject, predicate, obj, score.item())]
                toc = time.time()
                tot_time += toc - tic
                relationship_image_map[str(sample['id'][0].item())] = rel_list
                if idx % 25 == 0:
                    print("Time Spent : {}".format(tot_time))
                    print("Processed {}/{} images".format(
                        idx + 1, len(predict_loader)))
        with open("sub_rel_obj_coco.json", "w") as f:
            json.dump(relationship_image_map, f)
        return

    if args.evaluate_object:
        result = model.module.engines.test_object_detection(
            test_loader, model,
            nms=args.nms,
            use_gt_boxes=args.use_gt_boxes)
        print('============ Done ============')
        path_dets = save_detections(result, None, options['logs']['dir_logs'],
                                    is_testing=True)
        print('Evaluating...')
        python_eval(path_dets, osp.join(data_opts['dir'], 'object_xml'))
        return
    print('========== [Start Training] ==========\n')
    FLAG_infinite = False
    loop_counter = 0
    _ = None  # placeholder for unused assignments

    # infinite training scheme
    while True:
        if FLAG_infinite:  # not the first loop
            if not args.infinite:
                print('Infinite Training is disabled. Done.')
                break
            loop_counter += 1
            args.train_all = True
            optimizer = get_optimizer(lr, 2, options, vgg_features_var,
                                      rpn_features, hdn_features, mps_features)
            args.start_epoch, _, exp_logger = load_checkpoint(
                model, optimizer,
                os.path.join(options['logs']['dir_logs'], 'ckpt'))
            scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer,
                step_size=options['optim']['lr_decay_epoch'],
                gamma=options['optim']['lr_decay'])
            options['optim']['epochs'] = (
                args.start_epoch + options['optim']['lr_decay_epoch'] * 5)
            args.iter_size *= 2
            print('========= [{}] loop ========='.format(loop_counter))
            print('[epoch {}] to [epoch {}]'.format(args.start_epoch,
                                                    options['optim']['epochs']))
            print('[iter_size]: {}'.format(args.iter_size))
        FLAG_infinite = True

        for epoch in range(args.start_epoch, options['optim']['epochs']):
            # Training
            scheduler.step()
            print('[Learning Rate]\t{}'.format(optimizer.param_groups[0]['lr']))
            is_best = False
            model.module.engines.train(
                train_loader, model, optimizer, exp_logger, epoch,
                args.train_all, args.print_freq,
                clip_gradient=options['optim']['clip_gradient'],
                iter_size=args.iter_size)

            if (epoch + 1) % args.eval_epochs == 0:
                print('\n============ Epoch {} ============'.format(epoch))
                recall, result = model.module.engines.test(
                    test_loader, model, top_Ns,
                    nms=args.nms,
                    triplet_nms=args.triplet_nms)
                is_best = (recall[0] > best_recall).all()
                best_recall = recall[0] if is_best else best_recall
                best_recall_phrase = recall[1] if is_best else best_recall_phrase
                best_recall_pred = recall[2] if is_best else best_recall_pred
                print('\n[Result]')
                for idx, top_N in enumerate(top_Ns):
                    print('\tTop-%d Recall'
                          '\t[Pred]: %2.3f%% (best: %2.3f%%)'
                          '\t[Phr]: %2.3f%% (best: %2.3f%%)'
                          '\t[Rel]: %2.3f%% (best: %2.3f%%)' % (
                              top_N,
                              float(recall[2][idx]) * 100,
                              float(best_recall_pred[idx]) * 100,
                              float(recall[1][idx]) * 100,
                              float(best_recall_phrase[idx]) * 100,
                              float(recall[0][idx]) * 100,
                              float(best_recall[idx]) * 100))
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'arch': options['model']['arch'],
                        'exp_logger': exp_logger,
                        'best_recall': best_recall[0],
                    },
                    model.module,
                    optimizer.state_dict(),
                    options['logs']['dir_logs'],
                    args.save_all_from,
                    is_best)
                print('====================================')

            # updating the learning policy
            if (epoch + 1) == args.warm_iters:
                print('Free the base CNN part.')
                args.train_all = True
                # update the optimizer and the corresponding requires_grad state
                optimizer = get_optimizer(lr, 2, options, vgg_features_var,
                                          rpn_features, hdn_features,
                                          mps_features)
                scheduler = torch.optim.lr_scheduler.StepLR(
                    optimizer,
                    step_size=options['optim']['lr_decay_epoch'],
                    gamma=options['optim']['lr_decay'])
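# --- Sketch (illustrative) ----------------------------------------------------
# The best-model bookkeeping above compares recall arrays elementwise: a
# checkpoint counts as "best" only if it improves at every top-N cutoff.
# Hypothetical numbers, same shape as recall[0] (one entry per top_N):
import numpy as np

best = np.array([0.20, 0.25])        # previous best Top-50 / Top-100 recall
new = np.array([0.21, 0.26])         # freshly evaluated recall
is_best_demo = (new > best).all()    # True only if both cutoffs improve
best = new if is_best_demo else best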
def main():
    global args
    print("Loading training set and testing set...")
    with open(args.path_data_opts, 'r') as f:
        data_opts = yaml.load(f, Loader=yaml.FullLoader)
    train_set = VRD(data_opts, 'train', batch_size=args.batch_size)
    test_set = VRD(data_opts, 'test', batch_size=args.batch_size)
    print("Done.")

    with open(args.path_rpn_opts, 'r') as f:
        opts = yaml.load(f, Loader=yaml.FullLoader)
    opts['scale'] = train_set.opts['test']['SCALES'][0]
    print('scale: {}'.format(opts['scale']))
    net = RPN(opts)
    # Pass enough information for anchor target generation
    train_set._feat_stride = net._feat_stride
    train_set._rpn_opts = net.opts

    # In evaluate mode, disable shuffling
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=False if args.evaluate else True,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=VRD.collate)
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=VRD.collate)

    if args.resume is not None:
        print('Resume training from: {}'.format(args.resume))
        RPN_utils.load_checkpoint(args.resume, net)
        optimizer = torch.optim.SGD(
            [{'params': list(net.parameters())[26:]}],
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=0.0005)
    else:
        print('Training from scratch.')
        optimizer = torch.optim.SGD(list(net.parameters())[26:],
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=0.0005)

    network.set_trainable(net.features, requires_grad=False)
    net.cuda()

    if args.evaluate:
        # evaluate on the test set
        net.eval()
        test(test_loader, net)
        return

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    best_recall = 0.
    for epoch in range(0, args.max_epoch):
        # Training
        train(train_loader, net, optimizer, epoch)
        # Testing
        net.eval()
        recall, _ = test(test_loader, net)
        print('Epoch[{epoch:d}]: '
              'Recall: '
              'object: {recall: .3f}% (Best: {best_recall: .3f}%)'.format(
                  epoch=epoch, recall=recall * 100,
                  best_recall=best_recall * 100))
        # update the learning rate
        if epoch % args.step_size == 0 and epoch > 0:
            args.disable_clip_gradient = True
            args.lr /= 10
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
        save_name = os.path.join(args.output_dir, '{}'.format(args.model_name))
        RPN_utils.save_checkpoint(save_name, net, epoch, recall > best_recall)
        best_recall = recall if recall > best_recall else best_recall
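# --- Sketch (illustrative) ----------------------------------------------------
# The optimizer above only receives list(net.parameters())[26:]. The slice
# relies on parameter ordering: the first 26 tensors are assumed to be the
# frozen VGG feature extractor (13 conv layers x weight+bias). A hypothetical
# stand-in network makes the counting concrete:
import torch.nn as nn
import torch.optim as optim

demo_net = nn.Sequential(
    nn.Sequential(*[nn.Conv2d(3, 3, 3, padding=1) for _ in range(13)]),  # "features"
    nn.Conv2d(3, 18, 1),                                                 # "head"
)
demo_params = list(demo_net.parameters())
print(len(demo_params))  # 28 = 13 conv layers x (weight, bias) + the head's 2
demo_optimizer = optim.SGD(demo_params[26:], lr=0.01, momentum=0.9,
                           weight_decay=0.0005)
for p in demo_params[:26]:  # mirrors network.set_trainable(net.features, False)
    p.requires_grad = False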
def main():
    global args, is_best, best_recall, best_recall_pred, best_recall_phrase

    # Set options (the model name is derived automatically)
    options = {
        'logs': {
            'model_name': args.model_name,
            'dir_logs': args.dir_logs,
        },
        'data': {
            'dataset_option': args.dataset_option,
            'batch_size': torch.cuda.device_count(),  # batch_size scales with the number of GPUs
        },
        'optim': {
            'lr': args.learning_rate,
            'epochs': args.epochs,
            'lr_decay_epoch': args.step_size,
            'optimizer': args.optimizer,
            'clip_gradient': args.clip_gradient,
        },
        'model': {
            'MPS_iter': args.MPS_iter,
            'dropout': args.dropout,
            'use_loss_weight': args.loss_weight,
        },
    }
    if args.path_opt is not None:
        with open(args.path_opt, 'r') as handle:
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        # Update the options structure; the yaml files live under
        # options/models and are selected by the model argument passed on
        # the command line.
        options = utils.update_values(options, options_yaml)
        with open(options['data']['opts'], 'r') as f:
            data_opts = yaml.load(f, Loader=yaml.FullLoader)
        options['data']['dataset_version'] = data_opts.get('dataset_version', None)
        options['opts'] = data_opts

    print('## args')
    pprint(vars(args))
    print('## options')
    pprint(options)
    lr = options['optim']['lr']
    options = get_model_name(options)
    print('Checkpoints are saved to: {}'.format(options['logs']['dir_logs']))

    # Set the random seed
    random.seed(args.seed)
    torch.manual_seed(args.seed + 1)
    torch.cuda.manual_seed(args.seed + 2)

    # Set up the datasets: getattr picks one of the three .py files under
    # lib/datasets; the remaining arguments initialize it.
    print("Loading training set and testing set...")
    train_set = getattr(datasets, options['data']['dataset'])(
        data_opts, 'train',
        dataset_option=options['data'].get('dataset_option', None),
        use_region=options['data'].get('use_region', False),
    )
    test_set = getattr(datasets, options['data']['dataset'])(
        data_opts, 'test',
        dataset_option=options['data'].get('dataset_option', None),
        use_region=options['data'].get('use_region', False))
    print("Done")

    # Model declaration
    # The model class is read from options['model']['arch'] = FN_v4:
    # from HDN_v2.factorizable_network_v4s import Factorizable_network as FN_v4s
    # (see models/__init__.py)
    model = getattr(models, options['model']['arch'])(train_set,
                                                      opts=options['model'])
    # Pass enough information for anchor target generation.
    # These fields are only needed for training; they are None at test time.
    train_set._feat_stride = model.rpn._feat_stride
    train_set._rpn_opts = model.rpn.opts
    print("Done.")

    # Data loading: see the three .py files under lib/datasets.
    # batch_size scales with GPU count and only applies to training;
    # the test batch size is fixed at 1.
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=options['data']['batch_size'],
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=getattr(datasets, options['data']['dataset']).collate,
        drop_last=True,
    )
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=1,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=getattr(datasets, options['data']['dataset']).collate)

    # Group up the features
    vgg_features_fix, vgg_features_var, rpn_features, hdn_features, mps_features = \
        group_features(model, has_RPN=True)
    network.set_trainable(model, False)
    exp_logger = None

    # 1. only optimize MPS
    if args.optimize_MPS:
        print('Optimize the MPS part ONLY.')
        assert args.pretrained_model, 'Please specify the [pretrained_model]'
        print('Loading pretrained model: {}'.format(args.pretrained_model))
        network.load_net(args.pretrained_model, model)
        args.train_all = False
        optimizer = get_optimizer(lr, 3, options, vgg_features_var,
                                  rpn_features, hdn_features, mps_features)
    # 2. resume training
    elif args.resume is not None:
        print('Loading saved model: {}'.format(
            os.path.join(options['logs']['dir_logs'], args.resume)))
        args.train_all = True
        optimizer = get_optimizer(lr, 2, options, vgg_features_var,
                                  rpn_features, hdn_features, mps_features)
        args.start_epoch, best_recall[0], exp_logger = load_checkpoint(
            model, optimizer,
            os.path.join(options['logs']['dir_logs'], args.resume))
    else:
        if os.path.isdir(options['logs']['dir_logs']):
            if click.confirm(
                    'Logs directory already exists in {}. Erase?'.format(
                        options['logs']['dir_logs']),
                    default=False):
                os.system('rm -r ' + options['logs']['dir_logs'])
            else:
                return
        os.system('mkdir -p ' + options['logs']['dir_logs'])
        path_new_opt = os.path.join(options['logs']['dir_logs'],
                                    os.path.basename(args.path_opt))
        path_args = os.path.join(options['logs']['dir_logs'], 'args.yaml')
        with open(path_new_opt, 'w') as f:
            yaml.dump(options, f, default_flow_style=False)
        with open(path_args, 'w') as f:
            yaml.dump(vars(args), f, default_flow_style=False)

        # 3. If we have some initialization points
        if args.pretrained_model is not None:
            print('Loading pretrained model: {}'.format(args.pretrained_model))
            args.train_all = True
            network.load_net(args.pretrained_model, model)
            optimizer = get_optimizer(lr, 2, options, vgg_features_var,
                                      rpn_features, hdn_features, mps_features)
        # 4. training with a pretrained RPN
        elif args.rpn is not None:
            print('Loading pretrained RPN: {}'.format(args.rpn))
            args.train_all = False
            network.load_net(args.rpn, model.rpn)
            optimizer = get_optimizer(lr, 2, options, vgg_features_var,
                                      rpn_features, hdn_features, mps_features)
        # 5. train end-to-end: no RPN given
        else:
            print('\n*** End-to-end Training ***\n')
            args.train_all = True
            optimizer = get_optimizer(lr, 0, options, vgg_features_var,
                                      rpn_features, hdn_features, mps_features)
            if args.warm_iters < 0:
                args.warm_iters = options['optim']['lr_decay_epoch']
            assert args.start_epoch == 0, \
                'Set [start_epoch] to 0, or something unexpected will happen.'
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=options['optim']['lr_decay_epoch'],
        gamma=options['optim']['lr_decay'])

    # Set the state of the training model
    model = DataParallel(model)
    model.cuda()
    model.train()

    # Set loggers
    if exp_logger is None:
        exp_name = os.path.basename(options['logs']['dir_logs'])  # add timestamp
        exp_logger = logger.Experiment(exp_name, options)
        exp_logger.add_meters('train', make_meters())
        exp_logger.add_meters('test', make_meters())
        exp_logger.info['model_params'] = utils.params_count(model)
        print('Model has {} parameters'.format(exp_logger.info['model_params']))

    top_Ns = [50, 100]

    # Entry point for evaluation; the test() engine lives in
    # models/HDN_v2/engines_v1.py.
    if args.evaluate:
        recall, result = model.module.engines.test(
            test_loader, model, top_Ns,
            nms=args.nms,
            triplet_nms=args.triplet_nms,
            use_gt_boxes=args.use_gt_boxes)
        print('======= Testing Result =======')
        for idx, top_N in enumerate(top_Ns):
            print('Top-%d Recall'
                  '\t[Pred]: %2.3f%%'
                  '\t[Phr]: %2.3f%%'
                  '\t[Rel]: %2.3f%%' % (
                      top_N,
                      float(recall[2][idx]) * 100,
                      float(recall[1][idx]) * 100,
                      float(recall[0][idx]) * 100))
        print('============ Done ============')
        save_results(result, None, options['logs']['dir_logs'], is_testing=True)
        return

    if args.evaluate_object:
        result = model.module.engines.test_object_detection(
            test_loader, model,
            nms=args.nms,
            use_gt_boxes=args.use_gt_boxes)
        print('============ Done ============')
        path_dets = save_detections(result, None, options['logs']['dir_logs'],
                                    is_testing=True)
        print('Evaluating...')
        python_eval(path_dets, osp.join(data_opts['dir'], 'object_xml'))
        return

    print('========== [Start Training] ==========\n')
    FLAG_infinite = False
    loop_counter = 0
    _ = None  # placeholder for unused assignments

    # infinite training scheme
    while True:
        if FLAG_infinite:  # not the first loop
            if not args.infinite:
                print('Infinite Training is disabled. Done.')
                break
            loop_counter += 1
            args.train_all = True
            optimizer = get_optimizer(lr, 2, options, vgg_features_var,
                                      rpn_features, hdn_features, mps_features)
            args.start_epoch, _, exp_logger = load_checkpoint(
                model, optimizer,
                os.path.join(options['logs']['dir_logs'], 'ckpt'))
            scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer,
                step_size=options['optim']['lr_decay_epoch'],
                gamma=options['optim']['lr_decay'])
            options['optim']['epochs'] = (
                args.start_epoch + options['optim']['lr_decay_epoch'] * 5)
            args.iter_size *= 2
            print('========= [{}] loop ========='.format(loop_counter))
            print('[epoch {}] to [epoch {}]'.format(args.start_epoch,
                                                    options['optim']['epochs']))
            print('[iter_size]: {}'.format(args.iter_size))
        FLAG_infinite = True

        for epoch in range(args.start_epoch, options['optim']['epochs']):
            # Training
            scheduler.step()
            print('[Learning Rate]\t{}'.format(optimizer.param_groups[0]['lr']))
            is_best = False
            model.module.engines.train(
                train_loader, model, optimizer, exp_logger, epoch,
                args.train_all, args.print_freq,
                clip_gradient=options['optim']['clip_gradient'],
                iter_size=args.iter_size)

            if (epoch + 1) % args.eval_epochs == 0:
                print('\n============ Epoch {} ============'.format(epoch))
                recall, result = model.module.engines.test(
                    test_loader, model, top_Ns,
                    nms=args.nms,
                    triplet_nms=args.triplet_nms)
                is_best = (recall[0] > best_recall).all()
                best_recall = recall[0] if is_best else best_recall
                best_recall_phrase = recall[1] if is_best else best_recall_phrase
                best_recall_pred = recall[2] if is_best else best_recall_pred
                print('\n[Result]')
                for idx, top_N in enumerate(top_Ns):
                    print('\tTop-%d Recall'
                          '\t[Pred]: %2.3f%% (best: %2.3f%%)'
                          '\t[Phr]: %2.3f%% (best: %2.3f%%)'
                          '\t[Rel]: %2.3f%% (best: %2.3f%%)' % (
                              top_N,
                              float(recall[2][idx]) * 100,
                              float(best_recall_pred[idx]) * 100,
                              float(recall[1][idx]) * 100,
                              float(best_recall_phrase[idx]) * 100,
                              float(recall[0][idx]) * 100,
                              float(best_recall[idx]) * 100))
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'arch': options['model']['arch'],
                        'exp_logger': exp_logger,
                        'best_recall': best_recall[0],
                    },
                    model.module,
                    optimizer.state_dict(),
                    options['logs']['dir_logs'],
                    args.save_all_from,
                    is_best)
                print('====================================')

            # updating the learning policy
            if (epoch + 1) == args.warm_iters:
                print('Free the base CNN part.')
                args.train_all = True
                # update the optimizer and the corresponding requires_grad state
                optimizer = get_optimizer(lr, 2, options, vgg_features_var,
                                          rpn_features, hdn_features,
                                          mps_features)
                scheduler = torch.optim.lr_scheduler.StepLR(
                    optimizer,
                    step_size=options['optim']['lr_decay_epoch'],
                    gamma=options['optim']['lr_decay'])
def main():
    global args
    print("Loading training set and testing set...")
    with open(args.path_data_opts, 'r') as f:
        data_opts = yaml.load(f, Loader=yaml.FullLoader)
    args.model_name += '_' + data_opts['dataset_version'] + '_' + args.dataset_option
    train_set = visual_genome(data_opts, 'train',
                              dataset_option=args.dataset_option,
                              batch_size=args.batch_size,
                              use_region=True)
    test_set = visual_genome(data_opts, 'test',
                             dataset_option=args.dataset_option,
                             batch_size=args.batch_size,
                             use_region=True)
    print("Done.")

    with open(args.path_rpn_opts, 'r') as f:
        opts = yaml.load(f, Loader=yaml.FullLoader)
    opts['scale'] = train_set.opts['test']['SCALES'][0]
    net = RPN(opts)
    # Pass enough information for anchor target generation
    train_set._feat_stride = net._feat_stride
    train_set._rpn_opts = net.opts

    # In evaluate mode, disable shuffling
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=False if args.evaluate else True,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=visual_genome.collate)
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=visual_genome.collate)

    if args.resume is not None:
        print('Resume training from: {}'.format(args.resume))
        RPN_utils.load_checkpoint(args.resume, net)
        optimizer = torch.optim.SGD(
            [{'params': list(net.parameters())[26:]}],
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=0.0005)
    else:
        print('Training from scratch.')
        optimizer = torch.optim.SGD(list(net.parameters())[26:],
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=0.0005)

    network.set_trainable(net.features, requires_grad=False)
    net.cuda()

    if args.evaluate:
        # dump RPN results for the test split
        data_dir = os.path.join(data_opts['dir'], 'vg_cleansing', 'output',
                                data_opts['dataset_version'])
        filename = args.dump_name + '_' + args.dataset_option
        net.eval()
        evaluate(test_loader, net,
                 path=os.path.join(data_dir, filename),
                 dataset='test')
        return

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    best_recall = np.array([0.0, 0.0])
    for epoch in range(0, args.max_epoch):
        # Training
        train(train_loader, net, optimizer, epoch)
        # Testing
        net.eval()
        recall, _, _ = test(test_loader, net)
        print('Epoch[{epoch:d}]: '
              'Recall: '
              'object: {recall[0]: .3f}% (Best: {best_recall[0]: .3f}%) '
              'region: {recall[1]: .3f}% (Best: {best_recall[1]: .3f}%)'.format(
                  epoch=epoch, recall=recall * 100,
                  best_recall=best_recall * 100))
        # update the learning rate
        if epoch % args.step_size == 0 and epoch > 0:
            args.disable_clip_gradient = True
            args.lr /= 10
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
        save_name = os.path.join(args.output_dir, '{}'.format(args.model_name))
        RPN_utils.save_checkpoint(save_name, net, epoch,
                                  np.all(recall > best_recall))
        best_recall = recall if np.all(recall > best_recall) else best_recall
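# --- Sketch (illustrative) ----------------------------------------------------
# Both RPN scripts decay the learning rate by mutating param_group['lr'] by
# hand (so they can flip disable_clip_gradient at the same time). The decay
# itself is equivalent to torch.optim.lr_scheduler.StepLR with gamma=0.1;
# hypothetical values:
import torch.nn as nn
import torch.optim as optim

demo_model = nn.Linear(4, 2)
demo_opt = optim.SGD(demo_model.parameters(), lr=0.01, momentum=0.9)
demo_sched = optim.lr_scheduler.StepLR(demo_opt, step_size=5, gamma=0.1)
for epoch in range(10):
    # train(...); test(...)
    demo_opt.step()    # in PyTorch >= 1.1, optimizer.step() precedes scheduler.step()
    demo_sched.step()
    print(epoch, demo_opt.param_groups[0]['lr'])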
def __init__(self, args, bn=False):
    super(Vrd_Model, self).__init__()
    self.n_rel = args.num_relations
    self.n_obj = args.num_classes

    # VGG16-style backbone; conv1-conv5 are frozen
    self.conv1 = nn.Sequential(Conv2d(3, 64, 3, same_padding=True, bn=bn),
                               Conv2d(64, 64, 3, same_padding=True, bn=bn),
                               nn.MaxPool2d(2))
    self.conv2 = nn.Sequential(Conv2d(64, 128, 3, same_padding=True, bn=bn),
                               Conv2d(128, 128, 3, same_padding=True, bn=bn),
                               nn.MaxPool2d(2))
    network.set_trainable(self.conv1, requires_grad=False)
    network.set_trainable(self.conv2, requires_grad=False)
    self.conv3 = nn.Sequential(Conv2d(128, 256, 3, same_padding=True, bn=bn),
                               Conv2d(256, 256, 3, same_padding=True, bn=bn),
                               Conv2d(256, 256, 3, same_padding=True, bn=bn),
                               nn.MaxPool2d(2))
    self.conv4 = nn.Sequential(Conv2d(256, 512, 3, same_padding=True, bn=bn),
                               Conv2d(512, 512, 3, same_padding=True, bn=bn),
                               Conv2d(512, 512, 3, same_padding=True, bn=bn),
                               nn.MaxPool2d(2))
    self.conv5 = nn.Sequential(Conv2d(512, 512, 3, same_padding=True, bn=bn),
                               Conv2d(512, 512, 3, same_padding=True, bn=bn),
                               Conv2d(512, 512, 3, same_padding=True, bn=bn))
    network.set_trainable(self.conv3, requires_grad=False)
    network.set_trainable(self.conv4, requires_grad=False)
    network.set_trainable(self.conv5, requires_grad=False)

    self.roi_pool = RoIPool(7, 7, 1.0 / 16)
    self.fc6 = FC(512 * 7 * 7, 4096)
    self.fc7 = FC(4096, 4096)
    self.fc_obj = FC(4096, self.n_obj, relu=False)
    network.set_trainable(self.fc6, requires_grad=False)
    network.set_trainable(self.fc7, requires_grad=False)
    network.set_trainable(self.fc_obj, requires_grad=False)
    self.fc8 = FC(4096, 256)

    # Fusion width: 256 for appearance, plus 256 per enabled branch below
    n_fusion = 256
    if args.use_so:
        # subject/object appearance pair
        self.fc_so = FC(256 * 2, 256)
        n_fusion += 256
    if args.loc_type == 1:
        # spatial branch: 8-d box-location vector
        self.fc_lov = FC(8, 256)
        n_fusion += 256
    elif args.loc_type == 2:
        # spatial branch: two-channel spatial masks
        self.conv_lo = nn.Sequential(
            Conv2d(2, 96, 5, same_padding=True, stride=2, bn=bn),
            Conv2d(96, 128, 5, same_padding=True, stride=2, bn=bn),
            Conv2d(128, 64, 8, same_padding=False, bn=bn))
        self.fc_lov = FC(64, 256)
        n_fusion += 256
    if args.use_obj:
        # frozen word embeddings for the class labels
        self.emb = nn.Embedding(self.n_obj, 300)
        network.set_trainable(self.emb, requires_grad=False)
        self.fc_so_emb = FC(300 * 2, 256)
        n_fusion += 256

    self.fc_fusion = FC(n_fusion, 256)
    self.fc_rel = FC(256, self.n_rel, relu=False)
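# --- Sketch (illustrative) ----------------------------------------------------
# Quick check of the fusion-width accounting above (flag names mirror the
# constructor arguments; the helper function itself is hypothetical):
def fusion_width(use_so, loc_type, use_obj):
    n = 256                      # appearance feature from fc8
    if use_so:
        n += 256                 # subject/object pair
    if loc_type in (1, 2):
        n += 256                 # spatial branch
    if use_obj:
        n += 256                 # word-embedding branch
    return n

print(fusion_width(True, 2, True))   # 1024 -> fc_fusion = FC(1024, 256)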