def main():
    """Train the small relation-network verb model on imSitu.

    Supports fresh training and resuming from a checkpoint via
    ``--command resume`` together with ``--weights_file``.
    """
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument("--command", choices=["train", "eval", "resume", 'predict'], required=True)
    parser.add_argument("--weights_file", help="the model to start from")
    args = parser.parse_args()

    # Hyperparameters.  The data loaders below hard-code batch size 64,
    # matching the printed LR-scheme note.
    #lr = 1e-5
    lr = 0.00001
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3
    print('LR scheme : lr decay, vgg, fc as per gnn paper batch 64', 1e-5, 0.1, 25)

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    # Context managers close the annotation files promptly (the original
    # bare open() calls leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = model_verb_small.RelationNetworks(encoder, args.gpuid)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=64, shuffle=True, num_workers=n_worker)

    # traindev re-uses dev.json with a smaller batch size.
    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=8, shuffle=True, num_workers=n_worker)

    if args.command == "resume":
        print("loading model weights...")
        model.load_state_dict(torch.load(args.weights_file))

    if args.gpuid >= 0:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer, scheduler,
          n_epoch, 'trained_models', encoder, args.gpuid, clip_norm, lr_max)
def main():
    """Train the cnn_linear_marginal baseline with a (currently one-point)
    hyperparameter sweep over learning rate and weight decay."""
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    args = parser.parse_args()

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    # Context managers close the annotation files promptly (the original
    # bare open() calls leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = cnn_linear_marginal.baseline(encoder, args.gpuid)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=0)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=64, shuffle=True, num_workers=4)

    if args.gpuid >= 0:
        model.cuda()

    # Extend these lists to sweep more hyperparameter settings.
    lr_set = [0.001]
    decay_set = [5e-4]
    for lr in lr_set:
        for decay in decay_set:
            print('CURRENT PARAM SET : lr, decay :', lr, decay)
            # Only optimize parameters that still require gradients.
            optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                         lr=lr, weight_decay=decay)
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
            print('Model training started!')
            train(model, train_loader, dev_loader, optimizer, scheduler, 200,
                  'trained_models', encoder, args.gpuid)
def main():
    """Train the plain baseline model on imSitu with fixed hyperparameters."""
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    args = parser.parse_args()

    dataset_folder = 'imsitu_data'
    imgset_folder = 'of500_images_resized'

    # Context managers close the annotation files promptly (the original
    # bare open() calls leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = baseline_model.baseline(encoder, args.gpuid)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=3)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=64, shuffle=True, num_workers=3)

    if args.gpuid >= 0:
        model.cuda()

    # lr / weight decay are fixed here; expose as CLI flags if sweeping is needed.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    print('Model training started!')
    train(model, train_loader, dev_loader, optimizer, 200, 'trained_models', encoder, args.gpuid)
def main():
    """Train/finetune the MAC-style end-to-end network (verb + roles).

    Mutually exclusive modes select which parameter groups are trainable:
    --train_role, --finetune_verb, --finetune_cnn, --resume_training,
    or (default) full training from scratch.
    """
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--resume_training', action='store_true', help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='', help='The model we resume')
    parser.add_argument('--verb_module', type=str, default='', help='pretrained verb module')
    parser.add_argument('--train_role', action='store_true', help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument('--finetune_verb', action='store_true', help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument('--finetune_cnn', action='store_true', help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models', help='Location to output the model')
    #todo: train role module separately with gt verbs
    args = parser.parse_args()

    # Hyperparameters (lr_max / clip_norm are forwarded to train()).
    #lr = 5e-6
    lr = 0.0001
    lr_max = 5e-4
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'
    print('model spec :, mac net v pred for training and loss calc normalizing from only matching role count ')

    # Context managers close the annotation files promptly (the original
    # bare open() calls leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = mac_model_with_verb.E2ENetwork(encoder, args.gpuid)

    # Group up the features so the optimizer factory can assign per-group rates.
    cnn_features, verb_features, role_features = utils.group_features_single(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.dev_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=8, shuffle=True, num_workers=n_worker)

    # Freeze everything; the optimizer factory re-enables the selected groups.
    utils.set_trainable(model, False)
    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = False
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'
    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'
    elif args.finetune_cnn:
        print('CNN finetune, Verb finetune, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    optimizer = utils.get_optimizer_single(lr, weight_decay, optimizer_select,
                                           cnn_features, verb_features, role_features)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    if args.gpuid >= 0:
        model.cuda()

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer, scheduler,
          n_epoch, args.output_dir, encoder, args.gpuid, clip_norm, lr_max,
          model_name, args)
def main():
    """Train or evaluate the MAC + relation-network model.

    --evaluate scores the dev set and dumps per-role predictions to JSON;
    --test scores the test set; otherwise the model is trained, optionally
    starting from a pretrained verb module or a resumed checkpoint.
    """
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--resume_training', action='store_true', help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='', help='The model we resume')
    parser.add_argument('--verb_module', type=str, default='', help='pretrained verb module')
    parser.add_argument('--train_role', action='store_true', help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument('--finetune_verb', action='store_true', help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument('--finetune_cnn', action='store_true', help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models', help='Location to output the model')
    parser.add_argument('--evaluate', action='store_true', help='Only use the testing mode')
    parser.add_argument('--test', action='store_true', help='Only use the testing mode')
    #todo: train role module separately with gt verbs
    args = parser.parse_args()

    # Hyperparameters (lr_max / clip_norm are forwarded to train()).
    #lr = 5e-6
    lr = 0.0001
    lr_max = 5e-4
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'
    print('model spec :, mac net v + rn ')

    # Context managers close the annotation files promptly (the original
    # bare open() calls leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = model_mac_with_rn.E2ENetwork(encoder, args.gpuid)

    # Group up the features so the optimizer factory can assign per-group rates.
    cnn_features, verb_features, role_features = utils.group_features_single(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/test.json") as f:
        test_set = json.load(f)
    test_set = imsitu_loader(imgset_folder, test_set, encoder, model.dev_preprocess())
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.dev_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=8, shuffle=True, num_workers=n_worker)

    # Freeze everything; the optimizer factory re-enables the selected groups.
    utils.set_trainable(model, False)
    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = False
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'
    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'
    elif args.finetune_cnn:
        print('CNN finetune, Verb finetune, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    # NOTE(review): this optimizer is immediately replaced by the Adam
    # instance below, so the factory call has no effect — confirm which
    # optimizer is actually intended.
    optimizer = utils.get_optimizer_single(lr, weight_decay, optimizer_select,
                                           cnn_features, verb_features, role_features)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    if args.gpuid >= 0:
        model.cuda()

    optimizer = torch.optim.Adam([{'params': model.conv.parameters(), 'lr': 5e-5},
                                  {'params': model.verb.parameters()},
                                  {'params': model.role_labeller.parameters()}],
                                 lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if args.evaluate:
        top1, top5, val_loss = eval(model, dev_loader, encoder, args.gpuid, write_to_file=True)
        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()
        # NOTE(review): six terms are summed but the sum is divided by 8 —
        # other drivers in this file also include top5 "value*"/"value-all*";
        # confirm whether the divisor (or the term list) is intended.
        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + \
            top5_avg["verb"] + top5_avg["value"] + top5_avg["value-all"]
        avg_score /= 8
        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))
        # Write per-role predictions and failure cases for offline analysis.
        role_dict = top1.role_dict
        fail_val_all = top1.value_all_dict
        with open('role_pred_data.json', 'w') as fp:
            json.dump(role_dict, fp, indent=4)
        with open('fail_val_all.json', 'w') as fp:
            json.dump(fail_val_all, fp, indent=4)
        print('Writing predictions to file completed !')
    elif args.test:
        top1, top5, val_loss = eval(model, test_loader, encoder, args.gpuid, write_to_file=True)
        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()
        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + \
            top5_avg["verb"] + top5_avg["value"] + top5_avg["value-all"]
        avg_score /= 8
        print('Test average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))
    else:
        print('Model training started!')
        train(model, train_loader, dev_loader, traindev_loader, optimizer, scheduler,
              n_epoch, args.output_dir, encoder, args.gpuid, clip_norm, lr_max,
              model_name, args)
def main():
    """Train the verb-embedding relation network.

    The classifier layer and verb branch are the only trainable parts;
    weights can be resumed or initialized from a pretrained verb module.
    """
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument("--command", choices=["train", "eval", "resume", 'predict', 'finetune'], required=True)
    parser.add_argument("--batch_size", '-b', type=int, default=64)
    parser.add_argument("--weights_file", help="the model to start from")
    parser.add_argument('--finetune_verb', action='store_true',
                        help='verb classifier train from the scratch, all others fixed')
    parser.add_argument('--verb_module', type=str, default='', help='pretrained verb module')
    args = parser.parse_args()

    batch_size = args.batch_size
    # Hyperparameters (lr_max / clip_norm are forwarded to train()).
    #lr = 1e-5
    lr = 1e-4
    lr_max = 5e-4
    clip_norm = 50
    weight_decay = 1e-5
    n_epoch = 500
    n_worker = 4

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'
    model_dir = 'trained_models'

    # Context managers close the annotation files promptly (the original
    # bare open() calls leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = model_verb_embd.RelationNetworks(encoder, args.gpuid)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    if args.command == "resume":
        print("loading model weights...")
        model.load_state_dict(torch.load(args.weights_file))
    elif args.finetune_verb:
        print('CNN fix, Verb fc fixed, train verb classifier layer from the scratch from: {}'.format(args.verb_module))
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])

    if args.gpuid >= 0:
        print('GPU enabled')
        model.cuda()

    # Freeze the whole model, then re-enable only the classifier and the
    # verb branch; each gets its own learning rate.
    utils.set_trainable(model, False)
    utils.set_trainable_param(model.classifier.parameters(), True)
    utils.set_trainable_param(model.verb.parameters(), True)
    optimizer = torch.optim.Adam([
        {'params': model.classifier.parameters(), 'lr': 1e-3},
        {'params': model.verb.parameters(), 'lr': 5e-5},
    ])
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer, scheduler,
          n_epoch, model_dir, encoder, args.gpuid, clip_norm, lr_max)
def main():
    """Train the big updated relation network on the of500 image set."""
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument("--command", choices=["train", "eval", "resume", 'predict'], required=True)
    parser.add_argument("--weights_file", help="the model to start from")
    args = parser.parse_args()

    # Hyperparameters.  NOTE(review): lr_gamma = 2 makes StepLR *double*
    # the learning rate every lr_step epochs rather than decay it —
    # confirm this warm-up-style schedule is intended.
    lr = 5e-6
    lr_max = 5e-4
    lr_gamma = 2
    lr_step = 10
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imsitu_data'
    imgset_folder = 'of500_images_resized'

    # Context managers close the annotation files promptly (the original
    # bare open() calls leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = model_updated_big.RelationNetworks(encoder, args.gpuid)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=4, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=4, shuffle=True, num_workers=n_worker)

    # Unlike the other drivers, traindev here is a held-out slice of train.
    with open(dataset_folder + "/train_eval.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=8, shuffle=True, num_workers=n_worker)

    if args.command == "resume":
        print("loading model weights...")
        model.load_state_dict(torch.load(args.weights_file))

    if args.gpuid >= 0:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer, scheduler,
          n_epoch, 'trained_models', encoder, args.gpuid, clip_norm, lr_max)
def main():
    """Train or evaluate the agent-to-verb model (verb prediction with context).

    Modes: --train_verb / --finetune_agent select which groups train;
    --evaluate and --test score dev/test and dump prediction JSON files.
    """
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--resume_training', action='store_true', help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='', help='The model we resume')
    parser.add_argument('--agent_module', type=str, default='', help='pretrained agent module')
    parser.add_argument('--train_role', action='store_true', help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument('--train_verb', action='store_true', help='cnn fix, agent fix, verb train from the scratch')
    parser.add_argument('--finetune_agent', action='store_true', help='cnn fix, agent finetune, verb train from the scratch')
    parser.add_argument('--finetune_cnn', action='store_true', help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models', help='Location to output the model')
    parser.add_argument('--evaluate', action='store_true', help='Only use the testing mode')
    parser.add_argument('--test', action='store_true', help='Only use the testing mode')
    parser.add_argument('--dataset_folder', type=str, default='./imSitu', help='Location of annotations')
    parser.add_argument('--imgset_dir', type=str, default='./resized_256', help='Location of original images')
    parser.add_argument('--frcnn_feat_dir', type=str, help='Location of output from detectron')
    #todo: train role module separately with gt verbs
    args = parser.parse_args()

    # Hyperparameters (lr_max / clip_norm are forwarded to train()).
    #lr = 5e-6
    lr = 0.0001
    lr_max = 5e-4
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir
    print('model spec :, verb role with context ')

    # Context managers close the annotation files promptly (the original
    # bare open() calls leaked the file handles).
    with open(dataset_folder + "/updated_train_new.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = model_agent2verb.BaseModel(encoder, args.gpuid)

    # Group up the features; all verb and role features are grouped under
    # their own heads since the model treats them as single units.
    cnn_agent_features, cnn_verb_features, agent_features, verb_features = \
        utils.group_features_agent2verb(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/test.json") as f:
        test_set = json.load(f)
    test_set = imsitu_loader(imgset_folder, test_set, encoder, model.dev_preprocess())
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.dev_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=8, shuffle=True, num_workers=n_worker)

    # Freeze everything; the optimizer factory re-enables the selected groups.
    utils.set_trainable(model, False)
    if args.train_verb:
        print('CNN fix, agent fix, train verb from the scratch from: {}'.format(args.agent_module))
        args.train_all = False
        if len(args.agent_module) == 0:
            raise Exception('[pretrained agent module] not specified')
        utils.load_net(args.agent_module, [model.conv_agent, model.agent], ['conv', 'agent'])
        optimizer_select = 1
        model_name = 'cfx_afx_vtrain'
    elif args.finetune_agent:
        print('CNN fix, agent finetune, train verb from the scratch from: {}'.format(args.agent_module))
        args.train_all = True
        if len(args.agent_module) == 0:
            raise Exception('[pretrained agent module] not specified')
        # NOTE(review): this branch loads into model.conv while the
        # train_verb branch uses model.conv_agent — confirm which
        # attribute the agent CNN actually lives on.
        utils.load_net(args.agent_module, [model.conv, model.agent], ['conv', 'agent'])
        optimizer_select = 2
        model_name = 'cfx_aft_vtrain'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    # NOTE(review): this optimizer is immediately replaced by the Adamax
    # instance below, so the factory call has no effect — confirm which
    # optimizer is actually intended.
    optimizer = utils.get_optimizer_agent2verb(lr, weight_decay, optimizer_select,
                                               cnn_agent_features, cnn_verb_features,
                                               agent_features, verb_features)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    # Fixed seeds + deterministic cuDNN for reproducible runs.
    torch.manual_seed(1234)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(1234)
        torch.backends.cudnn.deterministic = True

    optimizer = torch.optim.Adamax([{'params': cnn_verb_features, 'lr': 5e-5},
                                    {'params': verb_features}],
                                   lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if args.evaluate:
        top1, top5, val_loss = eval(model, dev_loader, encoder, args.gpuid, write_to_file=True)
        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()
        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + \
            top5_avg["verb"] + top5_avg["value"] + top5_avg["value-all"] + \
            top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8
        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))
        # Write per-role predictions and agent failures for offline analysis.
        role_dict = top1.role_dict
        fail_agent = top1.fail_agent
        with open('role_pred_data.json', 'w') as fp:
            json.dump(role_dict, fp, indent=4)
        with open('fail_agent.json', 'w') as fp:
            json.dump(fail_agent, fp, indent=4)
        print('Writing predictions to file completed !')
    elif args.test:
        top1, top5, val_loss = eval(model, test_loader, encoder, args.gpuid, write_to_file=True)
        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()
        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + \
            top5_avg["verb"] + top5_avg["value"] + top5_avg["value-all"] + \
            top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8
        print('Test average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))
    else:
        print('Model training started!')
        train(model, train_loader, dev_loader, traindev_loader, optimizer, scheduler,
              n_epoch, args.output_dir, encoder, args.gpuid, clip_norm, lr_max,
              model_name, args)
def main():
    """Train the triplet-loss verb model using verb-group triplet sampling."""
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument("--command", choices=["train", "eval", "resume", 'predict'], required=True)
    parser.add_argument("--batch_size", '-b', type=int, default=16)
    parser.add_argument("--weights_file", help="the model to start from")
    parser.add_argument("--verb_group_file", help="csv containing most probable words for triplets")
    parser.add_argument('--margin', type=float, default=0.2,
                        help='the margin value for the triplet loss function (default: 0.2')
    args = parser.parse_args()

    batch_size = args.batch_size
    # Hyperparameters (lr_max / clip_norm are forwarded to train()).
    #lr = 1e-5
    lr = 1e-4
    lr_max = 5e-4
    clip_norm = 50
    weight_decay = 1e-5
    n_epoch = 500
    n_worker = 4

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'
    model_dir = 'trained_models'

    # Context managers close the annotation files promptly (the original
    # bare open() calls leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = model_verb_tuan.RelationNetworks(encoder, args.gpuid)

    # Training uses the triplet loader so each batch carries anchor/pos/neg
    # samples drawn via the verb-group file.
    train_set = imsitu_triplet_loader(imgset_folder, train_set, encoder,
                                      args.verb_group_file, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    if args.command == "resume":
        print("loading model weights...")
        model.load_state_dict(torch.load(args.weights_file))

    if args.gpuid >= 0:
        model.cuda()

    # Slower rate for the pretrained CNN trunk, faster for the verb head.
    optimizer = torch.optim.Adam([
        {'params': model.conv.parameters(), 'lr': 5e-5},
        {'params': model.verb.parameters()},
    ], lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer, scheduler,
          n_epoch, model_dir, encoder, args.gpuid, clip_norm, lr_max)
def main():
    """Train the self-attention + feed-forward VSRL model with a Noam
    (transformer-style) learning-rate schedule."""
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--resume_training', action='store_true', help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='', help='The model we resume')
    parser.add_argument('--verb_module', type=str, default='', help='pretrained verb module')
    parser.add_argument('--train_role', action='store_true', help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument('--finetune_verb', action='store_true', help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument('--finetune_cnn', action='store_true', help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models', help='Location to output the model')
    #todo: train role module separately with gt verbs
    args = parser.parse_args()

    # Hyperparameters.  NOTE(review): lr = 0 is handed to the optimizer
    # factory because the NoamOpt wrapper below controls the effective
    # rate — confirm the factory tolerates a zero base rate.
    #lr = 5e-6
    lr = 0
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'
    print('model spec :, 256 hidden, 1e-4 init lr, 25 epoch decay, 4 layer mlp for g,2mlp f1, 3 att layers with res connections param init xavier uni 2 heads dropout 0.5 mask 6loss maskb4g transformopt')

    # Context managers close the annotation files promptly (the original
    # bare open() calls leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = model_vsrl_finetune_selfatt_ff.RelationNetworks(encoder, args.gpuid)

    # Group up the features so the optimizer factory can assign per-group rates.
    cnn_features, verb_features, role_features = utils.group_features(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=24, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=24, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=8, shuffle=True, num_workers=n_worker)

    # Freeze everything; the optimizer factory re-enables the selected groups.
    utils.set_trainable(model, False)
    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = False
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'
    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'
    elif args.finetune_cnn:
        print('CNN finetune, Verb finetune, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    optimizer = utils.get_optimizer(lr, weight_decay, optimizer_select,
                                    cnn_features, verb_features, role_features)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    if args.gpuid >= 0:
        model.cuda()

    # Noam warm-up wrapper (model size 256, factor 1, 4000 warm-up steps)
    # is what train() steps; the StepLR scheduler is passed alongside it.
    opt = utils.NoamOpt(256, 1, 4000, optimizer)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, opt, scheduler,
          n_epoch, args.output_dir, encoder, args.gpuid, clip_norm, lr_max,
          model_name, args)