def eval_model(img_net, seq_net, device, data):
    """Restore the trained checkpoint for this image/sequence backbone pair
    and run the project's evaluation routine on ``data``.

    ``eval`` here is the project's evaluation function (it shadows the
    builtin), called as eval(model, data, device).
    """
    net = Model(img_net, seq_net).to(device)
    model_name = f"SEQ_{img_net}_IMG_{seq_net}"
    ckpt_file = f"./trained_models_new/{model_name}_checkpoint.pt"
    print("LOADING MODEL")
    # map_location keeps the restore working even when the checkpoint was
    # saved on a different device.
    state = torch.load(ckpt_file, map_location=device)
    net.load_state_dict(state)
    print("MODEL LOADED - EVAL STARTING")
    return eval(net, data, device)
def test(args):
    """Run the VQA model on the test split and dump predictions to result.json.

    Requires CUDA and a checkpoint path in ``args.modelpath``. Performs one
    forward pass per batch, takes the argmax answer index, and writes a list
    of {'question_id', 'answer'} records.
    """
    # Some preparation
    torch.manual_seed(1000)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(1000)
    else:
        raise SystemExit('No CUDA available, don\'t do this.')

    print('Loading data')
    loader = Data_loader(args.bsize, args.emb, args.multilabel, train=False)
    print('Parameters:\n\tvocab size: %d\n\tembedding dim: %d\n\tK: %d\n\tfeature dim: %d\n\thidden dim: %d\n\toutput dim: %d' % (
        loader.q_words, args.emb, loader.K, loader.feat_dim, args.hid,
        loader.n_answers))

    model = Model(vocab_size=loader.q_words,
                  emb_dim=args.emb,
                  K=loader.K,
                  feat_dim=loader.feat_dim,
                  hid_dim=args.hid,
                  out_dim=loader.n_answers,
                  pretrained_wemb=loader.pretrained_wemb)
    model = model.cuda()

    if args.modelpath and os.path.isfile(args.modelpath):
        print('Resuming from checkpoint %s' % (args.modelpath))
        ckpt = torch.load(args.modelpath)
        model.load_state_dict(ckpt['state_dict'])
    else:
        raise SystemExit('Need to provide model path.')

    result = []
    # BUG FIX: the original used Python-2-only ``xrange``, which raises
    # NameError on Python 3 (the rest of this file uses f-strings, i.e. Py3).
    for step in range(loader.n_batches):
        # Batch preparation
        q_batch, a_batch, i_batch = loader.next_batch()
        q_batch = Variable(torch.from_numpy(q_batch))
        i_batch = Variable(torch.from_numpy(i_batch))
        q_batch, i_batch = q_batch.cuda(), i_batch.cuda()

        # Do one model forward and optimize
        output = model(q_batch, i_batch)
        _, ix = output.data.max(1)
        # NOTE(review): in test mode ``a_batch`` apparently holds question
        # ids rather than answers — confirm against Data_loader.
        for i, qid in enumerate(a_batch):
            result.append({'question_id': qid, 'answer': loader.a_itow[ix[i]]})

    # FIX: close the output file deterministically instead of leaking the
    # handle via json.dump(result, open(...)).
    with open('result.json', 'w') as f:
        json.dump(result, f)
    print('Validation done')
def main():
    """CLI entry point: pick an image backbone and a sequence model, load the
    pickled trailer dataset, split it, and train for up to 500 epochs."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--seq_model", type=str, help="name of time series model", required=True,
                        choices=["vanilla_rnn", "lstm", "lstmn", "transformer_abs", "stack_lstm"])
    parser.add_argument("--img_model", type=str, help="name of img processing model name", required=True,
                        choices=['early_fusion', 'late_fusion', 'slow_fusion', 'resnet',
                                 'densenet', 'vgg', 'vanilla_cnn'])
    parser.add_argument("--gpu", type=int, help="which gpu to run on", required=True,
                        choices=[0, 1])
    args = parser.parse_args()

    # Fusion backbones consume pre-chunked clips; the others take the
    # full frame sequence.
    chunked_needed = args.img_model in frozenset(["slow_fusion", "early_fusion", "late_fusion"])
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")

    print("Getting Data...")
    # FIX: the original called pickle.load(open(...)) three times without
    # ever closing the file handles; use context managers instead.
    data_path = ("./converter/trailers_chunked.p" if chunked_needed
                 else "./converter/trailers_normal.p")
    with open(data_path, "rb") as f:
        data = pickle.load(f)
    with open("./converter/scores.p", "rb") as f:
        labels = pickle.load(f)

    X_train, X_test, Y_train, Y_test = train_test_split(data, labels, test_size=0.15)
    # NOTE(review): assumes the split pieces are numpy arrays — they are
    # converted to float32 tensors here.
    X_train, X_test, Y_train, Y_test = [torch.from_numpy(x).float()
                                        for x in (X_train, X_test, Y_train, Y_test)]

    train_iter = list(zip(X_train, Y_train))
    eval_iter = list(zip(X_test, Y_test))

    model_name = f"SEQ_{args.seq_model}_IMG_{args.img_model}"
    model = Model(args.img_model, args.seq_model).to(device)
    print("Training Model....")
    train(500, model, train_iter, eval_iter, model_name, device, tolerance=15)
    print(f"{model_name} training complete")
def main():
    """Train the model in a MonitoredTrainingSession (with optional early
    stopping), then restore the latest checkpoint, evaluate, and write the
    final losses to <model_dir>/final_losses.csv."""
    # Define model parameters and options in dictionary of flags
    FLAGS = getFlags()

    # Initialize model
    model = Model(FLAGS)

    # Specify number of training steps
    training_steps = FLAGS.__dict__['training_steps']

    # Loss tensor name and schedule parameters for EarlyStoppingHook
    loss_name = "loss_stopping:0"
    start_step = FLAGS.__dict__['early_stopping_start']
    stopping_step = FLAGS.__dict__['early_stopping_step']
    tolerance = FLAGS.__dict__['early_stopping_tol']

    # Define saver which only keeps previous 3 checkpoints (default=10)
    scaffold = tf.train.Scaffold(saver=tf.train.Saver(max_to_keep=3))

    # Enable GPU
    if FLAGS.__dict__['use_gpu']:
        # FIX: the original constructed a default ConfigProto and then
        # immediately overwrote it with this one — dead object removed.
        config = tf.ConfigProto(device_count={'GPU': 1})
        config.gpu_options.allow_growth = True
    else:
        config = tf.ConfigProto(device_count={'GPU': 0})

    if FLAGS.early_stopping:
        hooks = [
            tf.train.StopAtStepHook(last_step=training_steps),
            EarlyStoppingHook(loss_name, tolerance=tolerance,
                              stopping_step=stopping_step,
                              start_step=start_step)
        ]
    else:
        hooks = [tf.train.StopAtStepHook(last_step=training_steps)]

    # Initialize TensorFlow monitored training session
    with tf.train.MonitoredTrainingSession(
            config=config,
            checkpoint_dir=os.path.join(FLAGS.__dict__['model_dir'],
                                        FLAGS.__dict__['checkpoint_dir']),
            hooks=hooks,
            save_summaries_steps=None,
            save_summaries_secs=None,
            save_checkpoint_secs=None,
            save_checkpoint_steps=FLAGS.__dict__['checkpoint_step'],
            scaffold=scaffold) as sess:
        # Set model session
        model.set_session(sess)
        # Train model
        model.train()

    print("\n[ TRAINING COMPLETE ]\n")

    # Create new session for model evaluation
    with tf.Session() as sess:
        # Restore network parameters from latest checkpoint
        saver = tf.train.Saver()
        saver.restore(
            sess,
            tf.train.latest_checkpoint(
                os.path.join(FLAGS.__dict__['model_dir'],
                             FLAGS.__dict__['checkpoint_dir'])))

        # Set model session using restored sess
        model.set_session(sess)
        # Initialize datasets
        model.initialize_datasets()
        # Reinitialize dataset handles
        model.reinitialize_handles()

        # Evaluate model (FIX: this comment was severed mid-line in the
        # original source)
        print("[ Evaluating Model ]")
        t_loss, v_loss, t_uq, v_uq, t_l1, v_l1, t_l2, v_l2 = model.evaluate()

        print("\n\n[ Final Evaluations ]")
        print("Training loss: %.7f [ UQ = %.7f ]" % (t_loss, t_uq))
        print("Validation loss: %.7f [ UQ = %.7f ]\n" % (v_loss, v_uq))
        print(" ")
        print("Training relative loss: %.7f [L1] %.7f [L2]" % (t_l1, t_l2))
        print("Validation relative loss: %.7f [L1] %.7f [L2]\n" % (v_l1, v_l2))

        # Persist the four loss pairs, one pair per row.
        with open(os.path.join(FLAGS.__dict__['model_dir'],
                               "final_losses.csv"), "w") as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ', quotechar='|',
                                   quoting=csv.QUOTE_MINIMAL)
            csvwriter.writerow([t_loss, v_loss])
            csvwriter.writerow([t_uq, v_uq])
            csvwriter.writerow([t_l1, v_l1])
            csvwriter.writerow([t_l2, v_l2])
# Checkpoint / statistics paths for the selected model type.
best_model_path = os.path.join("models", model_type, "best_model.pt")
optim_path = os.path.join("models", model_type, "optim.pt")
stats_path = os.path.join("stats", model_type, "stats.pkl")


def init_weights(m):
    # Kaiming-uniform init for any module exposing weight/bias tensors.
    # NOTE(review): the bare except deliberately skips modules without a
    # suitable weight or a bias, but it also hides unrelated failures —
    # consider narrowing to (AttributeError, ValueError).
    try:
        torch.nn.init.kaiming_uniform_(m.weight.data)
        m.bias.data.zero_()
    except:
        pass


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# "residual" experiments build the model with an extra num_residual argument.
if "residual" in experiment:
    model = Model(num_features=num_features, num_residual=num_residual).to(device)
else:
    model = Model(num_features=num_features).to(device)

# Class index 8 is excluded from the loss — presumably a padding/ignore
# label; verify against the dataset's label encoding.
criterion = nn.CrossEntropyLoss(ignore_index=8)
model.apply(init_weights)
print(type(model))

# Resume if a checkpoint exists; note this replaces the freshly initialized
# model object entirely (torch.load of a whole pickled model).
if os.path.exists(latest_model_path):
    print("Model exists. Loading from {0}".format(latest_model_path))
    model = torch.load(latest_model_path)

optimizer = optim.Adam(model.parameters(), lr=lr)
if os.path.exists(optim_path):
    # NOTE(review): this chunk is truncated here — the body of this `if`
    # (presumably restoring optimizer state) continues beyond the visible
    # source.
# NOTE(review): `parser` is created earlier in the file — this chunk starts
# mid-script.

# Evaluation / dataset-caching options.
parser.add_argument("--do_test", default=True, type=util.str2bool)
parser.add_argument("--cache_dataset", default=False, type=util.str2bool)
parser.add_argument("--cache_path", default="", type=str)
############################################################################
# Trainer options — these names mirror PyTorch Lightning Trainer arguments;
# presumably forwarded to a Trainer inside main(). Verify against main().
parser.add_argument("--default_save_path", default="./", type=str)
parser.add_argument("--gradient_clip_val", default=0, type=float)
parser.add_argument("--num_nodes", default=1, type=int)
parser.add_argument("--gpus", default=None, type=int)
parser.add_argument("--overfit_batches", default=0.0, type=float)
parser.add_argument("--track_grad_norm", default=-1, type=int)
parser.add_argument("--check_val_every_n_epoch", default=1, type=int)
parser.add_argument("--fast_dev_run", default=False, type=util.str2bool)
parser.add_argument("--accumulate_grad_batches", default=1, type=int)
parser.add_argument("--max_epochs", default=1000, type=int)
parser.add_argument("--min_epochs", default=1, type=int)
parser.add_argument("--max_steps", default=None, type=int)
parser.add_argument("--min_steps", default=None, type=int)
parser.add_argument("--val_check_interval", default=1.0, type=float)
parser.add_argument("--log_every_n_steps", default=10, type=int)
parser.add_argument("--distributed_backend", default=None, type=str)
parser.add_argument("--precision", default=32, type=int)
parser.add_argument("--resume_from_checkpoint", default=None, type=str)
############################################################################
# Let each model component register its own CLI arguments.
parser = Model.add_model_specific_args(parser)
parser = Tagger.add_model_specific_args(parser)
parser = Classifier.add_model_specific_args(parser)
parser = DependencyParser.add_model_specific_args(parser)
parser = Aligner.add_model_specific_args(parser)
hparams = parser.parse_args()
main(hparams)
if __name__ == '__main__':
    # Script entry point: seed all RNGs, build the VQA model, and train.
    opt = opts.parse_opt()

    # Resolve the RNG seed: 0 means "draw one at random".
    if opt.seed == 0:
        seed = random.randint(1, 10000)
    else:
        seed = opt.seed
    # BUG FIX: the original called torch.cuda.manual_seed(opt.seed) in the
    # random-seed branch, seeding CUDA with 0 instead of the drawn seed; it
    # also only called random.seed() in that branch. All three RNGs are now
    # seeded consistently with the same value.
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file(opt.dataroot + 'dictionary.pkl')
    opt.ntokens = dictionary.ntoken

    model = Model(opt)
    model = model.cuda()
    model.apply(weights_init_kn)
    model = nn.DataParallel(model).cuda()

    # load labeled data
    train_dset = VQAFeatureDataset('train', dictionary, opt.dataroot,
                                   opt.img_root, adaptive=False)
    eval_dset = VQAFeatureDataset('test', dictionary, opt.dataroot,
                                  opt.img_root, adaptive=False)
    train_loader = DataLoader(train_dset, opt.batch_size, shuffle=True,
                              num_workers=4, collate_fn=utils.trim_collate)
    opt.use_all = 1
    eval_loader = DataLoader(eval_dset, opt.batch_size, shuffle=False,
                             num_workers=4, collate_fn=utils.trim_collate)
    train(model, train_loader, eval_loader, opt)
# Inference-time setup: build the test loader and the model to be restored.
torch.backends.cudnn.benchmark = True

dictionary = Dictionary.load_from_file(opt.dataroot + 'dictionary.pkl')
opt.ntokens = dictionary.ntoken

eval_dset = VQAFeatureDataset('test', dictionary, opt.dataroot, opt.img_root,
                              1.0, adaptive=False)

# Scale the batch size by the number of visible GPUs (DataParallel splits
# each batch across devices).
n_device = torch.cuda.device_count()
batch_size = opt.batch_size * n_device

model = Model(opt)
model = model.cuda()
eval_loader = DataLoader(eval_dset, batch_size, shuffle=False, num_workers=1,
                         collate_fn=utils.trim_collate)


def process(args, model, eval_loader):
    # Restore checkpoint weights into a DataParallel-wrapped copy of `model`.
    # NOTE(review): `args` is unused here — the checkpoint path comes from the
    # enclosing `opt`; the DataParallel wrapper is bound only to the local
    # `model` name. The checkpoint may be either a dict with 'model_state'
    # or a bare state dict (hence the .get fallback). This chunk appears
    # truncated — the rest of `process` continues beyond the visible source.
    print('loading %s' % opt.checkpoint_path)
    model_data = torch.load(opt.checkpoint_path)
    model = nn.DataParallel(model).cuda()
    model.load_state_dict(model_data.get('model_state', model_data))