def load_pretrained_model(opt):
    """Build a DCN model, optionally restore its weights, and put it in eval mode.

    Args:
        opt: Mapping of options. Keys read here:
            ``data_info``  -- path given to ``torch.load``; the loaded object
                              must provide ``word2idx``, ``idx2word`` and
                              ``idx2ans``.
            ``gpus``       -- sequence of device ids; the model is moved to the
                              first one when the sequence is non-empty.
            ``word_vecs``  -- argument for ``load_pretrained_vectors``.
            ``train_from`` -- checkpoint path; a falsy value skips restoring.

    Returns:
        Tuple ``(model, idx2ans, word2idx)``.
    """
    data_info = torch.load(opt['data_info'])
    word2idx = data_info["word2idx"]
    idx2word = data_info["idx2word"]
    idx2ans = data_info["idx2ans"]

    print("Building model...")
    word_embedded = LargeEmbedding(len(idx2word), 300, padding_idx=0,
                                   devices=opt['gpus'])
    word_embedded.load_pretrained_vectors(opt['word_vecs'])

    # NOTE(review): 2187 is hard-coded; presumably the size of the answer
    # vocabulary -- confirm against len(idx2ans) before changing it.
    model = DCN(opt, 2187)

    dict_checkpoint = opt['train_from']
    if dict_checkpoint:
        print("Loading model from checkpoint at %s" % dict_checkpoint)
        # Map every stored tensor to CPU so the checkpoint can be restored even
        # when the GPU it was saved from does not exist on this machine; the
        # model is moved to the requested device below.
        checkpoint = torch.load(dict_checkpoint,
                                map_location=lambda storage, loc: storage)
        print(checkpoint.keys())
        model.load_state_dict(checkpoint['model'])

    if len(opt['gpus']) >= 1:
        model.cuda(opt['gpus'][0])

    # Attached after load_state_dict/cuda, so the embedding is neither expected
    # in the checkpoint nor moved by model.cuda(); it receives its own device
    # list through the constructor above.
    model.word_embedded = word_embedded
    model.eval()
    return model, idx2ans, word2idx
def main(opt):
    """Generate answers for the (image, question) pairs of the "test" split.

    Builds the test dataset and loader, constructs the model variant selected
    by ``opt.predict_type`` and ``opt.use_rcnn``, optionally restores weights
    from ``opt.train_from``, and hands everything to ``answer``.

    Args:
        opt: Options object read by attribute (``data_path``, ``data_name``,
            ``seq_per_img``, ``img_name``, ``size_scale``, ``use_h5py``,
            ``use_rcnn``, ``batch``, ``num_workers``, ``use_thread``, ``gpus``,
            ``word_vectors``, ``predict_type``, ``train_from``, ``ensemble``).
    """
    print("Constructing the dataset...")
    if opt.use_rcnn:
        testset = RCNN_Dataset(opt.data_path, opt.data_name, "test",
                               opt.seq_per_img)
    else:
        testset = Dataset(opt.data_path, opt.data_name, "test",
                          opt.seq_per_img, opt.img_name, opt.size_scale,
                          use_h5py=opt.use_h5py)
    testLoader = DataLoader(testset, batch_size=opt.batch, shuffle=False,
                            num_workers=opt.num_workers, pin_memory=True,
                            drop_last=False, use_thread=opt.use_thread)

    idx2word = testset.idx2word
    idx2ans = testset.idx2ans
    ans_pool = torch.from_numpy(testset.ans_pool)

    print("Building model...")
    word_embedded = LargeEmbedding(len(idx2word), 300, padding_idx=0,
                                   devices=opt.gpus)
    word_embedded.load_pretrained_vectors(opt.word_vectors)

    if opt.predict_type in ["sum_attn", "cat_attn", "prod_attn"]:
        # These predictors only need the number of candidate answers.
        num_ans = ans_pool.size(0)
        if opt.use_rcnn:
            model = DCNWithRCNN(opt, num_ans)
        else:
            model = DCN(opt, num_ans)
    else:
        # The remaining predictors receive the embedded answer pool and a
        # mask that is 1.0 wherever the pool entry is non-zero.
        ans = word_embedded(Variable(ans_pool.cuda(opt.gpus[0]),
                                     volatile=True)).data
        ans_mask = ans_pool.ne(0).float()
        if opt.use_rcnn:
            model = DCNWithRCNNAns(opt, ans, ans_mask)
        else:
            model = DCNWithAns(opt, ans, ans_mask)

    dict_checkpoint = opt.train_from
    if dict_checkpoint:
        print("Loading model from checkpoint at %s" % dict_checkpoint)
        # Map stored tensors to CPU so restoring does not depend on the GPU
        # the checkpoint was saved from; the model is moved to the target
        # device right after.
        checkpoint = torch.load(dict_checkpoint,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint["model"])

    if len(opt.gpus) >= 1:
        model.cuda(opt.gpus[0])

    # Attached after load_state_dict/cuda, so the embedding is neither
    # expected in the checkpoint nor moved by model.cuda().
    model.word_embedded = word_embedded

    print("Generating answers...")
    with torch.cuda.device(opt.gpus[0]):
        answer(testLoader, model, idx2ans, opt, ensemble=opt.ensemble)
use_thread=opt['use_thread']) idx2word = testset.idx2word idx2ans = testset.idx2ans ans_pool = testset.ans_pool ans_pool = torch.from_numpy(ans_pool) print("Building model...") word_embedded = LargeEmbedding(len(idx2word), 300, padding_idx=0, devices=opt['gpus']) word_embedded.load_pretrained_vectors(opt['word_vectors']) num_ans = ans_pool.size(0) model = DCN(opt, num_ans) dict_checkpoint = opt['train_from'] if dict_checkpoint: print("Loading model from checkpoint at %s" % dict_checkpoint) checkpoint = torch.load(dict_checkpoint) model.load_state_dict(checkpoint["model"]) if len(opt['gpus']) >= 1: model.cuda(opt['gpus'][0]) model.word_embedded = word_embedded print("Generating answers...") with torch.cuda.device(opt['gpus'][0]): answer(testLoader, model, idx2ans, opt)
def main(opt):
    """Train the network described by ``opt``.

    Seeds the run, builds the training (and, when ``opt.trainval == 0``, the
    validation) dataset and loader, constructs the model variant selected by
    ``opt.predict_type`` and ``opt.use_rcnn``, optionally restores weights from
    ``opt.train_from``, sets up the optimizer and LR schedule, and calls
    ``trainModel``. Elapsed time is printed when training ends or is
    interrupted.

    Args:
        opt: Options object read by attribute (``seed``, ``trainval``,
            ``data_path``, ``data_name``, ``seq_per_img``, ``img_name``,
            ``size_scale``, ``use_h5py``, ``use_rcnn``, ``batch_size``,
            ``shuffle``, ``num_workers``, ``pin_memory``, ``drop_last``,
            ``use_thread``, ``gpus``, ``word_vectors``, ``predict_type``,
            ``train_from``, ``lr``, ``weight_decay``, ``record_step``,
            ``step_size``, ``gamma``).

    Raises:
        SystemExit: with the message "Training interrupted" when a
            ``KeyboardInterrupt`` is received during training.
    """
    Initializer.manual_seed(opt.seed)

    def build_dataset(split):
        # Select the dataset class for the requested split.
        if opt.use_rcnn:
            return RCNN_Dataset(opt.data_path, opt.data_name, split,
                                opt.seq_per_img)
        return Dataset(opt.data_path, opt.data_name, split, opt.seq_per_img,
                       opt.img_name, opt.size_scale, use_h5py=opt.use_h5py)

    def build_loader(dataset):
        # All loaders in this function share the same settings.
        return DataLoader(dataset, batch_size=opt.batch_size,
                          shuffle=opt.shuffle, num_workers=opt.num_workers,
                          pin_memory=opt.pin_memory, drop_last=opt.drop_last,
                          use_thread=opt.use_thread)

    print("Constructing the dataset...")
    if opt.trainval == 0:
        trainset = build_dataset("train")
        trainLoader = build_loader(trainset)
        valset = build_dataset("val")
        valLoader = build_loader(valset)
    else:
        # Training on the combined split leaves nothing to validate on.
        trainset = build_dataset("trainval")
        trainLoader = build_loader(trainset)
        valset = None
        valLoader = None

    idx2word = trainset.idx2word
    ans_pool = torch.from_numpy(trainset.ans_pool)

    print("Building model...")
    word_embedded = LargeEmbedding(len(idx2word), 300, padding_idx=0,
                                   devices=opt.gpus)
    word_embedded.load_pretrained_vectors(opt.word_vectors)

    if opt.predict_type in ["sum_attn", "cat_attn", "prod_attn"]:
        # These predictors only need the number of candidate answers.
        num_ans = ans_pool.size(0)
        if opt.use_rcnn:
            model = DCNWithRCNN(opt, num_ans)
        else:
            model = DCN(opt, num_ans)
    else:
        # The remaining predictors receive the embedded answer pool and a
        # mask that is 1.0 wherever the pool entry is non-zero.
        ans = word_embedded(Variable(ans_pool.cuda(opt.gpus[0]),
                                     volatile=True)).data
        ans_mask = ans_pool.ne(0).float()
        if opt.use_rcnn:
            model = DCNWithRCNNAns(opt, ans, ans_mask)
        else:
            model = DCNWithAns(opt, ans, ans_mask)

    criterion = BinaryLoss()
    evaluation = Accuracy()

    dict_checkpoint = opt.train_from
    if dict_checkpoint:
        print("Loading model from checkpoint at %s" % dict_checkpoint)
        # Map stored tensors to CPU so restoring does not depend on the GPU
        # the checkpoint was saved from; the model is moved to the target
        # device right after.
        checkpoint = torch.load(dict_checkpoint,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint["model"])

    if len(opt.gpus) >= 1:
        model.cuda(opt.gpus[0])
    if len(opt.gpus) > 1:
        model = nn.DataParallel(model, opt.gpus, dim=0)

    # Attached after load_state_dict/cuda (and after any DataParallel wrap), so
    # with several GPUs the embedding lives on the wrapper, not on the module.
    model.word_embedded = word_embedded

    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = Adam(trainable, lr=opt.lr, weight_decay=opt.weight_decay,
                     record_step=opt.record_step)
    scheduler = lr_scheduler.StepLR(optimizer, opt.step_size, gamma=opt.gamma)
    optim_wrapper = OptimWrapper(optimizer, scheduler)

    # Report the parameter count, leaving out parameters whose names start with
    # "resnet", "word_embedded" or "ans".
    if len(opt.gpus) > 1:
        named_parameters = model.module.named_parameters()
    else:
        named_parameters = model.named_parameters()
    excluded_prefixes = ("resnet", "word_embedded", "ans")
    total_params = sum(param.numel() for name, param in named_parameters
                       if not name.startswith(excluded_prefixes))
    print("* Number of parameters: %d" % total_params)

    # Drop the reference to the loaded checkpoint before training starts.
    checkpoint = None

    timer = Timer()
    timer.tic()
    try:
        with torch.cuda.device(opt.gpus[0]):
            trainModel(trainLoader, valLoader, model, criterion, evaluation,
                       optim_wrapper, opt)
    except KeyboardInterrupt:
        print("It took %.2f hours to train the network" %
              (timer.toc() / 3600))
        sys.exit("Training interrupted")
    print("It took %.2f hours to train the network" % (timer.toc() / 3600))