def DataloaderClevr(mode, opt={}, dl=False, opt_prefix='clevr_'):
    """Build a torch DataLoader over one or more CLEVR splits.

    Args:
        mode: split name, e.g. 'train' or 'val'; several splits may be
            concatenated with '+', e.g. 'train+val'.
        opt: global run options; a plain dict is converted to a Namespace.
            Keys prefixed with `opt_prefix` override this loader's defaults.
        dl: passed through as DataLoader's `drop_last`.
        opt_prefix: prefix used to select/strip loader-specific options.

    Returns:
        torch.utils.data.DataLoader over the (possibly concatenated) dataset.
    """
    from vqa_lab.config import CLEVR_QA_DIR, CLEVR_IMG_H5, CLEVR_IMG_PNG, CLEVR_IMG_NPY

    parser = argparse.ArgumentParser(description='PyTorch CLEVR DataLoaer')
    parser.add_argument('--clevr_image_source', type=str, default='h5', help='h5|png')
    parser.add_argument('--clevr_qa_dir', type=str, default=CLEVR_QA_DIR)
    parser.add_argument('--clevr_img_h5', type=str, default=CLEVR_IMG_H5)
    parser.add_argument('--clevr_img_png', type=str, default=CLEVR_IMG_PNG)
    parser.add_argument('--clevr_load_trees', type=bool, default=True)
    parser.add_argument('--clevr_load_png', type=bool, default=False)
    my_opt, _ = parser.parse_known_args()

    prefix_len = len(opt_prefix)

    if isinstance(opt, dict):
        # BUG FIX: was `argparse.Namespace(opt)` — Namespace accepts only
        # keyword arguments, so a dict must be splatted with ** (the sibling
        # loaders already do this).
        opt = argparse.Namespace(**opt)

    my_opt = update_opt_remove_prefix(my_opt, opt, opt_prefix)  # my_opt = { opt_prefix + '...' }

    if 'logdir' in opt.__dict__:
        print_save(opt.logdir, my_opt, to_screen=False)

    from .dataset_clevr import clevrDataset

    def my_collate(batch):
        # Transpose the list of sample dicts into per-key lists, then stack
        # tensor-valued fields into batch tensors.  Optional fields are only
        # emitted when present in the samples.
        raw_samples = {key: [d[key] for d in batch] for key in batch[0]}
        samples = {
            'question': torch.stack(raw_samples['question']),
            'image': torch.stack(raw_samples['image']),
            'qid': torch.LongTensor(raw_samples['qid']),
        }
        if 'answer' in raw_samples:
            samples['answer'] = torch.LongTensor(raw_samples['answer'])
        if 'tree' in raw_samples:
            samples['tree'] = raw_samples['tree']  # trees stay a python list
        if 'img_png' in raw_samples:
            samples['img_png'] = torch.stack(raw_samples['img_png'])
        return samples

    dataset_opt = argparse.Namespace(**{
        k[prefix_len:]: v for k, v in my_opt.__dict__.items()
    })  # remove opt prefix

    from functools import reduce
    # Concatenate one clevrDataset per requested split ('+' separated).
    dataset = reduce(lambda x, y: x + y,
                     [clevrDataset(dataset_opt, m) for m in mode.split('+')])

    return DataLoader(dataset,
                      batch_size=opt.batch_size,
                      collate_fn=my_collate,
                      num_workers=opt.threads,
                      shuffle=(mode == 'train'),
                      drop_last=dl)
def ModelRunner_RBN(opt):
    """Construct a ModelRunner wrapping ResBlockNet.

    Parses 'rbn_'-prefixed hyper-parameters, copies run-wide settings
    (vocab sizes, gpu flag, logdir) from `opt`, snapshots this module's
    source directory into the log dir, and returns a ModelRunner with an
    Adam optimizer and a closure that runs one forward/loss step.

    Args:
        opt: global run options; must provide vocab_size, out_vocab_size,
            gpu and logdir.

    Returns:
        ModelRunner configured for ResBlockNet.
    """
    parser = argparse.ArgumentParser(description='PyTorch RBNRunner')
    parser.add_argument('--rbn_vocab_size', type=int, default=None)
    parser.add_argument('--rbn_out_size', type=int, default=None)
    parser.add_argument('--rbn_word_emb', type=int, default=300)
    parser.add_argument('--rbn_lr', type=float, default=3e-4)
    parser.add_argument('--rbn_beta1', type=float, default=0.9)
    parser.add_argument('--rbn_beta2', type=float, default=0.999)
    parser.add_argument('--rbn_weight_decay', type=float, default=1e-5)
    parser.add_argument('--rbn_gpu', type=bool, default=False)
    parser.add_argument('--rbn_resume', type=str, default=None)
    parser.add_argument('--rbn_logdir', type=str, default=None)
    my_opt, _ = parser.parse_known_args()
    prefix_len = len('rbn_')

    from .resblocknet import ResBlockNet

    # These always come from the global run options, overriding CLI defaults.
    my_opt.rbn_vocab_size = opt.vocab_size
    my_opt.rbn_out_size = opt.out_vocab_size
    my_opt.rbn_gpu = opt.gpu
    my_opt.rbn_logdir = opt.logdir

    print_save(my_opt.rbn_logdir, my_opt)
    # Archive this module's source alongside the logs for reproducibility.
    forced_copydir(
        os.path.dirname(__file__),
        os.path.join(my_opt.rbn_logdir,
                     os.path.basename(os.path.dirname(__file__))))

    def forward(model, input, volatile=False, only_forward=False):
        """Run one batch; returns {'predicts': ...} plus 'loss' when training.

        `volatile=True` disables autograd (evaluation mode); `only_forward`
        skips the loss computation (no 'answer' needed in the batch).
        """
        device = torch.device('cuda' if my_opt.rbn_gpu else "cpu")
        questions, images = input['question'].to(device), input['image'].to(device)

        with torch.set_grad_enabled(not volatile):
            predicts = model(questions, images)

        output = {'predicts': predicts.cpu()}

        if not only_forward:  # idiom fix: was `only_forward == False`
            answers = input['answer'].to(device)
            with torch.set_grad_enabled(not volatile):
                output['loss'] = F.cross_entropy(predicts, answers)

        return output

    model_opt = argparse.Namespace(**{
        k[prefix_len:]: v for k, v in my_opt.__dict__.items()
    })  # remove opt prefix

    return ModelRunner(model=ResBlockNet,
                       model_opt=model_opt,
                       forward_fn=forward,
                       optimizer='adam')
def DataloaderFigureQA(mode, opt={}, dl=False, opt_prefix='figureqa_'):
    """Return a DataLoader for FigureQA; join several splits with '+'."""
    from vqa_lab.config import FIGUREQA_IMG_PNG, FIGUREQA_QA_DIR

    parser = argparse.ArgumentParser(
        description='PyTorch FigureQA DataLoader(Openended)')
    parser.add_argument('--figureqa_img_png', type=str, default=FIGUREQA_IMG_PNG)
    parser.add_argument('--figureqa_qa_dir', type=str, default=FIGUREQA_QA_DIR)
    parser.add_argument('--figureqa_load_trees', type=bool, default=True)
    my_opt, _ = parser.parse_known_args()

    if isinstance(opt, dict):
        opt = argparse.Namespace(**opt)

    # Let prefixed keys in the run options override this loader's defaults.
    my_opt = update_opt_remove_prefix(my_opt, opt, opt_prefix)

    if 'logdir' in opt.__dict__:
        print_save(opt.logdir, my_opt, to_screen=False)

    def my_collate(batch):
        # Pivot the list of per-sample dicts into per-field columns.
        columns = {field: [sample[field] for sample in batch] for field in batch[0]}
        out = {
            'question': torch.stack(columns['question']),
            'image': torch.stack(columns['image']),
            'qid': torch.LongTensor(columns['qid']),
        }
        if 'answer' in columns:
            out['answer'] = torch.LongTensor(columns['answer'])
        if 'tree' in columns:
            out['tree'] = columns['tree']  # trees remain a plain list
        return out

    # Strip the loader prefix before handing options to the dataset class.
    stripped = {k[len(opt_prefix):]: v for k, v in my_opt.__dict__.items()}
    dataset_opt = argparse.Namespace(**stripped)

    from .dataset_figureqa import figureqaDataset
    from functools import reduce
    parts = [figureqaDataset(dataset_opt, split) for split in mode.split('+')]
    dataset = reduce(lambda left, right: left + right, parts)

    return DataLoader(dataset=dataset,
                      batch_size=opt.batch_size,
                      collate_fn=my_collate,
                      num_workers=opt.threads,
                      shuffle=('train' in mode),
                      drop_last=dl)
def DataloaderVQAv2(mode, opt={}, dl=False, opt_prefix='vqav2_'):
    """Return a DataLoader for VQAv2; join several splits with '+'."""
    from vqa_lab.config import VQAV2_QA_DIR, VQAV2_IMG_H5, VQAV2_IMG_BU, VQAV2_IMG_JPG

    parser = argparse.ArgumentParser(
        description='PyTorch VQAv2 DataLoader(Openended)')
    parser.add_argument('--vqav2_image_source', type=str, default='BU', help='h5|jpg|BU')
    parser.add_argument('--vqav2_img_h5', type=str, default=VQAV2_IMG_H5)
    parser.add_argument('--vqav2_img_BU', type=str, default=VQAV2_IMG_BU)
    parser.add_argument('--vqav2_img_jpg', type=str, default=VQAV2_IMG_JPG)
    parser.add_argument('--vqav2_qa_dir', type=str, default=VQAV2_QA_DIR)
    parser.add_argument('--vqav2_load_trees', type=bool, default=True)
    parser.add_argument('--vqav2_load_jpg', type=bool, default=False)
    my_opt, _ = parser.parse_known_args()

    if isinstance(opt, dict):
        opt = argparse.Namespace(**opt)

    # Prefixed keys in the run options override this loader's defaults.
    my_opt = update_opt_remove_prefix(my_opt, opt, opt_prefix)

    if 'logdir' in opt.__dict__:
        print_save(opt.logdir, my_opt, to_screen=False)

    def my_collate(batch):
        # Pivot sample dicts into per-field lists, stacking tensor fields.
        columns = {field: [sample[field] for sample in batch] for field in batch[0]}
        out = {
            'question': torch.stack(columns['question']),
            'image': torch.stack(columns['image']),
            'qid': torch.LongTensor(columns['qid']),
        }
        if 'answer' in columns:
            out['answer'] = torch.LongTensor(columns['answer'])
        if 'raw_answer' in columns:
            out['raw_answer'] = torch.stack(columns['raw_answer'])
        if 'tree' in columns:
            out['tree'] = columns['tree']  # trees remain a plain list
        if 'img_jpg' in columns:
            out['img_jpg'] = torch.stack(columns['img_jpg'])
        return out

    # Strip the loader prefix before handing options to the dataset class.
    stripped = {k[len(opt_prefix):]: v for k, v in my_opt.__dict__.items()}
    dataset_opt = argparse.Namespace(**stripped)

    from .dataset_vqav2 import vqa2Dataset
    from functools import reduce
    parts = [vqa2Dataset(dataset_opt, split) for split in mode.split('+')]
    dataset = reduce(lambda left, right: left + right, parts)

    return DataLoader(dataset=dataset,
                      batch_size=opt.batch_size,
                      collate_fn=my_collate,
                      num_workers=opt.threads,
                      shuffle=('train' in mode),
                      drop_last=dl)
def __init__(self, model, model_opt, forward_fn, optimizer, lr_scheduler=None):
    """Instantiate the model and wire up its optimizer and LR schedule.

    Args:
        model: model class; instantiated as model(model_opt).
        model_opt: namespace of hyper-parameters (lr, betas, weight_decay,
            gpu, resume, logdir, ...).
        forward_fn: callable(model, batch, ...) performing one step.
        optimizer: 'adam' | 'sgd' | 'adamax'; any other value is stored
            as-is on self.optimizer.
        lr_scheduler: 'step' enables StepLR; anything else is stored as-is.
    """
    super(ModelRunner, self).__init__()

    self.model = model(model_opt)
    self.lr_scheduler = lr_scheduler

    if optimizer == 'adam':
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=model_opt.lr,
                                    betas=(model_opt.beta1, model_opt.beta2),
                                    weight_decay=model_opt.weight_decay)
    elif optimizer == 'sgd':
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=model_opt.lr,
                                   momentum=model_opt.momentum,
                                   weight_decay=model_opt.weight_decay)
    elif optimizer == 'adamax':
        self.optimizer = optim.Adamax(self.model.parameters(), lr=model_opt.lr)
    else:
        # Unrecognised spec: keep whatever the caller passed.
        self.optimizer = optimizer

    if lr_scheduler == 'step':
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer, model_opt.lr_step_size, model_opt.lr_gamma)

    self.gpu = model_opt.gpu
    self.forward_fn = forward_fn
    self.finished_epoch = 0

    run_device = torch.device('cuda' if self.gpu else "cpu")
    self.model = self.model.to(run_device)

    # Load a checkpoint when resuming, otherwise kaiming-initialise.
    weights_source = model_opt.resume if model_opt.resume is not None else 'kaiming'
    self.set_model_weights(weights_source)

    print_save(model_opt.logdir, self.model)
def DataloaderSclevr(mode, opt={}, dl=False, opt_prefix='sclevr_'):
    """Return a DataLoader for the Sort-of-CLEVR dataset."""
    from vqa_lab.config import SCLEVR_QA_DIR

    parser = argparse.ArgumentParser(
        description='PyTorch Sort-of-CLEVR DataLoader(Openended)')
    parser.add_argument('--sclevr_qa_dir', type=str, default=SCLEVR_QA_DIR)
    my_opt, _ = parser.parse_known_args()

    if isinstance(opt, dict):
        opt = argparse.Namespace(**opt)

    # Prefixed keys in the run options override this loader's defaults.
    my_opt = update_opt_remove_prefix(my_opt, opt, opt_prefix)

    if 'logdir' in opt.__dict__:
        print_save(opt.logdir, my_opt, to_screen=False)

    def my_collate(batch):
        # All four fields are mandatory for Sort-of-CLEVR samples.
        columns = {field: [sample[field] for sample in batch] for field in batch[0]}
        return {
            'question': torch.stack(columns['question']),
            'image': torch.stack(columns['image']),
            'qid': torch.LongTensor(columns['qid']),
            'answer': torch.LongTensor(columns['answer']),
        }

    # Strip the loader prefix before handing options to the dataset class.
    stripped = {k[len(opt_prefix):]: v for k, v in my_opt.__dict__.items()}
    dataset_opt = argparse.Namespace(**stripped)

    from .dataset_sclevr import sclevrDataset

    return DataLoader(dataset=sclevrDataset(dataset_opt, mode),
                      batch_size=opt.batch_size,
                      collate_fn=my_collate,
                      num_workers=opt.threads,
                      shuffle=(mode == 'train'),
                      drop_last=dl)
default=66, help='random seed to use. Default=123') parser.add_argument( '--resume', type=str, default= '/home/wind/Research/VQA/old/ACMN-Pytorch/data/clevr/clevr_pretrained_model.pth', help='resume file name') parser.add_argument('--gpu', type=bool, default=True, help='use gpu or not') # log settings parser.add_argument('--logdir', type=str, default='logs/test', help='dir to tensorboard logs') opt, _ = parser.parse_known_args() print_save(opt.logdir, opt) torch.manual_seed(opt.seed) if opt.gpu: torch.cuda.manual_seed(opt.seed) #------ get dataloaders ------ from vqa_lab.data.data_loader import getDateLoader print('==> Loading datasets :') Dataloader = getDateLoader(opt.run_dataset) dataset_run = Dataloader('train', opt) opt.__dict__ = {**opt.__dict__, **dataset_run.dataset.opt} #----------- end ------------- #------ get mode_lrunner ----- from vqa_lab.model.model_runner import getModelRunner print('==> Building Network :')
def ModelRunner_Tree(opt):
    """Construct a ModelRunner wrapping the tree-attention model.

    Parses 'tree_'-prefixed hyper-parameters, copies run-wide settings
    from `opt`, snapshots this module's source directory into the log dir,
    and returns a ModelRunner whose forward closure runs the tree model.

    Args:
        opt: global run options; must provide vocab_size, out_vocab_size,
            gpu, logdir and resume.

    Returns:
        ModelRunner configured for tree_attention_Residual.
    """
    parser = argparse.ArgumentParser(description='PyTorch Tree Runner')
    parser.add_argument('--tree_lr', type=float, default=9e-5)
    parser.add_argument('--tree_beta1', type=float, default=0.9)
    parser.add_argument('--tree_beta2', type=float, default=0.999)
    parser.add_argument('--tree_weight_decay', type=float, default=0)
    parser.add_argument('--tree_gpu', type=bool, default=True)
    parser.add_argument('--tree_load_lookup', type=bool, default=False)
    parser.add_argument('--tree_resume', type=str, default=None)
    parser.add_argument('--tree_logdir', type=str, default=None)
    parser.add_argument('--tree_optim', type=str, default='adam')
    parser.add_argument('--tree_lr_scheduler', type=str, default=None)
    parser.add_argument('--tree_lr_step_size', type=int, default=20000)
    parser.add_argument('--tree_lr_gamma', type=float, default=0.7192)
    parser.add_argument('--tree_vocab_size', type=int, default=81)
    parser.add_argument('--tree_out_vocab_size', type=int, default=29)
    parser.add_argument('--tree_word_emb', type=int, default=300)
    parser.add_argument('--tree_commom_emb', type=int, default=256)
    parser.add_argument('--tree_dropout', type=float, default=0.0)
    parser.add_argument('--tree_encode', type=str, default='LSTM')
    parser.add_argument('--tree_img_emb', type=int, default=128)
    parser.add_argument('--tree_sent_len', type=int, default=45)
    parser.add_argument('--tree_sentence_emb', type=int, default=2048)
    my_opt, _ = parser.parse_known_args()
    prefix_len = len('tree_')

    from .tree_module import tree_attention_Residual

    # These always come from the global run options, overriding CLI defaults.
    my_opt.tree_vocab_size = opt.vocab_size
    # NOTE(review): the parser declares --tree_out_vocab_size, but this line
    # creates a *separate* tree_out_size attribute — confirm the model reads
    # 'out_size' rather than 'out_vocab_size' before unifying the two.
    my_opt.tree_out_size = opt.out_vocab_size
    my_opt.tree_gpu = opt.gpu
    my_opt.tree_logdir = opt.logdir
    my_opt.tree_resume = opt.resume

    print_save(my_opt.tree_logdir, my_opt)
    # Archive this module's source alongside the logs for reproducibility.
    forced_copydir(
        os.path.dirname(__file__),
        os.path.join(my_opt.tree_logdir,
                     os.path.basename(os.path.dirname(__file__))))

    def forward(model, input, volatile=False, only_forward=False):
        """Run one batch; returns predicts + node_values, plus loss when training.

        FIX: replaces the deprecated Variable(..., volatile=...) API (removed
        in PyTorch 0.4) with torch.device / torch.set_grad_enabled, matching
        the style of ModelRunner_RBN's forward in this file.
        """
        device = torch.device('cuda' if my_opt.tree_gpu else 'cpu')
        questions = input['question'].to(device)
        images = input['image'].to(device)
        trees = input['tree']  # python objects; never moved to the GPU

        with torch.set_grad_enabled(not volatile):
            predicts, node_values = model(questions, images, trees)

        # .detach().cpu() is the modern equivalent of the old .data.cpu().
        output = {'predicts': predicts.detach().cpu(), 'node_values': node_values}

        if not only_forward:
            answers = input['answer'].to(device)
            with torch.set_grad_enabled(not volatile):
                output['loss'] = F.cross_entropy(predicts, answers)

        return output

    model_opt = argparse.Namespace(**{
        k[prefix_len:]: v for k, v in my_opt.__dict__.items()
    })  # remove opt prefix

    return ModelRunner(model=tree_attention_Residual,
                       model_opt=model_opt,
                       forward_fn=forward,
                       optimizer=model_opt.optim,
                       lr_scheduler=model_opt.lr_scheduler)