Ejemplo n.º 1
0
def DataloaderClevr(mode, opt={}, dl=False, opt_prefix='clevr_'):
    """Build a ``DataLoader`` over one or more CLEVR splits.

    Args:
        mode: Split name, or several names joined with ``'+'`` (for example
            ``'train+val'``). One ``clevrDataset`` is created per name and
            the datasets are combined with ``+``.
        opt: Global options, either a dict or an ``argparse.Namespace``. It
            must supply ``batch_size`` and ``threads``; when it has a
            ``logdir`` entry the resolved CLEVR options are written there
            with ``print_save``.
        dl: Forwarded to ``DataLoader`` as ``drop_last``.
        opt_prefix: Prefix carried by the CLEVR-specific option names. Its
            length is stripped from the front of every option name before
            the options are handed to ``clevrDataset``.

    Returns:
        A ``DataLoader`` that batches samples with ``my_collate`` and
        shuffles whenever ``mode`` contains ``'train'``.
    """
    from vqa_lab.config import CLEVR_QA_DIR, CLEVR_IMG_H5, CLEVR_IMG_PNG, CLEVR_IMG_NPY
    from functools import reduce

    # Dataset-specific defaults; they can be overridden on the command line.
    # NOTE: with ``type=bool`` argparse applies ``bool()`` to the raw string,
    # so any non-empty value (including "False") parses as True.
    parser = argparse.ArgumentParser(description='PyTorch CLEVR DataLoaer')
    parser.add_argument('--clevr_image_source',
                        type=str,
                        default='h5',
                        help='h5|png')
    parser.add_argument('--clevr_qa_dir', type=str, default=CLEVR_QA_DIR)
    parser.add_argument('--clevr_img_h5', type=str, default=CLEVR_IMG_H5)
    parser.add_argument('--clevr_img_png', type=str, default=CLEVR_IMG_PNG)
    parser.add_argument('--clevr_load_trees', type=bool, default=True)
    parser.add_argument('--clevr_load_png', type=bool, default=False)
    # parse_known_args ignores options that belong to other components.
    my_opt, _ = parser.parse_known_args()
    prefix_len = len(opt_prefix)

    # Namespace accepts keyword arguments only, so the dict must be unpacked;
    # passing it positionally raises TypeError.
    if isinstance(opt, dict):
        opt = argparse.Namespace(**opt)

    # Presumably overlays matching values from ``opt`` onto the parsed
    # defaults (helper defined elsewhere) -- TODO confirm.
    my_opt = update_opt_remove_prefix(my_opt, opt, opt_prefix)

    if 'logdir' in opt.__dict__:
        print_save(opt.logdir, my_opt, to_screen=False)

    from .dataset_clevr import clevrDataset

    def my_collate(batch):
        """Merge a list of per-sample dicts into a single batch dict."""
        # Transpose list-of-dicts into dict-of-lists, keyed by the first sample.
        raw_samples = {key: [d[key] for d in batch] for key in batch[0]}

        samples = {
            'question': torch.stack(raw_samples['question']),
            'image': torch.stack(raw_samples['image']),
            'qid': torch.LongTensor(raw_samples['qid']),
        }

        # Optional fields are only present for some splits/configurations.
        if 'answer' in raw_samples:
            samples['answer'] = torch.LongTensor(raw_samples['answer'])
        if 'tree' in raw_samples:
            # Trees are kept as a plain Python list, not stacked.
            samples['tree'] = raw_samples['tree']
        if 'img_png' in raw_samples:
            samples['img_png'] = torch.stack(raw_samples['img_png'])

        return samples

    # Drop the first ``prefix_len`` characters of every option name.
    dataset_opt = argparse.Namespace(**{
        k[prefix_len:]: v
        for k, v in my_opt.__dict__.items()
    })

    # One dataset per '+'-separated split, combined with ``+``.
    dataset = reduce(lambda x, y: x + y,
                     [clevrDataset(dataset_opt, m) for m in mode.split('+')])

    # ``'train' in mode`` (rather than ``mode == 'train'``) so that combined
    # modes such as 'train+val' are shuffled too.
    return DataLoader(dataset,
                      batch_size=opt.batch_size,
                      collate_fn=my_collate,
                      num_workers=opt.threads,
                      shuffle=('train' in mode),
                      drop_last=dl)
Ejemplo n.º 2
0
def ModelRunner_RBN(opt):
    """Assemble a ``ModelRunner`` around ``ResBlockNet``.

    Command-line ``--rbn_*`` options provide the defaults; ``vocab_size``,
    ``out_vocab_size``, ``gpu`` and ``logdir`` from ``opt`` then overwrite the
    corresponding ``rbn_*`` values. The resolved options are logged with
    ``print_save`` and the directory containing this file is copied into the
    log directory with ``forced_copydir``.

    Args:
        opt: Global options object exposing ``vocab_size``,
            ``out_vocab_size``, ``gpu`` and ``logdir``.

    Returns:
        A ``ModelRunner`` built with the Adam optimizer setting.
    """
    # (flag, type, default) for every runner option.
    # NOTE: with ``type=bool`` argparse applies ``bool()`` to the raw string,
    # so any non-empty command-line value parses as True.
    option_table = (
        ('--rbn_vocab_size', int, None),
        ('--rbn_out_size', int, None),
        ('--rbn_word_emb', int, 300),
        ('--rbn_lr', float, 3e-4),
        ('--rbn_beta1', float, 0.9),
        ('--rbn_beta2', float, 0.999),
        ('--rbn_weight_decay', float, 1e-5),
        ('--rbn_gpu', bool, False),
        ('--rbn_resume', str, None),
        ('--rbn_logdir', str, None),
    )
    cli = argparse.ArgumentParser(description='PyTorch RBNRunner')
    for flag, caster, fallback in option_table:
        cli.add_argument(flag, type=caster, default=fallback)
    my_opt, _ = cli.parse_known_args()

    from .resblocknet import ResBlockNet

    # Values supplied by the caller always replace the command-line ones.
    overrides = (
        ('rbn_vocab_size', opt.vocab_size),
        ('rbn_out_size', opt.out_vocab_size),
        ('rbn_gpu', opt.gpu),
        ('rbn_logdir', opt.logdir),
    )
    for field, value in overrides:
        setattr(my_opt, field, value)

    print_save(my_opt.rbn_logdir, my_opt)

    # Copy the directory holding this file into the log directory.
    package_dir = os.path.dirname(__file__)
    snapshot_dir = os.path.join(my_opt.rbn_logdir,
                                os.path.basename(package_dir))
    forced_copydir(package_dir, snapshot_dir)

    def forward(model, input, volatile=False, only_forward=False):
        """Run one batch through ``model``; add the loss unless ``only_forward``."""
        device = torch.device('cuda' if my_opt.rbn_gpu else "cpu")

        questions = input['question'].to(device)
        images = input['image'].to(device)

        # ``volatile=True`` disables gradient tracking for this pass.
        with torch.set_grad_enabled(not volatile):
            predicts = model(questions, images)

        output = {'predicts': predicts.cpu()}

        if only_forward == False:
            answers = input['answer'].to(device)
            with torch.set_grad_enabled(not volatile):
                output['loss'] = F.cross_entropy(predicts, answers)

        return output

    # Drop the leading 'rbn_' from every option name before handing the
    # options to the model.
    strip = len('rbn_')
    model_opt = argparse.Namespace()
    for name, value in my_opt.__dict__.items():
        setattr(model_opt, name[strip:], value)

    return ModelRunner(model=ResBlockNet,
                       model_opt=model_opt,
                       forward_fn=forward,
                       optimizer='adam')
Ejemplo n.º 3
0
def DataloaderFigureQA(mode, opt={}, dl=False, opt_prefix='figureqa_'):
    """Create a ``DataLoader`` over one or more FigureQA splits.

    Args:
        mode: Split name, or several names joined by ``'+'``; one
            ``figureqaDataset`` is built per name and they are combined
            with ``+``.
        opt: Global options (dict or ``argparse.Namespace``); ``batch_size``
            and ``threads`` are read from it, and ``logdir`` if present.
        dl: Used as the loader's ``drop_last``.
        opt_prefix: Prefix of the FigureQA option names; its length is
            stripped from the front of each name for the dataset.

    Returns:
        The configured ``DataLoader``; shuffling is enabled when ``mode``
        contains ``'train'``.
    """
    from vqa_lab.config import FIGUREQA_IMG_PNG, FIGUREQA_QA_DIR

    # NOTE: ``type=bool`` makes argparse call ``bool()`` on the raw string,
    # so any non-empty command-line value becomes True.
    flag_table = (
        ('--figureqa_img_png', str, FIGUREQA_IMG_PNG),
        ('--figureqa_qa_dir', str, FIGUREQA_QA_DIR),
        ('--figureqa_load_trees', bool, True),
    )
    cli = argparse.ArgumentParser(
        description='PyTorch FigureQA DataLoader(Openended)')
    for flag, caster, fallback in flag_table:
        cli.add_argument(flag, type=caster, default=fallback)
    my_opt, _ = cli.parse_known_args()

    if isinstance(opt, dict):
        opt = argparse.Namespace(**opt)

    # Presumably merges values from ``opt`` into the parsed defaults
    # (helper defined elsewhere) -- TODO confirm.
    my_opt = update_opt_remove_prefix(my_opt, opt, opt_prefix)

    if 'logdir' in opt.__dict__:
        print_save(opt.logdir, my_opt, to_screen=False)

    def my_collate(batch):
        """Turn a list of sample dicts into one dict of batched fields."""
        columns = {}
        for key in batch[0]:
            columns[key] = [sample[key] for sample in batch]

        merged = {}
        merged['question'] = torch.stack(columns['question'])
        merged['image'] = torch.stack(columns['image'])
        merged['qid'] = torch.LongTensor(columns['qid'])

        if 'answer' in columns:
            merged['answer'] = torch.LongTensor(columns['answer'])
        if 'tree' in columns:
            # Trees stay a plain list.
            merged['tree'] = columns['tree']

        return merged

    # Remove the first len(opt_prefix) characters from each option name.
    cut = len(opt_prefix)
    dataset_opt = argparse.Namespace(
        **dict((name[cut:], value) for name, value in my_opt.__dict__.items()))

    from .dataset_figureqa import figureqaDataset

    # Build every split first, then fold them together left to right.
    parts = [figureqaDataset(dataset_opt, split) for split in mode.split('+')]
    dataset = parts[0]
    for extra in parts[1:]:
        dataset = dataset + extra

    return DataLoader(dataset=dataset,
                      batch_size=opt.batch_size,
                      collate_fn=my_collate,
                      num_workers=opt.threads,
                      shuffle=('train' in mode),
                      drop_last=dl)
Ejemplo n.º 4
0
def DataloaderVQAv2(mode, opt = {}, dl = False, opt_prefix = 'vqav2_'):
    """Create a ``DataLoader`` over one or more VQAv2 splits.

    Args:
        mode: Split name, or several names joined by ``'+'``; one
            ``vqa2Dataset`` is built per name and they are combined with ``+``.
        opt: Global options (dict or ``argparse.Namespace``); ``batch_size``
            and ``threads`` are read from it, and ``logdir`` if present.
        dl: Used as the loader's ``drop_last``.
        opt_prefix: Prefix of the VQAv2 option names; its length is stripped
            from the front of each name for the dataset.

    Returns:
        The configured ``DataLoader``; shuffling is enabled when ``mode``
        contains ``'train'``.
    """
    from vqa_lab.config import VQAV2_QA_DIR, VQAV2_IMG_H5, VQAV2_IMG_BU, VQAV2_IMG_JPG

    cli = argparse.ArgumentParser(description='PyTorch VQAv2 DataLoader(Openended)')
    cli.add_argument('--vqav2_image_source', type=str, default='BU', help='h5|jpg|BU')
    # NOTE: ``type=bool`` makes argparse call ``bool()`` on the raw string,
    # so any non-empty command-line value becomes True.
    plain_flags = (
        ('--vqav2_img_h5', str, VQAV2_IMG_H5),
        ('--vqav2_img_BU', str, VQAV2_IMG_BU),
        ('--vqav2_img_jpg', str, VQAV2_IMG_JPG),
        ('--vqav2_qa_dir', str, VQAV2_QA_DIR),
        ('--vqav2_load_trees', bool, True),
        ('--vqav2_load_jpg', bool, False),
    )
    for flag, caster, fallback in plain_flags:
        cli.add_argument(flag, type=caster, default=fallback)
    my_opt, _ = cli.parse_known_args()

    if isinstance(opt, dict):
        opt = argparse.Namespace(**opt)

    # Presumably merges values from ``opt`` into the parsed defaults
    # (helper defined elsewhere) -- TODO confirm.
    my_opt = update_opt_remove_prefix(my_opt, opt, opt_prefix)

    if 'logdir' in opt.__dict__:
        print_save(opt.logdir, my_opt, to_screen=False)

    def my_collate(batch):
        """Turn a list of sample dicts into one dict of batched fields."""
        columns = {}
        for key in batch[0]:
            columns[key] = [sample[key] for sample in batch]

        merged = {
            'question': torch.stack(columns['question']),
            'image': torch.stack(columns['image']),
            'qid': torch.LongTensor(columns['qid']),
        }

        # Optional fields, in output order, with the conversion each one gets.
        optional_fields = (
            ('answer', torch.LongTensor),
            ('raw_answer', torch.stack),
            ('tree', lambda items: items),  # trees stay a plain list
            ('img_jpg', torch.stack),
        )
        for key, convert in optional_fields:
            if key in columns:
                merged[key] = convert(columns[key])

        return merged

    # Remove the first len(opt_prefix) characters from each option name.
    cut = len(opt_prefix)
    dataset_opt = argparse.Namespace()
    for name, value in my_opt.__dict__.items():
        setattr(dataset_opt, name[cut:], value)

    from .dataset_vqav2 import vqa2Dataset

    # Build every split first, then fold them together left to right.
    parts = [vqa2Dataset(dataset_opt, split) for split in mode.split('+')]
    dataset = parts[0]
    for extra in parts[1:]:
        dataset = dataset + extra

    return DataLoader(dataset=dataset,
                      batch_size=opt.batch_size,
                      collate_fn=my_collate,
                      num_workers=opt.threads,
                      shuffle=('train' in mode),
                      drop_last=dl)
Ejemplo n.º 5
0
    def __init__(self,
                 model,
                 model_opt,
                 forward_fn,
                 optimizer,
                 lr_scheduler=None):
        """Instantiate the network, its optimizer and optional LR scheduler.

        Args:
            model: Callable invoked as ``model(model_opt)`` to build the network.
            model_opt: Options object. ``gpu``, ``resume`` and ``logdir`` are
                always read; ``lr``, ``beta1``, ``beta2``, ``weight_decay``,
                ``momentum``, ``lr_step_size`` and ``lr_gamma`` are read only
                by the optimizer/scheduler branch that needs them.
            forward_fn: Stored as ``self.forward_fn``.
            optimizer: ``'adam'``, ``'sgd'`` or ``'adamax'`` select the
                matching ``torch.optim`` class; any other value is stored
                unchanged.
            lr_scheduler: ``'step'`` builds a ``StepLR``; any other value is
                stored unchanged.
        """
        super(ModelRunner, self).__init__()

        self.model = model(model_opt)
        # Start from the raw arguments; recognised names are replaced below.
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler

        if optimizer == 'adam':
            self.optimizer = optim.Adam(
                self.model.parameters(),
                lr=model_opt.lr,
                betas=(model_opt.beta1, model_opt.beta2),
                weight_decay=model_opt.weight_decay,
            )
        elif optimizer == 'sgd':
            self.optimizer = optim.SGD(
                self.model.parameters(),
                lr=model_opt.lr,
                momentum=model_opt.momentum,
                weight_decay=model_opt.weight_decay,
            )
        elif optimizer == 'adamax':
            self.optimizer = optim.Adamax(self.model.parameters(),
                                          lr=model_opt.lr)

        if lr_scheduler == 'step':
            step_size = model_opt.lr_step_size
            gamma = model_opt.lr_gamma
            self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
                self.optimizer, step_size, gamma)

        use_gpu = model_opt.gpu
        self.gpu = use_gpu
        self.forward_fn = forward_fn
        self.finished_epoch = 0

        # Move the network to the selected device.
        target = torch.device('cuda' if use_gpu else "cpu")
        self.model = self.model.to(target)

        # Load the given checkpoint, or fall back to the 'kaiming' setting.
        checkpoint = model_opt.resume
        self.set_model_weights('kaiming' if checkpoint is None else checkpoint)

        print_save(model_opt.logdir, self.model)
Ejemplo n.º 6
0
def DataloaderSclevr(mode, opt={}, dl=False, opt_prefix='sclevr_'):
    """Create a ``DataLoader`` for a single Sort-of-CLEVR split.

    Args:
        mode: Split name passed straight to ``sclevrDataset``.
        opt: Global options (dict or ``argparse.Namespace``); ``batch_size``
            and ``threads`` are read from it, and ``logdir`` if present.
        dl: Used as the loader's ``drop_last``.
        opt_prefix: Prefix of the Sort-of-CLEVR option names; its length is
            stripped from the front of each name for the dataset.

    Returns:
        The configured ``DataLoader``; shuffling is enabled only when
        ``mode`` equals ``'train'``.
    """
    from vqa_lab.config import SCLEVR_QA_DIR

    cli = argparse.ArgumentParser(
        description='PyTorch Sort-of-CLEVR DataLoader(Openended)')
    cli.add_argument('--sclevr_qa_dir', default=SCLEVR_QA_DIR, type=str)
    my_opt, _ = cli.parse_known_args()

    if isinstance(opt, dict):
        opt = argparse.Namespace(**opt)

    # Presumably merges values from ``opt`` into the parsed defaults
    # (helper defined elsewhere) -- TODO confirm.
    my_opt = update_opt_remove_prefix(my_opt, opt, opt_prefix)

    if 'logdir' in opt.__dict__:
        print_save(opt.logdir, my_opt, to_screen=False)

    def my_collate(batch):
        """Turn a list of sample dicts into one dict of batched tensors."""
        columns = {}
        for key in batch[0]:
            columns[key] = [sample[key] for sample in batch]

        merged = {}
        merged['question'] = torch.stack(columns['question'])
        merged['image'] = torch.stack(columns['image'])
        merged['qid'] = torch.LongTensor(columns['qid'])
        merged['answer'] = torch.LongTensor(columns['answer'])
        return merged

    # Remove the first len(opt_prefix) characters from each option name.
    cut = len(opt_prefix)
    dataset_opt = argparse.Namespace(
        **dict((name[cut:], value) for name, value in my_opt.__dict__.items()))

    from .dataset_sclevr import sclevrDataset
    dataset = sclevrDataset(dataset_opt, mode)

    should_shuffle = (mode == 'train')
    return DataLoader(dataset=dataset,
                      batch_size=opt.batch_size,
                      collate_fn=my_collate,
                      num_workers=opt.threads,
                      shuffle=should_shuffle,
                      drop_last=dl)
Ejemplo n.º 7
0
                    default=66,
                    help='random seed to use. Default=123')
# Checkpoint to load. NOTE(review): the default is an absolute path under
# /home/wind, so it has to be overridden on any other machine.
parser.add_argument(
    '--resume',
    type=str,
    default=
    '/home/wind/Research/VQA/old/ACMN-Pytorch/data/clevr/clevr_pretrained_model.pth',
    help='resume file name')
# NOTE(review): with type=bool argparse calls bool() on the raw string, so any
# non-empty value (including "False") enables the GPU.
parser.add_argument('--gpu', type=bool, default=True, help='use gpu or not')
# Log settings
parser.add_argument('--logdir',
                    type=str,
                    default='logs/test',
                    help='dir to tensorboard logs')
# parse_known_args tolerates extra options not declared on this parser.
opt, _ = parser.parse_known_args()
print_save(opt.logdir, opt)

# Seed the CPU RNG, and the CUDA RNG as well when the GPU is enabled.
torch.manual_seed(opt.seed)
if opt.gpu: torch.cuda.manual_seed(opt.seed)

#------ get dataloaders ------
from vqa_lab.data.data_loader import getDateLoader
print('==> Loading datasets :')
# Look up the loader factory for the selected dataset and build the 'train' loader.
Dataloader = getDateLoader(opt.run_dataset)
dataset_run = Dataloader('train', opt)
# Merge the dataset's own option mapping into the global options; on a key
# clash the dataset's value wins because it is unpacked last.
opt.__dict__ = {**opt.__dict__, **dataset_run.dataset.opt}
#----------- end -------------

#------ get model runner -----
from vqa_lab.model.model_runner import getModelRunner
print('==> Building Network :')
Ejemplo n.º 8
0
def ModelRunner_Tree(opt):
    """Assemble a ``ModelRunner`` around ``tree_attention_Residual``.

    Command-line ``--tree_*`` options provide the defaults. ``vocab_size``,
    ``gpu``, ``logdir`` and ``resume`` from ``opt`` then overwrite the
    corresponding ``tree_*`` values, and ``opt.out_vocab_size`` is stored as
    ``tree_out_size`` (the parsed ``tree_out_vocab_size`` is left as is). The
    resolved options are logged with ``print_save`` and the directory
    containing this file is copied into the log directory.

    Args:
        opt: Global options object exposing ``vocab_size``,
            ``out_vocab_size``, ``gpu``, ``logdir`` and ``resume``.

    Returns:
        A ``ModelRunner`` using the optimizer and LR scheduler named by the
        ``tree_optim`` / ``tree_lr_scheduler`` options.
    """
    # Local import: the gradient-mode and device helpers used in ``forward``
    # live on the top-level ``torch`` module.
    import torch

    parser = argparse.ArgumentParser(description='PyTorch Tree Runner')
    # Optimisation / bookkeeping options.
    # NOTE: with ``type=bool`` argparse applies ``bool()`` to the raw string,
    # so any non-empty command-line value parses as True.
    parser.add_argument('--tree_lr', type=float, default=9e-5)
    parser.add_argument('--tree_beta1', type=float, default=0.9)
    parser.add_argument('--tree_beta2', type=float, default=0.999)
    parser.add_argument('--tree_weight_decay', type=float, default=0)
    parser.add_argument('--tree_gpu', type=bool, default=True)
    parser.add_argument('--tree_load_lookup', type=bool, default=False)
    parser.add_argument('--tree_resume', type=str, default=None)
    parser.add_argument('--tree_logdir', type=str, default=None)
    parser.add_argument('--tree_optim', type=str, default='adam')
    parser.add_argument('--tree_lr_scheduler', type=str, default=None)
    parser.add_argument('--tree_lr_step_size', type=int, default=20000)
    parser.add_argument('--tree_lr_gamma', type=float, default=0.7192)

    # Model size options.
    parser.add_argument('--tree_vocab_size', type=int, default=81)
    parser.add_argument('--tree_out_vocab_size', type=int, default=29)
    parser.add_argument('--tree_word_emb', type=int, default=300)
    parser.add_argument('--tree_commom_emb', type=int, default=256)
    parser.add_argument('--tree_dropout', type=float, default=0.0)
    parser.add_argument('--tree_encode', type=str, default='LSTM')
    parser.add_argument('--tree_img_emb', type=int, default=128)
    parser.add_argument('--tree_sent_len', type=int, default=45)
    parser.add_argument('--tree_sentence_emb', type=int, default=2048)

    my_opt, _ = parser.parse_known_args()
    prefix_len = len('tree_')

    from .tree_module import tree_attention_Residual

    # Values supplied by the caller always replace the command-line ones.
    my_opt.tree_vocab_size = opt.vocab_size
    my_opt.tree_out_size = opt.out_vocab_size
    my_opt.tree_gpu = opt.gpu
    my_opt.tree_logdir = opt.logdir
    my_opt.tree_resume = opt.resume

    print_save(my_opt.tree_logdir, my_opt)
    forced_copydir(
        os.path.dirname(__file__),
        os.path.join(my_opt.tree_logdir,
                     os.path.basename(os.path.dirname(__file__))))

    def forward(model, input, volatile=False, only_forward=False):
        """Run one batch through ``model``.

        Args:
            model: Called as ``model(questions, images, trees)`` and expected
                to return ``(predicts, node_values)``.
            input: Batch dict with ``'question'``, ``'image'``, ``'tree'`` and,
                unless ``only_forward`` is set, ``'answer'``.
            volatile: When true, gradient tracking is disabled for the pass.
            only_forward: When true, the loss is not computed.

        Returns:
            Dict with ``'predicts'`` (detached, on CPU), ``'node_values'`` and,
            unless ``only_forward`` is set, ``'loss'``.
        """
        device = torch.device('cuda' if my_opt.tree_gpu else 'cpu')

        questions = input['question'].to(device)
        images = input['image'].to(device)
        trees = input['tree']

        # ``Variable(..., volatile=True)`` is ignored by PyTorch >= 0.4, so
        # the gradient mode is switched explicitly instead.
        with torch.set_grad_enabled(not volatile):
            predicts, node_values = model(questions, images, trees)

            output = {
                'predicts': predicts.detach().cpu(),
                'node_values': node_values,
            }

            if not only_forward:
                answers = input['answer'].to(device)
                output['loss'] = F.cross_entropy(predicts, answers)

        return output

    # Drop the leading 'tree_' from every option name before handing the
    # options to the model.
    model_opt = argparse.Namespace(**{
        k[prefix_len:]: v
        for k, v in my_opt.__dict__.items()
    })

    return ModelRunner(model=tree_attention_Residual,
                       model_opt=model_opt,
                       forward_fn=forward,
                       optimizer=model_opt.optim,
                       lr_scheduler=model_opt.lr_scheduler)