def main(args):
    """Train entry point: resolves the checkpoint path, stages data locally if
    requested, builds train/val ClevrDataLoaders (with program-depth
    filtering) and runs the training loop.
    """
    if args.randomize_checkpoint_path == 1:
        # Append a random 6-digit suffix so parallel runs don't clobber
        # each other's checkpoints.
        name, ext = os.path.splitext(args.checkpoint_path)
        num = random.randint(1, 1000000)
        args.checkpoint_path = '%s_%06d%s' % (name, num, ext)
    print('Will save checkpoints to %s' % args.checkpoint_path)
    vocab = utils.load_vocab(args.vocab_json)
    if args.use_local_copies == 1:
        # Stage the HDF5 inputs on local /tmp (faster reads on network
        # filesystems), then point the args at the local copies.
        shutil.copy(args.train_question_h5, '/tmp/train_questions.h5')
        shutil.copy(args.train_features_h5, '/tmp/train_features.h5')
        shutil.copy(args.val_question_h5, '/tmp/val_questions.h5')
        shutil.copy(args.val_features_h5, '/tmp/val_features.h5')
        args.train_question_h5 = '/tmp/train_questions.h5'
        args.train_features_h5 = '/tmp/train_features.h5'
        args.val_question_h5 = '/tmp/val_questions.h5'
        args.val_features_h5 = '/tmp/val_features.h5'
    question_families = None
    if args.family_split_file is not None:
        with open(args.family_split_file, 'r') as f:
            question_families = json.load(f)
    train_loader_kwargs = {
        'question_h5': args.train_question_h5,
        'feature_h5': args.train_features_h5,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'shuffle': args.shuffle_train_data == 1,
        'question_families': question_families,
        # presumably restricts questions to this program-depth range —
        # TODO confirm against ClevrDataLoader
        'min_program_depth': args.min_program_depth,
        'max_program_depth': args.max_program_depth,
        'max_samples': args.num_train_samples,
        'num_workers': args.loader_num_workers,
        'drop_last': True,  # keep batch shapes constant during training
    }
    val_loader_kwargs = {
        'question_h5': args.val_question_h5,
        'feature_h5': args.val_features_h5,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'question_families': question_families,
        'min_program_depth': args.min_program_depth,
        'max_program_depth': args.max_program_depth,
        'max_samples': args.num_val_samples,
        'num_workers': args.loader_num_workers,
    }
    with ClevrDataLoader(**train_loader_kwargs) as train_loader, \
         ClevrDataLoader(**val_loader_kwargs) as val_loader:
        train_loop(args, train_loader, val_loader)
    if args.use_local_copies == 1 and args.cleanup_local_copies == 1:
        # Remove the staged copies created above.
        os.remove('/tmp/train_questions.h5')
        os.remove('/tmp/train_features.h5')
        os.remove('/tmp/val_questions.h5')
        os.remove('/tmp/val_features.h5')
def main(args):
    """Train entry point; same as the basic trainer but pins the visible
    GPU(s) first via CUDA_VISIBLE_DEVICES.
    """
    # Must be set before any CUDA initialization to take effect.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_visible
    if args.randomize_checkpoint_path == 1:  # default 0
        # Random suffix so parallel runs don't overwrite each other.
        name, ext = os.path.splitext(args.checkpoint_path)
        num = random.randint(1, 1000000)
        args.checkpoint_path = '%s_%06d%s' % (name, num, ext)
    print('Will save checkpoints to %s' % args.checkpoint_path)
    vocab = utils.load_vocab(args.vocab_json)
    if args.use_local_copies == 1:  # default 0
        # Stage HDF5 inputs on local /tmp for faster reads.
        shutil.copy(args.train_question_h5, '/tmp/train_questions.h5')
        shutil.copy(args.train_features_h5, '/tmp/train_features.h5')
        shutil.copy(args.val_question_h5, '/tmp/val_questions.h5')
        shutil.copy(args.val_features_h5, '/tmp/val_features.h5')
        args.train_question_h5 = '/tmp/train_questions.h5'
        args.train_features_h5 = '/tmp/train_features.h5'
        args.val_question_h5 = '/tmp/val_questions.h5'
        args.val_features_h5 = '/tmp/val_features.h5'
    question_families = None
    if args.family_split_file is not None:  # default None
        with open(args.family_split_file, 'r') as f:
            question_families = json.load(f)
    train_loader_kwargs = {
        'question_h5': args.train_question_h5,  # path
        'feature_h5': args.train_features_h5,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'shuffle': args.shuffle_train_data == 1,
        'question_families': question_families,  # None unless family split given
        'max_samples': args.num_train_samples,  # None = use all
        'num_workers': args.loader_num_workers,  # default 1
    }
    val_loader_kwargs = {
        'question_h5': args.val_question_h5,
        'feature_h5': args.val_features_h5,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'question_families': question_families,
        'max_samples': args.num_val_samples,
        'num_workers': args.loader_num_workers,
    }
    with ClevrDataLoader(**train_loader_kwargs) as train_loader, \
         ClevrDataLoader(**val_loader_kwargs) as val_loader:
        train_loop(args, train_loader, val_loader)
    if args.use_local_copies == 1 and args.cleanup_local_copies == 1:  # default 0
        os.remove('/tmp/train_questions.h5')
        os.remove('/tmp/train_features.h5')
        os.remove('/tmp/val_questions.h5')
        os.remove('/tmp/val_features.h5')
def record_depth_acc(output_json, depth, acc):
    """Record `acc` for `depth` in the JSON dict stored at `output_json`.

    Returns True if the value was written, False if an entry for that depth
    already existed (existing entries are never overwritten).

    JSON object keys are always strings, so `depth` is normalized with
    str() before the membership test. BUG FIX: the previous code compared
    the raw int against the reloaded (string-keyed) dict, so the duplicate
    check could never fire and existing entries were silently overwritten;
    the WARNING branch was unreachable.
    """
    depth_accs = dict()
    if os.path.exists(output_json):
        with open(output_json) as fd:
            depth_accs = json.load(fd)
    key = str(depth)
    if key in depth_accs:
        return False
    depth_accs[key] = acc
    with open(output_json, 'w') as fd:
        json.dump(depth_accs, fd)
    return True


def main(args):
    """Validate a model at exactly one program depth and append the
    resulting accuracy to the JSON document at args.output_json.
    """
    assert args.min_program_depth == args.max_program_depth, \
        "This script is for validating at one singular depth."
    if args.randomize_checkpoint_path == 1:
        # Random suffix so parallel runs don't clobber each other.
        name, ext = os.path.splitext(args.checkpoint_path)
        num = random.randint(1, 1000000)
        args.checkpoint_path = '%s_%06d%s' % (name, num, ext)
    print('Will save checkpoints to %s' % args.checkpoint_path)
    vocab = utils.load_vocab(args.vocab_json)
    if args.use_local_copies == 1:
        # Stage HDF5 inputs on local /tmp for faster reads.
        shutil.copy(args.train_question_h5, '/tmp/train_questions.h5')
        shutil.copy(args.train_features_h5, '/tmp/train_features.h5')
        shutil.copy(args.val_question_h5, '/tmp/val_questions.h5')
        shutil.copy(args.val_features_h5, '/tmp/val_features.h5')
        args.train_question_h5 = '/tmp/train_questions.h5'
        args.train_features_h5 = '/tmp/train_features.h5'
        args.val_question_h5 = '/tmp/val_questions.h5'
        args.val_features_h5 = '/tmp/val_features.h5'
    question_families = None
    if args.family_split_file is not None:
        with open(args.family_split_file, 'r') as f:
            question_families = json.load(f)
    val_loader_kwargs = {
        'question_h5': args.val_question_h5,
        'feature_h5': args.val_features_h5,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'question_families': question_families,
        'min_program_depth': args.min_program_depth,
        'max_program_depth': args.max_program_depth,
        'max_samples': args.num_val_samples,
        'num_workers': args.loader_num_workers,
    }
    with ClevrDataLoader(**val_loader_kwargs) as val_loader:
        val_acc = None
        # An empty loader (no questions at this depth) yields no accuracy.
        if len(val_loader) > 0:
            val_acc = validation_procedure(args, val_loader)
    if val_acc is not None:
        if not record_depth_acc(args.output_json, args.min_program_depth,
                                val_acc):
            print(f'WARNING: depth {args.min_program_depth} is already in document.')
def main(args):
    """Eval entry point: load a baseline model or a (program generator,
    execution engine) pair, then run a single example, an interactive
    loop over one image, or a batched HDF5 evaluation.
    """
    if args.debug_every <= 1:
        pdb.set_trace()
    model = None
    if args.baseline_model is not None:
        print('Loading baseline model from ', args.baseline_model)
        model, _ = utils.load_baseline(args.baseline_model)
        if args.vocab_json is not None:
            # Grow the model's question vocabulary to cover the new vocab.
            new_vocab = utils.load_vocab(args.vocab_json)
            model.rnn.expand_vocab(new_vocab['question_token_to_idx'])
    elif (args.program_generator is not None
          and args.execution_engine is not None):
        pg, _ = utils.load_program_generator(args.program_generator,
                                             args.model_type)
        ee, _ = utils.load_execution_engine(args.execution_engine,
                                            verbose=False,
                                            model_type=args.model_type)
        if args.vocab_json is not None:
            new_vocab = utils.load_vocab(args.vocab_json)
            pg.expand_encoder_vocab(new_vocab['question_token_to_idx'])
        model = (pg, ee)
    else:
        # BUG FIX: the two string halves previously concatenated without a
        # space ("--program_generatorand --execution_engine").
        print('Must give either --baseline_model or --program_generator '
              'and --execution_engine')
        return
    dtype = torch.FloatTensor
    if args.use_gpu == 1:
        dtype = torch.cuda.FloatTensor
    if args.question is not None and args.image is not None:
        # Single question + image pair given on the command line.
        run_single_example(args, model, dtype, args.question)
    # Interactive mode: one image, questions typed at a prompt.
    elif (args.image is not None and args.input_question_h5 is None
          and args.input_features_h5 is None):
        feats_var = extract_image_features(args, dtype)
        print(colored('Ask me something!', 'cyan'))
        while True:
            # Get user question; loop runs until the process is killed.
            question_raw = input(">>> ")
            run_single_example(args, model, dtype, question_raw, feats_var)
    else:
        # Batched evaluation over HDF5 questions/features.
        vocab = load_vocab(args)
        loader_kwargs = {
            'question_h5': args.input_question_h5,
            'feature_h5': args.input_features_h5,
            'vocab': vocab,
            'batch_size': args.batch_size,
        }
        if args.num_samples is not None and args.num_samples > 0:
            loader_kwargs['max_samples'] = args.num_samples
        if args.family_split_file is not None:
            with open(args.family_split_file, 'r') as f:
                loader_kwargs['question_families'] = json.load(f)
        with ClevrDataLoader(**loader_kwargs) as loader:
            run_batch(args, model, dtype, loader)
def main(args):
    """Batched eval entry point over one dataset part ({part}_questions.h5 /
    {part}_features.h5 under --data_dir), or a single question+image pair.
    """
    if not args.program_generator:
        # Fall back to the execution-engine checkpoint for the PG path.
        args.program_generator = args.execution_engine
    input_question_h5 = os.path.join(args.data_dir,
                                     '{}_questions.h5'.format(args.part))
    input_features_h5 = os.path.join(args.data_dir,
                                     '{}_features.h5'.format(args.part))
    model = None
    if args.baseline_model is not None:
        print('Loading baseline model from ', args.baseline_model)
        model, _ = utils.load_baseline(args.baseline_model)
        if args.vocab_json is not None:
            # Grow the model's question vocabulary to cover the new vocab.
            new_vocab = utils.load_vocab(args.vocab_json)
            model.rnn.expand_vocab(new_vocab['question_token_to_idx'])
    elif args.program_generator is not None and args.execution_engine is not None:
        pg, _ = utils.load_program_generator(args.program_generator)
        ee, _ = utils.load_execution_engine(args.execution_engine,
                                            verbose=False)
        if args.vocab_json is not None:
            new_vocab = utils.load_vocab(args.vocab_json)
            pg.expand_encoder_vocab(new_vocab['question_token_to_idx'])
        model = (pg, ee)
    else:
        print(
            'Must give either --baseline_model or --program_generator and --execution_engine'
        )
        return
    dtype = torch.FloatTensor
    if args.use_gpu == 1:
        dtype = torch.cuda.FloatTensor
    if args.question is not None and args.image is not None:
        run_single_example(args, model, dtype, args.question)
    else:
        vocab = load_vocab(args)
        loader_kwargs = {
            'question_h5': input_question_h5,
            'feature_h5': input_features_h5,
            'vocab': vocab,
            'batch_size': args.batch_size,
        }
        if args.num_samples is not None and args.num_samples > 0:
            loader_kwargs['max_samples'] = args.num_samples
        if args.family_split_file is not None:
            with open(args.family_split_file, 'r') as f:
                loader_kwargs['question_families'] = json.load(f)
        with ClevrDataLoader(**loader_kwargs) as loader:
            run_batch(args, model, dtype, loader)
def main(args):
    """Eval entry point that pairs a program generator with either a neural
    execution engine or the symbolic ClevrExecutor (driven by scene JSON).
    """
    if not args.program_generator:
        args.program_generator = args.execution_engine
    input_question_h5 = os.path.join(args.data_dir,
                                     '{}_questions.h5'.format(args.part))
    input_features_h5 = os.path.join(args.data_dir,
                                     '{}_features.h5'.format(args.part))
    input_scenes = os.path.join(args.data_dir,
                                '{}_scenes.json'.format(args.part))
    vocab = load_vocab(args)
    pg, _ = utils.load_program_generator(args.program_generator)
    if pg:
        pg.save_activations = True
        # NOTE(review): indentation reconstructed from flattened source —
        # temperature scaling assumed to apply only when a PG was loaded
        # (pg would be None otherwise and the attribute access would crash).
        if args.temperature:
            # Divide the decoder's output-layer weights/bias by the
            # temperature, i.e. softmax temperature scaling of its logits.
            pg.decoder_linear.weight.data /= args.temperature
            pg.decoder_linear.bias.data /= args.temperature
    if args.execution_engine:
        ee, _ = utils.load_execution_engine(args.execution_engine,
                                            verbose=False)
        ee.noise_enabled = False
    else:
        # No neural EE given: execute programs symbolically on scene graphs.
        ee = ClevrExecutor(vocab)
    dtype = torch.FloatTensor
    if args.use_gpu == 1:
        dtype = torch.cuda.FloatTensor
    loader_kwargs = {
        'question_h5': input_question_h5,
        'feature_h5': input_features_h5,
        # Scene JSON is only needed by the symbolic executor.
        'scene_path': input_scenes if isinstance(ee, ClevrExecutor) else None,
        'vocab': vocab,
        'batch_size': args.batch_size,
    }
    if args.num_samples is not None and args.num_samples > 0:
        loader_kwargs['max_samples'] = args.num_samples
    if args.q_family:
        loader_kwargs['question_families'] = args.q_family
    with ClevrDataLoader(**loader_kwargs) as loader:
        # Pure evaluation: no gradients needed.
        with torch.no_grad():
            run_batch(args, pg, ee, loader, dtype)
def main(args):
    """Load a program generator and run it in batches over one dataset part."""
    # Resolve the question/feature HDF5 files for the requested split.
    part_questions = os.path.join(args.data_dir,
                                  '{}_questions.h5'.format(args.part))
    part_features = os.path.join(args.data_dir,
                                 '{}_features.h5'.format(args.part))
    pg, _ = utils.load_program_generator(args.program_generator)
    # Pick the tensor type once; everything downstream follows it.
    dtype = torch.cuda.FloatTensor if args.use_gpu == 1 else torch.FloatTensor
    vocab = load_vocab(args)
    loader_kwargs = dict(
        question_h5=part_questions,
        feature_h5=part_features,
        vocab=vocab,
        batch_size=128,
    )
    with ClevrDataLoader(**loader_kwargs) as loader:
        run_batch(args, pg, loader, dtype)
def main(args):
    """Train entry point with TensorBoard logging and auxiliary feature
    files: seeds torch, forces deterministic cuDNN, prepares log dirs and
    (optionally local) data paths, then runs the training loop.
    """
    torch.autograd.set_detect_anomaly(True)
    # For reproducibility.
    # NOTE(review): 'device_idd' looks like a typo for 'device_id' — confirm
    # against the argument parser before renaming.
    torch.cuda.set_device(args.device_idd)
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    # BUG FIX: was misspelled 'benckmark', which silently set a nonexistent
    # attribute and left cuDNN autotuning (a source of nondeterminism) on.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    if args.randomize_checkpoint_path == 1:
        # Random suffix so parallel runs don't clobber each other.
        name, ext = os.path.splitext(args.checkpoint_path)
        num = random.randint(1, 1000000)
        args.checkpoint_path = '%s_%06d%s' % (name, num, ext)
    print('Will save checkpoints to %s' % args.checkpoint_path)
    vocab = utils.load_vocab(args.vocab_json)
    # TensorBoard: create <root_log_dir>/<log_dir>/runs and attach a writer.
    # os.mkdir raises if the directory already exists — intentional, to
    # avoid mixing events from an earlier run.
    log_dir = os.path.join(args.root_log_dir, args.log_dir)
    os.mkdir(log_dir)
    os.mkdir(os.path.join(log_dir, 'runs'))
    writer = SummaryWriter(log_dir=os.path.join(log_dir, 'runs'))
    if args.use_local_copies == 1:
        # Stage all HDF5 inputs (incl. auxiliary features) on local /tmp.
        shutil.copy(args.train_question_h5, '/tmp/train_questions.h5')
        shutil.copy(args.train_features_h5, '/tmp/train_features.h5')
        shutil.copy(args.train_features_h5_aux, '/tmp/train_features_aux.h5')
        shutil.copy(args.val_question_h5, '/tmp/val_questions.h5')
        shutil.copy(args.val_features_h5, '/tmp/val_features.h5')
        shutil.copy(args.val_features_h5_aux, '/tmp/val_features_aux.h5')
        args.train_question_h5 = '/tmp/train_questions.h5'
        args.train_features_h5 = '/tmp/train_features.h5'
        args.train_features_h5_aux = '/tmp/train_features_aux.h5'
        args.val_question_h5 = '/tmp/val_questions.h5'
        args.val_features_h5 = '/tmp/val_features.h5'
        args.val_features_h5_aux = '/tmp/val_features_aux.h5'
    question_families = None
    if args.family_split_file is not None:
        with open(args.family_split_file, 'r') as f:
            question_families = json.load(f)
    train_loader_kwargs = {
        'question_h5': args.train_question_h5,
        'feature_h5': args.train_features_h5,
        'feature_h5_aux': args.train_features_h5_aux,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'shuffle': args.shuffle_train_data == 1,
        'question_families': question_families,
        'max_samples': args.num_train_samples,
        'num_workers': args.loader_num_workers,
    }
    val_loader_kwargs = {
        'question_h5': args.val_question_h5,
        'feature_h5': args.val_features_h5,
        'feature_h5_aux': args.val_features_h5_aux,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'question_families': question_families,
        'max_samples': args.num_val_samples,
        'num_workers': args.loader_num_workers,
    }
    with ClevrDataLoader(**train_loader_kwargs) as train_loader, \
         ClevrDataLoader(**val_loader_kwargs) as val_loader:
        train_loop(args, train_loader, val_loader, writer)
    if args.use_local_copies == 1 and args.cleanup_local_copies == 1:
        os.remove('/tmp/train_questions.h5')
        os.remove('/tmp/train_features.h5')
        os.remove('/tmp/train_features_aux.h5')
        os.remove('/tmp/val_questions.h5')
        os.remove('/tmp/val_features.h5')
        os.remove('/tmp/val_features_aux.h5')
def main(args):
    """Train entry point for cluster runs: resolves data paths relative to
    --data_dir, supports SLURM-derived checkpoint names, optionally stages
    data on node-local scratch via rsync, then runs the training loop.
    """
    # NOTE(review): randomization runs before the SLURM fallback below, so an
    # empty --checkpoint_path gets suffixed to '_NNNNNN' here — confirm the
    # intended interaction of the two options.
    if args.randomize_checkpoint_path == 1:
        name, ext = os.path.splitext(args.checkpoint_path)
        num = random.randint(1, 1000000)
        args.checkpoint_path = '%s_%06d%s' % (name, num, ext)
    print('Will save checkpoints to %s' % args.checkpoint_path)
    if args.data_dir:
        # All data files are given relative to --data_dir.
        args.train_question_h5 = os.path.join(args.data_dir,
                                              args.train_question_h5)
        args.train_features_h5 = os.path.join(args.data_dir,
                                              args.train_features_h5)
        args.val_question_h5 = os.path.join(args.data_dir,
                                            args.val_question_h5)
        args.val_features_h5 = os.path.join(args.data_dir,
                                            args.val_features_h5)
        args.vocab_json = os.path.join(args.data_dir, args.vocab_json)
    if not args.checkpoint_path:
        # Under SLURM, name the checkpoint after the job id.
        if 'SLURM_JOB_ID' in os.environ:
            args.checkpoint_path = os.environ['SLURM_JOB_ID'] + '.pt'
        else:
            raise NotImplementedError()
    vocab = utils.load_vocab(args.vocab_json)
    if args.use_local_copies == 1:
        # Prefer the faster scratch mount when present.
        if os.path.exists('/Tmpfast'):
            tmp = '/Tmpfast/'
        else:
            tmp = '/Tmp/'
        if not os.path.exists(tmp + 'bahdanau'):
            os.mkdir(tmp + 'bahdanau')
        if not os.path.exists(tmp + 'bahdanau/clevr'):
            os.mkdir(tmp + 'bahdanau/clevr')
        root = tmp + 'bahdanau/clevr/'

        def rsync_copy_if_not_exists(src, dst):
            # Copy once per node; later runs reuse the cached copy.
            if not os.path.exists(dst):
                os.system("rsync -vrz --progress {} {}".format(src, dst))

        rsync_copy_if_not_exists(args.train_question_h5,
                                 root + 'train_questions.h5')
        rsync_copy_if_not_exists(args.train_features_h5,
                                 root + 'train_features.h5')
        rsync_copy_if_not_exists(args.val_question_h5,
                                 root + 'val_questions.h5')
        rsync_copy_if_not_exists(args.val_features_h5,
                                 root + 'val_features.h5')
        args.train_question_h5 = root + 'train_questions.h5'
        args.train_features_h5 = root + 'train_features.h5'
        args.val_question_h5 = root + 'val_questions.h5'
        args.val_features_h5 = root + 'val_features.h5'
    question_families = None
    if args.family_split_file is not None:
        with open(args.family_split_file, 'r') as f:
            question_families = json.load(f)
    train_loader_kwargs = {
        'question_h5': args.train_question_h5,
        'feature_h5': args.train_features_h5,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'shuffle': args.shuffle_train_data == 1,
        'question_families': question_families,
        'max_samples': args.num_train_samples,
        'num_workers': args.loader_num_workers,
    }
    val_loader_kwargs = {
        'question_h5': args.val_question_h5,
        'feature_h5': args.val_features_h5,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'question_families': question_families,
        'max_samples': args.num_val_samples,
        'num_workers': args.loader_num_workers,
    }
    with ClevrDataLoader(**train_loader_kwargs) as train_loader, \
         ClevrDataLoader(**val_loader_kwargs) as val_loader:
        # Size the model to the deepest/widest program actually present.
        args.max_program_module_arity = max(train_loader.max_arity,
                                            val_loader.max_arity)
        args.max_program_tree_depth = max(train_loader.max_depth,
                                          val_loader.max_depth)
        train_loop(args, train_loader, val_loader)
    if args.use_local_copies == 1 and args.cleanup_local_copies == 1:
        # BUG FIX: this used to remove hard-coded '/tmp/...' paths that this
        # function never creates (copies are staged under `root`), raising
        # FileNotFoundError. Remove the actual staged copies instead, which
        # the args point at after the staging block above.
        os.remove(args.train_question_h5)
        os.remove(args.train_features_h5)
        os.remove(args.val_question_h5)
        os.remove(args.val_features_h5)
'vocab': vocab, 'batch_size': args.batch_size, 'shuffle': False, 'max_samples': args.num_train_samples, 'num_workers': args.loader_num_workers, } val_loader_kwargs = { 'question_h5': os.path.join(args.data_dir, 'val_questions.h5'), 'feature_h5': os.path.join(args.data_dir, 'val_features.h5'), 'vocab': vocab, 'batch_size': args.batch_size, 'max_samples': args.num_val_samples, 'num_workers': args.loader_num_workers, } with ClevrDataLoader(**train_loader_kwargs) as train_loader, \ ClevrDataLoader(**val_loader_kwargs) as val_loader: best_val_acc = 0.0 for i in range(30): logger.info('Epoch ' + str(i)) train_loss, train_acc = eval_epoch(train_loader, film_gen, filmed_net, opt=opt) valid_loss, valid_acc = eval_epoch(val_loader, film_gen, filmed_net) if train_loss.ndim == 1: train_loss = train_loss[0] valid_loss = valid_loss[0] logger.info("{}, {}, {}, {}".format(train_loss, train_acc,
def main(args):
    """Train entry point that supports either a ShapeWorld dataset (when
    --sw_name/--sw_config is given) or the standard CLEVR HDF5 pipeline.
    """
    if args.randomize_checkpoint_path == 1:
        # Random suffix so parallel runs don't clobber each other.
        name, ext = os.path.splitext(args.checkpoint_path)
        num = random.randint(1, 1000000)
        args.checkpoint_path = '%s_%06d%s' % (name, num, ext)
    print('Will save checkpoints to %s' % args.checkpoint_path)
    if args.sw_name is not None or args.sw_config is not None:
        # ---- ShapeWorld branch ----
        # Imported lazily so the CLEVR path works without shapeworld installed.
        from shapeworld import Dataset, torch_util
        from shapeworld.datasets import clevr_util

        class ShapeWorldDataLoader(torch_util.ShapeWorldDataLoader):
            # Adapts ShapeWorld batches to the 6-tuple the train loop expects.

            def __init__(self, **kwargs):
                super(ShapeWorldDataLoader, self).__init__(**kwargs)

            def __iter__(self):
                for batch in super(ShapeWorldDataLoader, self).__iter__():
                    question = batch['caption'].long()
                    image = batch['world']
                    # The raw world tensor doubles as the feature tensor.
                    feats = batch['world']
                    answer = batch['agreement'].long()
                    if 'caption_model' in batch:
                        program_seq = batch['caption_model'].apply_(callable=(lambda model: clevr_util.parse_program(mode=0, model=model)))
                    else:
                        # No program supervision available: emit dummy zeros.
                        program_seq = torch.IntTensor([0 for _ in batch['caption']])
                    program_json = dict()
                    yield question, image, feats, answer, program_seq, program_json

        dataset = Dataset.create(dtype='agreement', name=args.sw_name,
                                 variant=args.sw_variant,
                                 language=args.sw_language,
                                 config=args.sw_config)
        print('ShapeWorld dataset: {} (variant: {})'.format(dataset, args.sw_variant))
        print('Config: ' + str(args.sw_config))
        if args.program_generator_start_from is None:
            # Fresh vocab: shift word indices up by 2 to make room for the
            # special tokens; words at source index 0 map to 0 (presumably
            # the source vocab's padding slot — TODO confirm).
            question_token_to_idx = {
                word: index + 2 if index > 0 else 0
                for word, index in dataset.vocabularies['language'].items()
            }
            question_token_to_idx['<NULL>'] = 0
            question_token_to_idx['<START>'] = 1
            question_token_to_idx['<END>'] = 2
            vocab = dict(
                question_token_to_idx=question_token_to_idx,
                # NOTE(review): program vocab holds only the special tokens;
                # real program tokens are absent (original comment:
                # "missing!!!") — confirm downstream PG training tolerates it.
                program_token_to_idx={'<NULL>': 0, '<START>': 1, '<END>': 2},
                answer_token_to_idx={'false': 0, 'true': 1}
            )
            with open(args.checkpoint_path + '.vocab', 'w') as filehandle:
                json.dump(vocab, filehandle)
        else:
            # Resuming: reuse the old vocab and append new words at the end
            # so existing embeddings keep their indices.
            with open(args.program_generator_start_from + '.vocab', 'r') as filehandle:
                vocab = json.load(filehandle)
            question_token_to_idx = vocab['question_token_to_idx']
            index = len(question_token_to_idx)
            for word in dataset.vocabularies['language']:
                if word not in question_token_to_idx:
                    question_token_to_idx[word] = index
                    index += 1
            with open(args.checkpoint_path + '.vocab', 'w') as filehandle:
                json.dump(vocab, filehandle)
        # World shape reported channel-last; reversed for channel-first use.
        args.feature_dim = ','.join(str(n) for n in reversed(dataset.world_shape()))
        args.vocab_json = args.checkpoint_path + '.vocab'
        train_dataset = torch_util.ShapeWorldDataset(dataset=dataset, mode='train')  # , include_model=True)
        train_loader = ShapeWorldDataLoader(dataset=train_dataset, batch_size=args.batch_size)  # num_workers=1
        if args.sw_mixer == 1:
            # Mixer dataset: validate on each component dataset separately.
            val_loader = list()
            for d in dataset.datasets:
                val_dataset = torch_util.ShapeWorldDataset(dataset=d, mode='validation', epoch=(args.num_val_samples is None))
                val_loader.append(ShapeWorldDataLoader(dataset=val_dataset, batch_size=args.batch_size))  # num_workers=1
        else:
            val_dataset = torch_util.ShapeWorldDataset(dataset=dataset, mode='validation', epoch=(args.num_val_samples is None))
            val_loader = ShapeWorldDataLoader(dataset=val_dataset, batch_size=args.batch_size)  # num_workers=1
        train_loop(args, train_loader, val_loader)
    else:
        # ---- Standard CLEVR HDF5 branch ----
        vocab = utils.load_vocab(args.vocab_json)
        if args.use_local_copies == 1:
            # Stage HDF5 inputs on local /tmp for faster reads.
            shutil.copy(args.train_question_h5, '/tmp/train_questions.h5')
            shutil.copy(args.train_features_h5, '/tmp/train_features.h5')
            shutil.copy(args.val_question_h5, '/tmp/val_questions.h5')
            shutil.copy(args.val_features_h5, '/tmp/val_features.h5')
            args.train_question_h5 = '/tmp/train_questions.h5'
            args.train_features_h5 = '/tmp/train_features.h5'
            args.val_question_h5 = '/tmp/val_questions.h5'
            args.val_features_h5 = '/tmp/val_features.h5'
        question_families = None
        if args.family_split_file is not None:
            with open(args.family_split_file, 'r') as f:
                question_families = json.load(f)
        train_loader_kwargs = {
            'question_h5': args.train_question_h5,
            'feature_h5': args.train_features_h5,
            'vocab': vocab,
            'batch_size': args.batch_size,
            'shuffle': args.shuffle_train_data == 1,
            'question_families': question_families,
            'max_samples': args.num_train_samples,
            'num_workers': args.loader_num_workers,
        }
        val_loader_kwargs = {
            'question_h5': args.val_question_h5,
            'feature_h5': args.val_features_h5,
            'vocab': vocab,
            'batch_size': args.batch_size,
            'question_families': question_families,
            'max_samples': args.num_val_samples,
            'num_workers': args.loader_num_workers,
        }
        with ClevrDataLoader(**train_loader_kwargs) as train_loader, \
             ClevrDataLoader(**val_loader_kwargs) as val_loader:
            train_loop(args, train_loader, val_loader)
        if args.use_local_copies == 1 and args.cleanup_local_copies == 1:
            os.remove('/tmp/train_questions.h5')
            os.remove('/tmp/train_features.h5')
            os.remove('/tmp/val_questions.h5')
            os.remove('/tmp/val_features.h5')
question_raw = input(">>> ") run_single_example(args, model, dtype, question_raw, feats_var) elif args.sw_name is not None: run_batch(args, model, dtype, loader) else: vocab = load_vocab(args) loader_kwargs = { 'question_h5': args.input_question_h5, 'feature_h5': args.input_features_h5, 'vocab': vocab, 'batch_size': args.batch_size, } if args.family_split_file is not None: with open(args.family_split_file, 'r') as f: loader_kwargs['question_families'] = json.load(f) with ClevrDataLoader(**loader_kwargs) as loader: run_batch(args, model, dtype, loader) def extract_image_features(args, dtype): # Build the CNN to use for feature extraction print('Extracting image features...') cnn = build_cnn(args, dtype) # Load and preprocess the image img_size = (args.image_height, args.image_width) img = imread(args.image, mode='RGB') img = imresize(img, img_size, interp='bicubic') img = img.transpose(2, 0, 1)[None] mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1) std = np.array([0.229, 0.224, 0.224]).reshape(1, 3, 1, 1)
import time
import os

from vr.data import ClevrDataLoader

# Smoke-test / timing script: pull ~100 batches from the training loader
# (paths taken from the `data_dir` environment variable) and report the
# elapsed wall-clock time.
loader = ClevrDataLoader(
    question_h5=os.path.join(os.environ['data_dir'], 'train_questions.h5'),
    feature_h5=os.path.join(os.environ['data_dir'], 'train_features.h5'),
    vocab=os.path.join(os.environ['data_dir'], 'vocab.json'),
    batch_size=64,
    shuffle=0,
)
start = time.time()
count = 0
for batch in loader:
    if count > 100:
        break
    count += 1
    print(count)
print(time.time() - start)
def main(args):
    """Eval entry point supporting ShapeWorld datasets, single-image /
    interactive CLEVR runs, and batched CLEVR HDF5 evaluation, with optional
    ShapeWorld prediction/visualization export.
    """
    if args.debug_every <= 1:
        pdb.set_trace()
    if args.sw_name is not None or args.sw_config is not None:
        # ---- ShapeWorld setup ----
        assert args.image is None and args.question is None
        # Imported lazily so CLEVR paths work without shapeworld installed.
        from shapeworld import Dataset, torch_util
        from shapeworld.datasets import clevr_util

        class ShapeWorldDataLoader(torch_util.ShapeWorldDataLoader):
            # Adapts ShapeWorld batches to the 6-tuple run_batch expects.

            def __iter__(self):
                for batch in super(ShapeWorldDataLoader, self).__iter__():
                    if "caption" in batch:
                        question = batch["caption"].long()
                    else:
                        question = batch["question"].long()
                    if args.sw_features == 1:
                        image = batch["world_features"]
                    else:
                        image = batch["world"]
                    feats = image
                    if "agreement" in batch:
                        answer = batch["agreement"].long()
                    else:
                        answer = batch["answer"].long()
                    if "caption_model" in batch:
                        assert args.sw_name.startswith(
                            "clevr") or args.sw_program == 3
                        program_seq = batch["caption_model"]
                        # .apply_(callable=(lambda model: clevr_util.parse_program(mode=0, model=model)))
                    elif "question_model" in batch:
                        program_seq = batch["question_model"]
                    elif "caption" in batch:
                        if args.sw_program == 1:
                            program_seq = batch["caption_pn"].long()
                        elif args.sw_program == 2:
                            program_seq = batch["caption_rpn"].long()
                        else:
                            program_seq = [None]
                    else:
                        program_seq = [None]
                    # program_seq = torch.IntTensor([0 for _ in batch['question']])
                    program_json = dict()
                    yield question, image, feats, answer, program_seq, program_json

        dataset = Dataset.create(
            dtype=args.sw_type,
            name=args.sw_name,
            variant=args.sw_variant,
            language=args.sw_language,
            config=args.sw_config,
        )
        print("ShapeWorld dataset: {} (variant: {})".format(
            dataset, args.sw_variant))
        print("Config: " + str(args.sw_config))
        # Reuse the vocab saved next to whichever checkpoint was given.
        # NOTE(review): if none of the three checkpoints is given, `vocab`
        # is unbound and the line after the chain raises NameError — confirm
        # the argument parser enforces at least one.
        if args.program_generator is not None:
            with open(args.program_generator + ".vocab", "r") as filehandle:
                vocab = json.load(filehandle)
        elif args.execution_engine is not None:
            with open(args.execution_engine + ".vocab", "r") as filehandle:
                vocab = json.load(filehandle)
        elif args.baseline_model is not None:
            with open(args.baseline_model + ".vocab", "r") as filehandle:
                vocab = json.load(filehandle)
        program_token_to_idx = vocab["program_token_to_idx"]
        # Ground-truth programs are only attached for program-based models on
        # clevr-style data (or when --sw_program 3 is set explicitly).
        include_model = args.model_type in ("PG", "EE", "PG+EE") and (
            args.sw_name.startswith("clevr") or args.sw_program == 3)
        if include_model:

            def preprocess(model):
                # Encode the ground-truth program as a fixed-length index
                # array (padded to 27 — presumably the max program length;
                # TODO confirm).
                if args.sw_name.startswith("clevr"):
                    program_prefix = vr.programs.list_to_prefix(
                        model["program"])
                else:
                    program_prefix = clevr_util.parse_program(mode=0,
                                                              model=model)
                program_str = vr.programs.list_to_str(program_prefix)
                program_tokens = tokenize(program_str)
                program_encoded = encode(program_tokens, program_token_to_idx)
                program_encoded += [
                    program_token_to_idx["<NULL>"]
                    for _ in range(27 - len(program_encoded))
                ]
                return np.asarray(program_encoded, dtype=np.int64)

            if args.sw_name.startswith("clevr"):
                preprocessing = dict(question_model=preprocess)
            else:
                preprocessing = dict(caption_model=preprocess)
        elif args.sw_program in (1, 2):

            def preprocess(caption_pn):
                # Shift non-zero symbols up by 2, replace the first 0 with
                # <END>=2, then prepend <START>=1.
                caption_pn += (caption_pn > 0) * 2
                for n, symbol in enumerate(caption_pn):
                    if symbol == 0:
                        caption_pn[n] = 2
                        break
                caption_pn = np.concatenate(([1], caption_pn))
                return caption_pn

            if args.sw_program == 1:
                preprocessing = dict(caption_pn=preprocess)
            else:
                preprocessing = dict(caption_rpn=preprocess)
        else:
            preprocessing = None
        dataset = torch_util.ShapeWorldDataset(
            dataset=dataset,
            mode=(None if args.sw_mode == "none" else args.sw_mode),
            include_model=include_model,
            epoch=(args.num_samples is None),
            preprocessing=preprocessing,
        )
        loader = ShapeWorldDataLoader(dataset=dataset,
                                      batch_size=args.batch_size)
    model = None
    if args.model_type in ("CNN", "LSTM", "CNN+LSTM", "CNN+LSTM+SA"):
        assert args.baseline_model is not None
        print("Loading baseline model from", args.baseline_model)
        model, _ = utils.load_baseline(args.baseline_model)
        if args.vocab_json is not None:
            # Grow the model's question vocabulary to cover the new vocab.
            new_vocab = utils.load_vocab(args.vocab_json)
            model.rnn.expand_vocab(new_vocab["question_token_to_idx"])
    elif args.program_generator is not None and args.execution_engine is not None:
        pg, _ = utils.load_program_generator(args.program_generator,
                                             args.model_type)
        ee, _ = utils.load_execution_engine(args.execution_engine,
                                            verbose=False,
                                            model_type=args.model_type)
        if args.vocab_json is not None:
            new_vocab = utils.load_vocab(args.vocab_json)
            pg.expand_encoder_vocab(new_vocab["question_token_to_idx"])
        model = (pg, ee)
    elif args.model_type == "FiLM":
        # For FiLM both halves are loaded from the single baseline checkpoint.
        assert args.baseline_model is not None
        pg, _ = utils.load_program_generator(args.baseline_model,
                                             args.model_type)
        ee, _ = utils.load_execution_engine(args.baseline_model,
                                            verbose=False,
                                            model_type=args.model_type)
        if args.vocab_json is not None:
            new_vocab = utils.load_vocab(args.vocab_json)
            pg.expand_encoder_vocab(new_vocab["question_token_to_idx"])
        model = (pg, ee)
    else:
        print(
            "Must give either --baseline_model or --program_generator and --execution_engine"
        )
        return
    if torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor
    if args.question is not None and args.image is not None:
        run_single_example(args, model, dtype, args.question)
    # Interactive mode: one image, questions typed at a prompt.
    elif (args.image is not None and args.input_question_h5 is None
          and args.input_features_h5 is None):
        feats_var = extract_image_features(args, dtype)
        print(colored("Ask me something!", "cyan"))
        while True:
            # Get user question; loop runs until the process is killed.
            question_raw = input(">>> ")
            run_single_example(args, model, dtype, question_raw, feats_var)
    elif args.sw_name is not None or args.sw_config is not None:
        predictions, visualization = run_batch(args, model, dtype, loader)
        if args.sw_pred_dir is not None:
            # Write one "<correct> <agreement> <caption words>" line per example.
            assert args.sw_pred_name is not None
            pred_dir = os.path.join(
                args.sw_pred_dir,
                dataset.dataset.type,
                dataset.dataset.name,
                dataset.dataset.variant,
            )
            if not os.path.isdir(pred_dir):
                os.makedirs(pred_dir)
            id2word = dataset.dataset.vocabulary(value_type="language")
            with open(
                    os.path.join(
                        pred_dir,
                        args.sw_pred_name + "-" + args.sw_mode + ".txt"),
                    "w",
            ) as filehandle:
                filehandle.write("".join(
                    "{} {} {}\n".format(correct, agreement, " ".join(
                        id2word[c] for c in caption))
                    for correct, agreement, caption in zip(
                        predictions["correct"],
                        predictions["agreement"],
                        predictions["caption"],
                    )))
            print("Predictions saved")
        if args.sw_vis_dir is not None:
            # Render each world tensor back to a PNG plus an HTML index page.
            assert args.sw_vis_name is not None
            from io import BytesIO
            from shapeworld.world import World
            vis_dir = os.path.join(
                args.sw_vis_dir,
                dataset.dataset.type,
                dataset.dataset.name,
                dataset.dataset.variant,
            )
            image_dir = os.path.join(vis_dir, args.sw_mode, "images")
            if not os.path.isdir(image_dir):
                os.makedirs(image_dir)
            # (N, C, H, W) -> (N, H, W, C) for image rendering.
            worlds = np.transpose(visualization["world"], (0, 2, 3, 1))
            for n in range(worlds.shape[0]):
                image = World.get_image(world_array=worlds[n])
                image_bytes = BytesIO()
                image.save(image_bytes, format="png")
                with open(os.path.join(image_dir, "world-{}.png".format(n)),
                          "wb") as filehandle:
                    filehandle.write(image_bytes.getvalue())
                image_bytes.close()
            with open(
                    os.path.join(
                        vis_dir,
                        args.sw_vis_name + "-" + args.sw_mode + ".html"),
                    "w",
            ) as filehandle:
                html = dataset.dataset.get_html(
                    generated=visualization,
                    image_format="png",
                    image_dir=(args.sw_mode + "/images/"),
                )
                filehandle.write(html)
            print("Visualization saved")
    else:
        # Batched evaluation over CLEVR HDF5 questions/features.
        vocab = load_vocab(args)
        loader_kwargs = {
            "question_h5": args.input_question_h5,
            "feature_h5": args.input_features_h5,
            "vocab": vocab,
            "batch_size": args.batch_size,
        }
        if args.family_split_file is not None:
            with open(args.family_split_file, "r") as f:
                loader_kwargs["question_families"] = json.load(f)
        with ClevrDataLoader(**loader_kwargs) as loader:
            run_batch(args, model, dtype, loader)