Example #1
0
def main(args):
    """Entry point: load a model, then either answer a single example, run an
    interactive question loop, or evaluate over a whole HDF5 dataset.

    Exactly one of these model sources must be given:
    --baseline_model, or both --program_generator and --execution_engine.
    """
    if args.debug_every <= 1:
        pdb.set_trace()

    # Load either a baseline model or a (program generator, execution engine) pair.
    model = None
    if args.baseline_model is not None:
        print('Loading baseline model from ', args.baseline_model)
        model, _ = utils.load_baseline(args.baseline_model)
        if args.vocab_json is not None:
            new_vocab = utils.load_vocab(args.vocab_json)
            model.rnn.expand_vocab(new_vocab['question_token_to_idx'])
    elif (args.program_generator is not None
          and args.execution_engine is not None):
        pg, _ = utils.load_program_generator(args.program_generator,
                                             args.model_type)
        ee, _ = utils.load_execution_engine(args.execution_engine,
                                            verbose=False,
                                            model_type=args.model_type)
        if args.vocab_json is not None:
            new_vocab = utils.load_vocab(args.vocab_json)
            pg.expand_encoder_vocab(new_vocab['question_token_to_idx'])
        model = (pg, ee)
    else:
        # BUG FIX: the two literals were joined without a separating space,
        # printing "--program_generatorand --execution_engine".
        print('Must give either --baseline_model or --program_generator '
              'and --execution_engine')
        return

    dtype = torch.FloatTensor
    if args.use_gpu == 1:
        dtype = torch.cuda.FloatTensor

    if args.question is not None and args.image is not None:
        # Single question about a single image.
        run_single_example(args, model, dtype, args.question)
    # Interactive mode
    elif (args.image is not None and args.input_question_h5 is None
          and args.input_features_h5 is None):
        # Extract image features once, then answer user questions in a loop.
        feats_var = extract_image_features(args, dtype)
        print(colored('Ask me something!', 'cyan'))
        while True:
            # Get user question
            question_raw = input(">>> ")
            run_single_example(args, model, dtype, question_raw, feats_var)
    else:
        # Batch evaluation over an HDF5 question/feature dataset.
        vocab = load_vocab(args)
        loader_kwargs = {
            'question_h5': args.input_question_h5,
            'feature_h5': args.input_features_h5,
            'vocab': vocab,
            'batch_size': args.batch_size,
        }
        if args.num_samples is not None and args.num_samples > 0:
            loader_kwargs['max_samples'] = args.num_samples
        if args.family_split_file is not None:
            with open(args.family_split_file, 'r') as f:
                loader_kwargs['question_families'] = json.load(f)
        with ClevrDataLoader(**loader_kwargs) as loader:
            run_batch(args, model, dtype, loader)
def main(args):
    """Load a model and evaluate a single example or an entire dataset split."""
    # If only --execution_engine was supplied, reuse its checkpoint path for
    # the program generator as well.
    if not args.program_generator:
        args.program_generator = args.execution_engine

    input_question_h5 = os.path.join(
        args.data_dir, '{}_questions.h5'.format(args.part))
    input_features_h5 = os.path.join(
        args.data_dir, '{}_features.h5'.format(args.part))

    model = None
    if args.baseline_model is not None:
        print('Loading baseline model from ', args.baseline_model)
        model, _ = utils.load_baseline(args.baseline_model)
        if args.vocab_json is not None:
            expanded_vocab = utils.load_vocab(args.vocab_json)
            model.rnn.expand_vocab(expanded_vocab['question_token_to_idx'])
    elif args.program_generator is not None and args.execution_engine is not None:
        pg, _ = utils.load_program_generator(args.program_generator)
        ee, _ = utils.load_execution_engine(args.execution_engine,
                                            verbose=False)
        if args.vocab_json is not None:
            expanded_vocab = utils.load_vocab(args.vocab_json)
            pg.expand_encoder_vocab(expanded_vocab['question_token_to_idx'])
        model = (pg, ee)
    else:
        print(
            'Must give either --baseline_model or --program_generator and --execution_engine'
        )
        return

    dtype = torch.cuda.FloatTensor if args.use_gpu == 1 else torch.FloatTensor

    if args.question is not None and args.image is not None:
        # Single-example mode.
        run_single_example(args, model, dtype, args.question)
    else:
        # Batch mode over the whole split.
        vocab = load_vocab(args)
        loader_cfg = {
            'question_h5': input_question_h5,
            'feature_h5': input_features_h5,
            'vocab': vocab,
            'batch_size': args.batch_size,
        }
        if args.num_samples is not None and args.num_samples > 0:
            loader_cfg['max_samples'] = args.num_samples
        if args.family_split_file is not None:
            with open(args.family_split_file, 'r') as f:
                loader_cfg['question_families'] = json.load(f)
        with ClevrDataLoader(**loader_cfg) as loader:
            run_batch(args, model, dtype, loader)
def get_execution_engine(args):
  """Construct (or resume from a checkpoint) the execution engine.

  Returns (ee, kwargs) where kwargs records the constructor arguments so the
  same model can be re-created when the checkpoint is reloaded.
  """
  vocab = utils.load_vocab(args.vocab_json)
  if args.execution_engine_start_from is not None:
    # Resume: constructor kwargs are recovered from the checkpoint itself.
    ee, kwargs = utils.load_execution_engine(
      args.execution_engine_start_from, model_type=args.model_type)
  else:
    kwargs = dict(
      vocab=vocab,
      feature_dim=parse_int_list(args.feature_dim),
      stem_batchnorm=args.module_stem_batchnorm == 1,
      stem_num_layers=args.module_stem_num_layers,
      module_dim=args.module_dim,
      module_residual=args.module_residual == 1,
      module_batchnorm=args.module_batchnorm == 1,
      classifier_proj_dim=args.classifier_proj_dim,
      classifier_downsample=args.classifier_downsample,
      classifier_fc_layers=parse_int_list(args.classifier_fc_dims),
      classifier_batchnorm=args.classifier_batchnorm == 1,
      classifier_dropout=args.classifier_dropout,
      # NOTE(review): the following look like program-generator settings
      # folded into the execution-engine kwargs; confirm that FiLMedNet /
      # ModuleNet actually accept them.
      encoder_vocab_size=len(vocab['question_token_to_idx']),
      decoder_vocab_size=len(vocab['program_token_to_idx']),
      wordvec_dim=args.rnn_wordvec_dim,
      hidden_dim=args.rnn_hidden_dim,
      rnn_num_layers=args.rnn_num_layers,
      rnn_dropout=args.rnn_dropout,
    )
    if args.model_type == 'FiLM':
      kwargs.update(
        num_modules=args.num_modules,
        stem_kernel_size=args.module_stem_kernel_size,
        stem_stride=args.module_stem_stride,
        stem_padding=args.module_stem_padding,
        module_num_layers=args.module_num_layers,
        module_batchnorm_affine=args.module_batchnorm_affine == 1,
        module_dropout=args.module_dropout,
        module_input_proj=args.module_input_proj,
        module_kernel_size=args.module_kernel_size,
        use_gamma=args.use_gamma == 1,
        use_beta=args.use_beta == 1,
        use_coords=args.use_coords,
        debug_every=args.debug_every,
        print_verbose_every=args.print_verbose_every,
        condition_method=args.condition_method,
        condition_pattern=parse_int_list(args.condition_pattern),
        parameter_efficient=args.program_generator_parameter_efficient == 1,
        output_batchnorm=args.rnn_output_batchnorm == 1,
        bidirectional=args.bidirectional == 1,
        rnn_time_step=args.rnn_time_step,
        encoder_type=args.encoder_type,
        decoder_type=args.decoder_type,
        gamma_option=args.gamma_option,
        gamma_baseline=args.gamma_baseline,
      )
      ee = FiLMedNet(**kwargs)
    else:
      ee = ModuleNet(**kwargs)
  ee.cuda()
  ee.train()
  return ee, kwargs
Example #4
0
def get_baseline_model(args):
    """Build (or resume) a baseline VQA model.

    Supported --model_type values: 'LSTM', 'CNN+LSTM', 'CNN+LSTM+SA'.
    Returns (model, kwargs), where kwargs records the constructor arguments.

    Raises:
        ValueError: if args.model_type is not a recognized baseline type.
            (Previously an unknown type fell through all branches and crashed
            with a NameError on the undefined `model` below.)
    """
    vocab = utils.load_vocab(args.vocab_json)
    if args.baseline_start_from is not None:
        # Resume from a checkpoint; kwargs come from the saved model.
        model, kwargs = utils.load_baseline(args.baseline_start_from)
    elif args.model_type == 'LSTM':
        kwargs = {
            'vocab': vocab,
            'rnn_wordvec_dim': args.rnn_wordvec_dim,
            'rnn_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'fc_dims': parse_int_list(args.classifier_fc_dims),
            'fc_use_batchnorm': args.classifier_batchnorm == 1,
            'fc_dropout': args.classifier_dropout,
        }
        model = LstmModel(**kwargs)
    elif args.model_type == 'CNN+LSTM':
        kwargs = {
            'vocab': vocab,
            'rnn_wordvec_dim': args.rnn_wordvec_dim,
            'rnn_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'cnn_feat_dim': parse_int_list(args.feature_dim),
            'cnn_num_res_blocks': args.cnn_num_res_blocks,
            'cnn_res_block_dim': args.cnn_res_block_dim,
            'cnn_proj_dim': args.cnn_proj_dim,
            'cnn_pooling': args.cnn_pooling,
            'fc_dims': parse_int_list(args.classifier_fc_dims),
            'fc_use_batchnorm': args.classifier_batchnorm == 1,
            'fc_dropout': args.classifier_dropout,
        }
        model = CnnLstmModel(**kwargs)
    elif args.model_type == 'CNN+LSTM+SA':
        kwargs = {
            'vocab': vocab,
            'rnn_wordvec_dim': args.rnn_wordvec_dim,
            'rnn_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'cnn_feat_dim': parse_int_list(args.feature_dim),
            'stacked_attn_dim': args.stacked_attn_dim,
            'num_stacked_attn': args.num_stacked_attn,
            'fc_dims': parse_int_list(args.classifier_fc_dims),
            'fc_use_batchnorm': args.classifier_batchnorm == 1,
            'fc_dropout': args.classifier_dropout,
        }
        model = CnnLstmSaModel(**kwargs)
    else:
        # Fail fast with a clear message instead of a NameError below.
        raise ValueError('Unrecognized baseline model type: %s' % args.model_type)
    if model.rnn.token_to_idx != vocab['question_token_to_idx']:
        # Make sure new vocab is superset of old
        for k, v in model.rnn.token_to_idx.items():
            assert k in vocab['question_token_to_idx']
            assert vocab['question_token_to_idx'][k] == v
        for token, idx in vocab['question_token_to_idx'].items():
            model.rnn.token_to_idx[token] = idx
        kwargs['vocab'] = vocab
        model.rnn.expand_vocab(vocab['question_token_to_idx'])
    model.cuda()
    model.train()
    return model, kwargs
Example #5
0
def main(args):
    """Training entry point: prepare data paths, build loaders, run training."""
    if args.randomize_checkpoint_path == 1:
        base, ext = os.path.splitext(args.checkpoint_path)
        suffix = random.randint(1, 1000000)
        args.checkpoint_path = '%s_%06d%s' % (base, suffix, ext)
    print('Will save checkpoints to %s' % args.checkpoint_path)

    vocab = utils.load_vocab(args.vocab_json)

    if args.use_local_copies == 1:
        # Stage the HDF5 inputs on local disk, then point args at the copies.
        local_paths = {
            'train_question_h5': '/tmp/train_questions.h5',
            'train_features_h5': '/tmp/train_features.h5',
            'val_question_h5': '/tmp/val_questions.h5',
            'val_features_h5': '/tmp/val_features.h5',
        }
        for attr, tmp_path in local_paths.items():
            shutil.copy(getattr(args, attr), tmp_path)
            setattr(args, attr, tmp_path)

    question_families = None
    if args.family_split_file is not None:
        with open(args.family_split_file, 'r') as f:
            question_families = json.load(f)

    # Settings common to both loaders; specialized below.
    shared_kwargs = {
        'vocab': vocab,
        'batch_size': args.batch_size,
        'question_families': question_families,
        'min_program_depth': args.min_program_depth,
        'max_program_depth': args.max_program_depth,
        'num_workers': args.loader_num_workers,
    }
    train_loader_kwargs = dict(
        shared_kwargs,
        question_h5=args.train_question_h5,
        feature_h5=args.train_features_h5,
        shuffle=args.shuffle_train_data == 1,
        max_samples=args.num_train_samples,
        drop_last=True,
    )
    val_loader_kwargs = dict(
        shared_kwargs,
        question_h5=args.val_question_h5,
        feature_h5=args.val_features_h5,
        max_samples=args.num_val_samples,
    )

    with ClevrDataLoader(**train_loader_kwargs) as train_loader, \
         ClevrDataLoader(**val_loader_kwargs) as val_loader:
        train_loop(args, train_loader, val_loader)

    if args.use_local_copies == 1 and args.cleanup_local_copies == 1:
        for tmp_path in ('/tmp/train_questions.h5', '/tmp/train_features.h5',
                         '/tmp/val_questions.h5', '/tmp/val_features.h5'):
            os.remove(tmp_path)
Example #6
0
def main(args):
  """Validate a checkpoint at one program depth and record the accuracy in a
  JSON file mapping depth -> accuracy (args.output_json)."""
  assert args.min_program_depth == args.max_program_depth, \
    "This script is for validating at one singular depth."

  if args.randomize_checkpoint_path == 1:
    name, ext = os.path.splitext(args.checkpoint_path)
    num = random.randint(1, 1000000)
    args.checkpoint_path = '%s_%06d%s' % (name, num, ext)
  print('Will save checkpoints to %s' % args.checkpoint_path)

  vocab = utils.load_vocab(args.vocab_json)

  if args.use_local_copies == 1:
    # Stage the HDF5 inputs on local disk for faster repeated reads.
    shutil.copy(args.train_question_h5, '/tmp/train_questions.h5')
    shutil.copy(args.train_features_h5, '/tmp/train_features.h5')
    shutil.copy(args.val_question_h5, '/tmp/val_questions.h5')
    shutil.copy(args.val_features_h5, '/tmp/val_features.h5')
    args.train_question_h5 = '/tmp/train_questions.h5'
    args.train_features_h5 = '/tmp/train_features.h5'
    args.val_question_h5 = '/tmp/val_questions.h5'
    args.val_features_h5 = '/tmp/val_features.h5'

  question_families = None
  if args.family_split_file is not None:
    with open(args.family_split_file, 'r') as f:
      question_families = json.load(f)

  val_loader_kwargs = {
    'question_h5': args.val_question_h5,
    'feature_h5': args.val_features_h5,
    'vocab': vocab,
    'batch_size': args.batch_size,
    'question_families': question_families,
    'min_program_depth': args.min_program_depth,
    'max_program_depth': args.max_program_depth,
    'max_samples': args.num_val_samples,
    'num_workers': args.loader_num_workers,
  }

  with ClevrDataLoader(**val_loader_kwargs) as val_loader:
    val_acc = None
    if len(val_loader) > 0:
      val_acc = validation_procedure(args, val_loader)

  if val_acc is not None:
    depth_accs = dict()
    if os.path.exists(args.output_json):
      with open(args.output_json) as fd:
        depth_accs = json.load(fd)

    # BUG FIX: json.load returns string keys while the depth is an int, so
    # the membership test below could never match an existing entry and the
    # dump could emit duplicate keys. Use the string form consistently.
    program_depth = str(args.min_program_depth)
    if program_depth not in depth_accs:
      depth_accs[program_depth] = val_acc
      with open(args.output_json, 'w') as fd:
        json.dump(depth_accs, fd)
    else:
      print(f'WARNING: depth {program_depth} is already in document.')
Example #7
0
def main(args):
    """Training entry point: pin GPUs, stage data, build loaders, train."""
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_visible

    if args.randomize_checkpoint_path == 1:
        base, ext = os.path.splitext(args.checkpoint_path)
        suffix = random.randint(1, 1000000)
        args.checkpoint_path = '%s_%06d%s' % (base, suffix, ext)
    print('Will save checkpoints to %s' % args.checkpoint_path)

    vocab = utils.load_vocab(args.vocab_json)

    if args.use_local_copies == 1:
        # Stage the HDF5 inputs on local disk, then point args at the copies.
        local_paths = {
            'train_question_h5': '/tmp/train_questions.h5',
            'train_features_h5': '/tmp/train_features.h5',
            'val_question_h5': '/tmp/val_questions.h5',
            'val_features_h5': '/tmp/val_features.h5',
        }
        for attr, tmp_path in local_paths.items():
            shutil.copy(getattr(args, attr), tmp_path)
            setattr(args, attr, tmp_path)

    question_families = None
    if args.family_split_file is not None:
        with open(args.family_split_file, 'r') as f:
            question_families = json.load(f)

    # Settings common to both loaders; specialized below.
    shared_kwargs = {
        'vocab': vocab,
        'batch_size': args.batch_size,
        'question_families': question_families,
        'num_workers': args.loader_num_workers,
    }
    train_loader_kwargs = dict(
        shared_kwargs,
        question_h5=args.train_question_h5,
        feature_h5=args.train_features_h5,
        shuffle=args.shuffle_train_data == 1,
        max_samples=args.num_train_samples,
    )
    val_loader_kwargs = dict(
        shared_kwargs,
        question_h5=args.val_question_h5,
        feature_h5=args.val_features_h5,
        max_samples=args.num_val_samples,
    )

    with ClevrDataLoader(**train_loader_kwargs) as train_loader, \
         ClevrDataLoader(**val_loader_kwargs) as val_loader:
        train_loop(args, train_loader, val_loader)

    if args.use_local_copies == 1 and args.cleanup_local_copies == 1:
        for tmp_path in ('/tmp/train_questions.h5', '/tmp/train_features.h5',
                         '/tmp/val_questions.h5', '/tmp/val_features.h5'):
            os.remove(tmp_path)
Example #8
0
def check_accuracy(args, program_generator, execution_engine, baseline_model,
                   loader):
    """Compute model accuracy over `loader`.

    For 'PG', accuracy is exact-match of the decoded program strings; for all
    other model types it is answer-classification accuracy. Models are put in
    eval mode for the duration and restored to train mode before returning.
    """
    set_mode('eval', [program_generator, execution_engine, baseline_model])
    # Hoisted out of the batch loop: the vocab is only needed for 'PG' and
    # does not change between batches.
    vocab = utils.load_vocab(args.vocab_json) if args.model_type == 'PG' else None
    num_correct, num_samples = 0, 0
    for batch in loader:
        questions, _, feats, answers, programs, _ = batch
        if isinstance(questions, list):
            questions = questions[0]

        questions_var = questions.to(device=args.device)
        feats_var = feats.to(device=args.device)
        # BUG FIX: answers_var was built from `feats` (copy/paste error).
        answers_var = answers.to(device=args.device)
        # Keep programs_var defined even when programs are absent, so the 'EE'
        # branch fails on a None argument rather than an unbound name.
        programs_var = None
        if programs[0] is not None:
            programs_var = Variable(programs.cuda(), volatile=True)

        scores = None  # Use this for everything but PG
        if args.model_type == 'PG':
            for i in range(questions.size(0)):
                program_pred = program_generator.sample(
                    Variable(questions[i:i + 1].cuda(), volatile=True))
                program_pred_str = vr.preprocess.decode(
                    program_pred, vocab['program_idx_to_token'])
                program_str = vr.preprocess.decode(
                    programs[i], vocab['program_idx_to_token'])
                if program_pred_str == program_str:
                    num_correct += 1
                num_samples += 1
        elif args.model_type == 'EE':
            scores = execution_engine(feats_var, programs_var)
        elif args.model_type == 'PG+EE':
            programs_pred = program_generator.reinforce_sample(questions_var,
                                                               argmax=True)
            scores = execution_engine(feats_var, programs_pred)
        elif args.model_type == 'FiLM':
            programs_pred = program_generator(questions_var)
            scores = execution_engine(feats_var, programs_pred)
        elif args.model_type in ['LSTM', 'CNN+LSTM', 'CNN+LSTM+SA']:
            scores = baseline_model(questions_var, feats_var)

        if scores is not None:
            # Predictions are compared against the CPU `answers` tensor.
            _, preds = scores.data.cpu().max(1)
            num_correct += (preds == answers).sum()
            num_samples += preds.size(0)

        if args.num_val_samples is not None and num_samples >= args.num_val_samples:
            break

    set_mode('train', [program_generator, execution_engine, baseline_model])
    acc = float(num_correct) / num_samples
    return acc
Example #9
0
def get_execution_engine(args):
    """Construct (or resume from a checkpoint) the execution engine.

    Returns (ee, kwargs); kwargs records the constructor arguments so the
    model can be re-created when the checkpoint is reloaded.
    """
    vocab = utils.load_vocab(args.vocab_json)
    if args.execution_engine_start_from is not None:
        # Resume: constructor kwargs are recovered from the checkpoint.
        ee, kwargs = utils.load_execution_engine(
            args.execution_engine_start_from, model_type=args.model_type)
    else:
        kwargs = dict(
            vocab=vocab,
            feature_dim=parse_int_list(args.feature_dim),
            stem_batchnorm=args.module_stem_batchnorm == 1,
            stem_num_layers=args.module_stem_num_layers,
            module_dim=args.module_dim,
            module_residual=args.module_residual == 1,
            module_batchnorm=args.module_batchnorm == 1,
            classifier_proj_dim=args.classifier_proj_dim,
            classifier_downsample=args.classifier_downsample,
            classifier_fc_layers=parse_int_list(args.classifier_fc_dims),
            classifier_batchnorm=args.classifier_batchnorm == 1,
            classifier_dropout=args.classifier_dropout,
        )
        if args.model_type == 'FiLM':
            kwargs.update(
                num_modules=args.num_modules,
                stem_kernel_size=args.module_stem_kernel_size,
                stem_stride=args.module_stem_stride,
                stem_padding=args.module_stem_padding,
                module_num_layers=args.module_num_layers,
                module_batchnorm_affine=args.module_batchnorm_affine == 1,
                module_dropout=args.module_dropout,
                module_input_proj=args.module_input_proj,
                module_kernel_size=args.module_kernel_size,
                use_gamma=args.use_gamma == 1,
                use_beta=args.use_beta == 1,
                use_coords=args.use_coords,
                debug_every=args.debug_every,
                print_verbose_every=args.print_verbose_every,
                condition_method=args.condition_method,
                with_cbn=args.with_cbn,
                final_resblock_with_cbn=args.final_resblock_with_cbn,
                condition_pattern=parse_int_list(args.condition_pattern),
            )
            ee = FiLMedNet(**kwargs)
        else:
            ee = ModuleNet(**kwargs)
    ee.cuda()
    ee.train()
    return ee, kwargs
Example #10
0
def get_execution_engine(args):
  """Construct (or resume from a checkpoint) the execution engine.

  Returns (ee, kwargs); kwargs records the constructor arguments so the model
  can be re-created when the checkpoint is reloaded.
  """
  vocab = utils.load_vocab(args.vocab_json)
  if args.execution_engine_start_from is not None:
    # Resume: constructor kwargs are recovered from the checkpoint.
    ee, kwargs = utils.load_execution_engine(
      args.execution_engine_start_from, model_type=args.model_type)
  else:
    kwargs = dict(
      vocab=vocab,
      feature_dim=parse_int_list(args.feature_dim),
      stem_batchnorm=args.module_stem_batchnorm == 1,
      stem_num_layers=args.module_stem_num_layers,
      module_dim=args.module_dim,
      module_residual=args.module_residual == 1,
      module_batchnorm=args.module_batchnorm == 1,
      classifier_proj_dim=args.classifier_proj_dim,
      classifier_downsample=args.classifier_downsample,
      classifier_fc_layers=parse_int_list(args.classifier_fc_dims),
      classifier_batchnorm=args.classifier_batchnorm == 1,
      classifier_dropout=args.classifier_dropout,
    )
    if args.model_type.startswith('FiLM'):
      # Both ResNet variants use a ResNet stem; 'FiLM+ResNet0' keeps it fixed.
      kwargs.update(
        num_modules=args.num_modules,
        stem_use_resnet=args.model_type in ('FiLM+ResNet1', 'FiLM+ResNet0'),
        stem_resnet_fixed=args.model_type == 'FiLM+ResNet0',
        stem_kernel_size=args.module_stem_kernel_size,
        stem_stride2_freq=args.module_stem_stride2_freq,
        stem_padding=args.module_stem_padding,
        module_num_layers=args.module_num_layers,
        module_batchnorm_affine=args.module_batchnorm_affine == 1,
        module_dropout=args.module_dropout,
        module_input_proj=args.module_input_proj,
        module_kernel_size=args.module_kernel_size,
        use_gamma=args.use_gamma == 1,
        use_beta=args.use_beta == 1,
        use_coords=args.use_coords,
        debug_every=args.debug_every,
        print_verbose_every=args.print_verbose_every,
        condition_method=args.condition_method,
        condition_pattern=parse_int_list(args.condition_pattern),
      )
      ee = FiLMedNet(**kwargs)
    else:
      ee = ModuleNet(**kwargs)
  # Fall back to CPU when CUDA is unavailable.
  if torch.cuda.is_available():
    ee.cuda()
  else:
    ee.cpu()
  ee.train()
  return ee, kwargs
Example #11
0
def get_program_generator(args):
    """Build (or resume) the program generator; returns (pg, kwargs).

    When --gpu_devices is given the model is wrapped in DataParallel, so the
    returned object may be either the bare model or the wrapper.
    """
    vocab = utils.load_vocab(args.vocab_json)
    if args.program_generator_start_from is not None:  # it is None
        pg, kwargs = utils.load_program_generator(
            args.program_generator_start_from, model_type=args.model_type)
        cur_vocab_size = pg.encoder_embed.weight.size(0)
        if cur_vocab_size != len(vocab['question_token_to_idx']):
            # Keep the encoder vocab in sync with the current dataset vocab.
            print('Expanding vocabulary of program generator')
            pg.expand_encoder_vocab(vocab['question_token_to_idx'])
            kwargs['encoder_vocab_size'] = len(vocab['question_token_to_idx'])
    else:
        kwargs = {
            'encoder_vocab_size': len(vocab['question_token_to_idx']),
            'decoder_vocab_size': len(vocab['program_token_to_idx']),
            'wordvec_dim': args.rnn_wordvec_dim,
            'hidden_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
        }
        if args.model_type == 'FiLM':
            kwargs['parameter_efficient'] = \
                args.program_generator_parameter_efficient == 1
            kwargs['output_batchnorm'] = args.rnn_output_batchnorm == 1
            kwargs['bidirectional'] = args.bidirectional == 1
            kwargs['encoder_type'] = args.encoder_type
            kwargs['decoder_type'] = args.decoder_type
            kwargs['gamma_option'] = args.gamma_option
            kwargs['gamma_baseline'] = args.gamma_baseline
            kwargs['num_modules'] = args.num_modules
            kwargs['module_num_layers'] = args.module_num_layers
            kwargs['module_dim'] = args.module_dim
            kwargs['debug_every'] = args.debug_every
            pg = FiLMGen(**kwargs)
        else:
            pg = Seq2Seq(**kwargs)
    pg.cuda()
    pg.encoder_rnn.flatten_parameters()
    if args.gpu_devices:
        gpu_ids = parse_int_list(args.gpu_devices)
        pg = DataParallel(pg, device_ids=gpu_ids)
    pg.train()
    # BUG FIX: the original unconditionally accessed pg.module here, which
    # raises AttributeError whenever no DataParallel wrapper was applied
    # (i.e. args.gpu_devices is empty/None). Unwrap only when wrapped.
    core = pg.module if isinstance(pg, DataParallel) else pg
    core.encoder_rnn.flatten_parameters()
    return pg, kwargs
Example #12
0
def get_program_generator(args):
  """Build (or resume) the program generator; returns (pg, kwargs)."""
  vocab = utils.load_vocab(args.vocab_json)
  if args.program_generator_start_from is not None:
    pg, kwargs = utils.load_program_generator(
      args.program_generator_start_from, model_type=args.model_type)
    # Keep the encoder vocab in sync with the current dataset vocab.
    saved_vocab_size = pg.encoder_embed.weight.size(0)
    if saved_vocab_size != len(vocab['question_token_to_idx']):
      print('Expanding vocabulary of program generator')
      pg.expand_encoder_vocab(vocab['question_token_to_idx'])
      kwargs['encoder_vocab_size'] = len(vocab['question_token_to_idx'])
  else:
    kwargs = dict(
      encoder_vocab_size=len(vocab['question_token_to_idx']),
      decoder_vocab_size=len(vocab['program_token_to_idx']),
      wordvec_dim=args.rnn_wordvec_dim,
      hidden_dim=args.rnn_hidden_dim,
      rnn_num_layers=args.rnn_num_layers,
      rnn_dropout=args.rnn_dropout,
    )
    if args.model_type.startswith('FiLM'):
      film_kwargs = dict(
        parameter_efficient=args.program_generator_parameter_efficient == 1,
        output_batchnorm=args.rnn_output_batchnorm == 1,
        bidirectional=args.bidirectional == 1,
        encoder_type=args.encoder_type,
        decoder_type=args.decoder_type,
        gamma_option=args.gamma_option,
        gamma_baseline=args.gamma_baseline,
        num_modules=args.num_modules,
        module_num_layers=args.module_num_layers,
        module_dim=args.module_dim,
        debug_every=args.debug_every,
      )
      # The bag-of-words variant overrides whatever --encoder_type said.
      if args.model_type == 'FiLM+BoW':
        film_kwargs['encoder_type'] = 'bow'
      kwargs.update(film_kwargs)
      pg = FiLMGen(**kwargs)
    else:
      pg = Seq2Seq(**kwargs)
  # Fall back to CPU when CUDA is unavailable.
  if torch.cuda.is_available():
    pg.cuda()
  else:
    pg.cpu()
  pg.train()
  return pg, kwargs
Example #13
0
def rewrite_programs(src_dir, dst_dir):
    """Copy a CLEVR question dataset from src_dir to dst_dir, rewriting the
    program annotations so that shortcut-needing functions get an extra
    argument (via add_shortcuts) and updating the vocab's arity table.
    """
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    vocab = load_vocab(os.path.join(src_dir, 'vocab.json'))
    # Keep an unmodified copy: add_shortcuts must see the ORIGINAL arities.
    old_vocab = copy.deepcopy(vocab)
    arity = vocab['program_token_arity']

    # Step 1: change the arity of filters, then save the updated vocab
    # alongside the rewritten questions.
    for func in arity.keys():
        if needs_shortcut(func):
            arity[func] = 2
    with open(os.path.join(dst_dir, 'vocab.json'), 'w') as dst:
        json.dump(vocab, dst)

    # Step 2: rewrite the program dataset of every split.
    for part in ['train', 'val', 'test']:
        src_questions = "{}/{}_questions.h5".format(src_dir, part)
        dst_questions = "{}/{}_questions.h5".format(dst_dir, part)
        # BUG FIX: open the source file explicitly read-only. Without a mode,
        # older h5py defaults to append ('a'), which can lock or even modify
        # the source dataset.
        with h5py.File(src_questions, 'r') as src_file:
            programs = src_file['programs']
            prog_wshortcuts = [
                add_shortcuts(programs[i], old_vocab)[1]
                for i in range(len(programs))
            ]
            new_max_program_len = max(len(p) for p in prog_wshortcuts)

            # Copy the whole file, then replace just the 'programs' dataset
            # with the (possibly longer) shortcut versions.
            shutil.copyfile(src_questions, dst_questions)
            with h5py.File(dst_questions, 'a') as dst_file:
                del dst_file['programs']
                program_dataset = dst_file.create_dataset(
                    'programs', (len(prog_wshortcuts), new_max_program_len),
                    dtype=numpy.int64)
                for i, prog in enumerate(prog_wshortcuts):
                    program_dataset[i, :len(prog)] = prog
Example #14
0
def train_loop(args, train_loader, val_loader):
    """Train the model selected by ``args.model_type``.

    Supports PG (program generator trained on ground-truth programs), EE
    (execution engine on ground-truth programs), PG+EE (REINFORCE-coupled),
    FiLM, and the LSTM / CNN+LSTM / CNN+LSTM+SA baselines.  Every
    ``args.checkpoint_every`` iterations it measures train/val accuracy and
    saves the best-so-far model states to ``args.checkpoint_path`` (full
    torch checkpoint plus a stats-only ``.json`` sidecar).
    """
    vocab = utils.load_vocab(args.vocab_json)
    program_generator, pg_kwargs, pg_optimizer = None, None, None
    execution_engine, ee_kwargs, ee_optimizer = None, None, None
    baseline_model, baseline_kwargs, baseline_optimizer = None, None, None
    baseline_type = None

    pg_best_state, ee_best_state, baseline_best_state = None, None, None

    # Set up model
    optim_method = getattr(torch.optim, args.optimizer)
    if args.model_type in ['FiLM', 'PG', 'PG+EE']:
        program_generator, pg_kwargs = get_program_generator(args)
        pg_optimizer = optim_method(program_generator.parameters(),
                                    lr=args.learning_rate,
                                    weight_decay=args.weight_decay)
        print('Here is the conditioning network:')
        print(program_generator)
    if args.model_type in ['FiLM', 'EE', 'PG+EE']:
        execution_engine, ee_kwargs = get_execution_engine(args)
        ee_optimizer = optim_method(execution_engine.parameters(),
                                    lr=args.learning_rate,
                                    weight_decay=args.weight_decay)
        print('Here is the conditioned network:')
        print(execution_engine)
    if args.model_type in ['LSTM', 'CNN+LSTM', 'CNN+LSTM+SA']:
        baseline_model, baseline_kwargs = get_baseline_model(args)
        params = baseline_model.parameters()
        if args.baseline_train_only_rnn == 1:
            params = baseline_model.rnn.parameters()
        baseline_optimizer = optim_method(params,
                                          lr=args.learning_rate,
                                          weight_decay=args.weight_decay)
        print('Here is the baseline model')
        print(baseline_model)
        baseline_type = args.model_type
    loss_fn = torch.nn.CrossEntropyLoss().cuda()

    stats = {
        'train_losses': [], 'train_rewards': [], 'train_losses_ts': [],
        'train_accs': [], 'val_accs': [], 'val_accs_ts': [],
        'best_val_acc': -1, 'model_t': 0,
    }
    t, epoch, reward_moving_average = 0, 0, 0

    set_mode('train', [program_generator, execution_engine, baseline_model])

    print('train_loader has %d samples' % len(train_loader.dataset))
    print('val_loader has %d samples' % len(val_loader.dataset))

    num_checkpoints = 0
    epoch_start_time = 0.0
    epoch_total_time = 0.0
    train_pass_total_time = 0.0
    val_pass_total_time = 0.0
    running_loss = 0.0
    while t < args.num_iterations:
        if (epoch > 0) and (args.time == 1):
            epoch_time = time.time() - epoch_start_time
            epoch_total_time += epoch_time
            print(colored('EPOCH PASS AVG TIME: ' +
                          str(epoch_total_time / epoch), 'white'))
            print(colored('Epoch Pass Time            : ' +
                          str(epoch_time), 'white'))
        epoch_start_time = time.time()

        epoch += 1
        print('Starting epoch %d' % epoch)
        for batch in train_loader:
            t += 1
            questions, _, feats, answers, programs, _ = batch
            if isinstance(questions, list):
                questions = questions[0]
            questions_var = Variable(questions.cuda())
            feats_var = Variable(feats.cuda())
            answers_var = Variable(answers.cuda())
            if programs[0] is not None:
                programs_var = Variable(programs.cuda())

            reward = None
            if args.model_type == 'PG':
                # Train program generator with ground-truth programs
                pg_optimizer.zero_grad()
                loss = program_generator(questions_var, programs_var)
                loss.backward()
                pg_optimizer.step()
            elif args.model_type == 'EE':
                # Train execution engine with ground-truth programs
                ee_optimizer.zero_grad()
                scores = execution_engine(feats_var, programs_var)
                loss = loss_fn(scores, answers_var)
                loss.backward()
                ee_optimizer.step()
            elif args.model_type in ['LSTM', 'CNN+LSTM', 'CNN+LSTM+SA']:
                baseline_optimizer.zero_grad()
                baseline_model.zero_grad()
                scores = baseline_model(questions_var, feats_var)
                loss = loss_fn(scores, answers_var)
                loss.backward()
                baseline_optimizer.step()
            elif args.model_type == 'PG+EE':
                # Sample programs, score them with the EE, and use the
                # (centered) answer correctness as a REINFORCE reward for
                # the program generator.
                programs_pred = program_generator.reinforce_sample(
                    questions_var)
                scores = execution_engine(feats_var, programs_pred)

                loss = loss_fn(scores, answers_var)
                _, preds = scores.data.cpu().max(1)
                raw_reward = (preds == answers).float()
                reward_moving_average *= args.reward_decay
                reward_moving_average += ((1.0 - args.reward_decay) *
                                          raw_reward.mean())
                centered_reward = raw_reward - reward_moving_average

                if args.train_execution_engine == 1:
                    ee_optimizer.zero_grad()
                    loss.backward()
                    ee_optimizer.step()

                if args.train_program_generator == 1:
                    pg_optimizer.zero_grad()
                    program_generator.reinforce_backward(
                        centered_reward.cuda())
                    pg_optimizer.step()
            elif args.model_type == 'FiLM':
                if args.set_execution_engine_eval == 1:
                    set_mode('eval', [execution_engine])
                programs_pred = program_generator(questions_var)
                scores = execution_engine(feats_var, programs_pred)
                loss = loss_fn(scores, answers_var)

                pg_optimizer.zero_grad()
                ee_optimizer.zero_grad()
                if args.debug_every <= -2:
                    pdb.set_trace()
                loss.backward()
                if args.debug_every < float('inf'):
                    check_grad_num_nans(execution_engine, 'FiLMedNet')
                    check_grad_num_nans(program_generator, 'FiLMGen')

                if args.train_program_generator == 1:
                    if args.grad_clip > 0:
                        torch.nn.utils.clip_grad_norm(
                            program_generator.parameters(), args.grad_clip)
                    pg_optimizer.step()
                if args.train_execution_engine == 1:
                    if args.grad_clip > 0:
                        torch.nn.utils.clip_grad_norm(
                            execution_engine.parameters(), args.grad_clip)
                    ee_optimizer.step()

            if t % args.record_loss_every == 0:
                # Extract the scalar with .item(): indexing a 0-dim tensor
                # (`loss.data[0]`) was removed in modern PyTorch, and the
                # rest of this file already uses `loss.data.item()`.
                running_loss += loss.data.item()
                avg_loss = running_loss / args.record_loss_every
                print(t, avg_loss)
                stats['train_losses'].append(avg_loss)
                stats['train_losses_ts'].append(t)
                if reward is not None:
                    stats['train_rewards'].append(reward)
                running_loss = 0.0
            else:
                running_loss += loss.data.item()

            if t % args.checkpoint_every == 0:
                num_checkpoints += 1
                print('Checking training accuracy ... ')
                start = time.time()
                # NOTE(review): check_accuracy as defined later in this file
                # takes a scene2action_model argument that is not passed
                # here — confirm which check_accuracy this loop pairs with.
                train_acc = check_accuracy(
                    args, program_generator, execution_engine, baseline_model,
                    train_loader)
                if args.time == 1:
                    train_pass_time = (time.time() - start)
                    train_pass_total_time += train_pass_time
                    print(colored('TRAIN PASS AVG TIME: ' +
                                  str(train_pass_total_time / num_checkpoints),
                                  'red'))
                    print(colored('Train Pass Time            : ' +
                                  str(train_pass_time), 'red'))
                print('train accuracy is', train_acc)
                print('Checking validation accuracy ...')
                start = time.time()
                val_acc = check_accuracy(
                    args, program_generator, execution_engine, baseline_model,
                    val_loader)
                if args.time == 1:
                    val_pass_time = (time.time() - start)
                    val_pass_total_time += val_pass_time
                    print(colored('VAL PASS AVG TIME:     ' +
                                  str(val_pass_total_time / num_checkpoints),
                                  'cyan'))
                    print(colored('Val Pass Time                : ' +
                                  str(val_pass_time), 'cyan'))
                print('val accuracy is ', val_acc)
                stats['train_accs'].append(train_acc)
                stats['val_accs'].append(val_acc)
                stats['val_accs_ts'].append(t)

                if val_acc > stats['best_val_acc']:
                    stats['best_val_acc'] = val_acc
                    stats['model_t'] = t
                    best_pg_state = get_state(program_generator)
                    best_ee_state = get_state(execution_engine)
                    best_baseline_state = get_state(baseline_model)

                checkpoint = {
                    'args': args.__dict__,
                    'program_generator_kwargs': pg_kwargs,
                    'program_generator_state': best_pg_state,
                    'execution_engine_kwargs': ee_kwargs,
                    'execution_engine_state': best_ee_state,
                    'baseline_kwargs': baseline_kwargs,
                    'baseline_state': best_baseline_state,
                    'baseline_type': baseline_type,
                    'vocab': vocab
                }
                for k, v in stats.items():
                    checkpoint[k] = v
                print('Saving checkpoint to %s' % args.checkpoint_path)
                torch.save(checkpoint, args.checkpoint_path)
                # Drop the (large) state dicts before writing the JSON
                # sidecar, which only needs args + stats.
                del checkpoint['program_generator_state']
                del checkpoint['execution_engine_state']
                del checkpoint['baseline_state']
                with open(args.checkpoint_path + '.json', 'w') as f:
                    json.dump(checkpoint, f)

            if t == args.num_iterations:
                break
def check_accuracy(args, scene2action_model, program_generator,
                   execution_engine, baseline_model, loader):
    """Compute answer (or program, for PG) accuracy on ``loader``.

    Each example's scene features are first refined by repeatedly querying
    ``scene2action_model`` and accumulating the selected auxiliary viewpoint
    features, then the VQA model chosen by ``args.model_type`` is scored on
    the refined features.  All models are set to eval mode for the pass and
    restored to train mode before returning.
    """
    ##### Modified #####
    set_mode('eval', [
        scene2action_model, program_generator, execution_engine, baseline_model
    ])
    num_correct, num_samples = 0, 0
    for batch in loader:
        ##### Modified #####
        questions, _, feats, feats_aux, answers, programs, _ = batch

        if isinstance(questions, list):
            questions = questions[0]

        questions_var = Variable(questions.cuda(), volatile=True)
        feats_var = Variable(feats.cuda(), volatile=True)
        ##### Modified #####
        feats_var_aux = Variable(feats_aux.cuda(), volatile=True)
        # Bug fix: this previously wrapped `feats` instead of `answers`.
        answers_var = Variable(answers.cuda(), volatile=True)
        if programs[0] is not None:
            programs_var = Variable(programs.cuda(), volatile=True)

        ##### Modified #####------------------------------------------
        ### Preprocess batch started ###

        # For each data in the current batch (step-by-step)
        for turn in range(feats_var.size(0)):
            current_action = -1
            current_count = 0
            current_scene = feats_var[turn]
            current_question = questions_var[turn]

            # Keep taking actions until the model emits the end action or
            # the step budget is exhausted.
            while ((current_action != args.end_action_index)
                   and (current_count < args.maximum_action_number)):
                current_count = current_count + 1

                current_action, _ = scene2action_model(current_scene,
                                                       current_question)
                if (current_action != args.end_action_index):
                    if (current_action == 0):
                        # Action 0 re-adds the original viewpoint features.
                        current_scene = current_scene + feats_var[turn]
                    else:
                        current_scene = current_scene + feats_var_aux[turn][
                            current_action - 1]

            ### Prepare data for VQA model ###
            feats_var[turn] = current_scene
        ### Preprocess batch ended ###
        ##### Modified #####------------------------------------------

        scores = None  # Use this for everything but PG
        if args.model_type == 'PG':
            # PG accuracy is exact-match between predicted and ground-truth
            # program strings, one question at a time.
            vocab = utils.load_vocab(args.vocab_json)
            for i in range(questions.size(0)):
                program_pred = program_generator.sample(
                    Variable(questions[i:i + 1].cuda(), volatile=True))
                program_pred_str = vr.preprocess.decode(
                    program_pred, vocab['program_idx_to_token'])
                program_str = vr.preprocess.decode(
                    programs[i], vocab['program_idx_to_token'])
                if program_pred_str == program_str:
                    num_correct += 1
                num_samples += 1
        elif args.model_type == 'EE':
            scores = execution_engine(feats_var, programs_var)
        elif args.model_type == 'PG+EE':
            programs_pred = program_generator.reinforce_sample(questions_var,
                                                               argmax=True)
            scores = execution_engine(feats_var, programs_pred)
        elif args.model_type == 'FiLM':
            programs_pred = program_generator(questions_var)
            scores = execution_engine(feats_var, programs_pred)
        elif args.model_type in ['LSTM', 'CNN+LSTM', 'CNN+LSTM+SA']:
            scores = baseline_model(questions_var, feats_var)

        if scores is not None:
            _, preds = scores.data.cpu().max(1)
            num_correct += (preds == answers).sum()
            num_samples += preds.size(0)

        if args.num_val_samples is not None and num_samples >= args.num_val_samples:
            break

    set_mode('train', [
        scene2action_model, program_generator, execution_engine, baseline_model
    ])
    acc = float(num_correct) / num_samples
    return acc
def train_loop(args, train_loader, val_loader, writer):
    """Training loop with a scene2action viewpoint-selection model.

    Extends the base FiLM/PG/EE training loop: before each VQA update, a
    Scene2Action model iteratively picks viewpoints whose auxiliary features
    are accumulated into each example's scene features, and is itself trained
    with a REINFORCE-style loss (decayed, normalized per-step VQA losses as
    negative rewards plus a route-length penalty).  Metrics are written to
    the TensorBoard ``writer``; checkpoints are saved per evaluation.
    """
    vocab = utils.load_vocab(args.vocab_json)
    program_generator, pg_kwargs, pg_optimizer = None, None, None
    execution_engine, ee_kwargs, ee_optimizer = None, None, None
    baseline_model, baseline_kwargs, baseline_optimizer = None, None, None
    baseline_type = None

    pg_best_state, ee_best_state, baseline_best_state = None, None, None

    # Set up model
    optim_method = getattr(torch.optim, args.optimizer)

    ##### Modified #####
    scene2action_model, scene2action_optimizer = None, None

    s2a_kwargs = {
        'feat_dim': args.scene2action_feat_dim,
        'hidden_dim': args.scene2action_hidden_dim,
        'action_dim': args.scene2action_action_dim,
        'dropout': args.scene2action_dropout,
        'word_vocab_size': args.scene2action_word_vocab_size,
        'word_embed_size': args.scene2action_word_embed_size,
        'lstm_hidden_size': args.scene2action_lstm_hidden_size,
        'lstm_num_layers': args.scene2action_lstm_num_layers,
    }

    scene2action_model = Scene2Action(**s2a_kwargs)
    scene2action_model.cuda()
    scene2action_model.train()
    scene2action_optimizer = optim_method(scene2action_model.parameters(),
                                          lr=args.learning_rate,
                                          weight_decay=args.weight_decay)

    print("Here is the scene to action network: ")
    print(scene2action_model)

    if args.model_type in ['FiLM', 'PG', 'PG+EE']:
        program_generator, pg_kwargs = get_program_generator(args)
        pg_optimizer = optim_method(program_generator.parameters(),
                                    lr=args.learning_rate,
                                    weight_decay=args.weight_decay)
        print('Here is the conditioning network:')
        print(program_generator)
    if args.model_type in ['FiLM', 'EE', 'PG+EE']:
        execution_engine, ee_kwargs = get_execution_engine(args)
        ee_optimizer = optim_method(execution_engine.parameters(),
                                    lr=args.learning_rate,
                                    weight_decay=args.weight_decay)
        print('Here is the conditioned network:')
        print(execution_engine)
    if args.model_type in ['LSTM', 'CNN+LSTM', 'CNN+LSTM+SA']:
        baseline_model, baseline_kwargs = get_baseline_model(args)
        params = baseline_model.parameters()
        if args.baseline_train_only_rnn == 1:
            params = baseline_model.rnn.parameters()
        baseline_optimizer = optim_method(params,
                                          lr=args.learning_rate,
                                          weight_decay=args.weight_decay)
        print('Here is the baseline model')
        print(baseline_model)
        baseline_type = args.model_type

    loss_fn = torch.nn.CrossEntropyLoss().cuda()

    ##### Modified #####
    # NOTE(review): loss_s2a is created but never used below; the s2a loss
    # is built directly from the per-step VQA losses — confirm intent.
    loss_s2a = torch.nn.CrossEntropyLoss().cuda()

    ##### Modified #####
    stats = {
        'train_losses': [],
        'train_rewards': [],
        'train_losses_ts': [],
        'train_accs': [],
        'val_accs': [],
        'val_accs_ts': [],
        'best_val_acc': -1,
        'model_t': 0,
        'train_losses_s2a': [],
    }
    t, epoch, reward_moving_average = 0, 0, 0

    ##### Modified #####
    set_mode('train', [
        scene2action_model, program_generator, execution_engine, baseline_model
    ])

    print('train_loader has %d samples' % len(train_loader.dataset))
    print('val_loader has %d samples' % len(val_loader.dataset))

    num_checkpoints = 0
    epoch_start_time = 0.0
    epoch_total_time = 0.0
    train_pass_total_time = 0.0
    val_pass_total_time = 0.0
    running_loss = 0.0

    while t < args.num_iterations:
        if (epoch > 0) and (args.time == 1):
            epoch_time = time.time() - epoch_start_time
            epoch_total_time += epoch_time
            print(
                colored(
                    'EPOCH PASS AVG TIME: ' + str(epoch_total_time / epoch),
                    'white'))
            print(colored('Epoch Pass Time      : ' + str(epoch_time),
                          'white'))
        epoch_start_time = time.time()

        epoch += 1

        print('Starting epoch %d' % epoch)

        for batch in train_loader:

            t += 1
            print(
                "Current batch " + str(t)
            )  #------------------------------------------------------------------------------------#

            ##### Modified #####
            questions, _, feats, feats_aux, answers, programs, _ = batch
            if isinstance(questions, list):
                questions = questions[0]
            questions_var = Variable(questions.cuda())

            feats_var = Variable(feats.cuda())
            ##### Modified ####
            feats_var_aux = Variable(feats_aux.cuda())
            answers_var = Variable(answers.cuda())

            #print("answers var 0 " + str(answers_var.size(0))) ### 64

            if programs[0] is not None:
                programs_var = Variable(programs.cuda())

            reward = None

            ##### Modified #####---------------------------------------------------------
            ### Preprocess batch started ###

            # For each data in the current batch (step-by-step)

            avg_action_length = 0.0
            total_repeated_rate = 0.0
            count_turn = 0

            # Per-example viewpoint selection: the scene2action model picks
            # actions until the end action or the step budget is reached,
            # and is updated once per example with a REINFORCE-style loss.
            for turn in range(feats_var.size(0)):
                #print("Current turn " + str(turn)) #------------------------------------------------------------------------------------#

                # PG/EE are frozen (eval mode) while scoring each candidate
                # scene; they are restored to train mode after the batch.
                set_mode('eval', [program_generator, execution_engine])
                current_action = -1
                current_count = 0
                current_scene = feats_var[turn]
                current_question = questions_var[turn]
                current_answer = answers_var[turn]
                actions = []
                flag = 0

                while ((current_action != args.end_action_index)
                       and (current_count < args.maximum_action_number)):
                    current_count += 1
                    current_action, action_propability = scene2action_model(
                        current_scene, current_question)
                    actions.append(current_action.item())
                    #print("Current_action " + str(current_action))
                    #print("action_propability " + str(action_propability))

                    #print("current action after " + str(current_action))
                    if (current_action != args.end_action_index):
                        if (current_action == 0):
                            # Action 0 re-adds the original viewpoint.
                            current_scene = current_scene + feats_var[turn]
                        else:
                            current_scene = current_scene + feats_var_aux[
                                turn][current_action - 1]  ##### To check #####

                    temp_question = current_question.view(
                        1, -1).clone()  ##### To check #####
                    programs_pred = program_generator(temp_question)
                    # NOTE(review): hard-coded feature shape (1, 256, 16, 16)
                    # — confirm it matches the feature extractor's output.
                    current_scene = current_scene.view(
                        1, 256, 16, 16)  ##### To check #####
                    scores = execution_engine(current_scene, programs_pred)

                    current_answer = current_answer.view(
                        -1)  ##### To check #####
                    loss_current = loss_fn(scores, current_answer)

                    # Accumulate the per-step VQA loss (as a detached scalar)
                    # into a (steps, 1) tensor of negative rewards.
                    if (flag == 0):
                        Minus_reward_currentturn = torch.zeros(1, 1)
                        Minus_reward_currentturn[0][0] = torch.from_numpy(
                            np.array(loss_current.data.cpu().numpy()))
                    else:
                        temp = torch.zeros(1, 1)
                        temp[0][0] = torch.from_numpy(
                            np.array(loss_current.data.cpu().numpy()))
                        Minus_reward_currentturn = torch.cat(
                            (Minus_reward_currentturn, temp), 0)  ### ??? ###

                    # Same accumulation for the action log-probabilities.
                    if (flag == 0):
                        Logits_currentturn = torch.zeros(1, 1)
                        Logits_currentturn[0][0] = torch.from_numpy(
                            np.array(action_propability.data.cpu().numpy()))
                        flag = flag + 1
                    else:
                        temp = torch.zeros(1, 1)
                        temp[0][0] = torch.from_numpy(
                            np.array(action_propability.data.cpu().numpy()))

                        Logits_currentturn = torch.cat(
                            (Logits_currentturn, temp),
                            0)  ##### To check #####

                #print("Current Action " + str(current_action)) #------------------------------------------------------------------------------------#

                vt = decay_normalize_loss(Minus_reward_currentturn,
                                          args.scene2action_gamma)
                Logits_currentturn = Logits_currentturn.cuda()

                vt = vt.cuda()

                current_route_length = torch.tensor(len(actions))
                current_route_length_cuda = current_route_length.cuda()
                ##### Modified #####
                ### Add route loss here ### route_loss_rate
                loss_current_s2a = torch.mean(
                    Logits_currentturn *
                    vt) + args.route_loss_rate * current_route_length_cuda
                # NOTE(review): both factors above were rebuilt from detached
                # .data copies, so this loss is cut off from the model's
                # graph; forcing requires_grad here looks like a workaround
                # — confirm gradients actually reach scene2action_model.
                loss_current_s2a.requires_grad = True

                scene2action_optimizer.zero_grad()

                loss_current_s2a = loss_current_s2a.cuda()
                loss_current_s2a.backward()
                scene2action_optimizer.step()

                # Overwrite the batch features in place with the refined
                # scene for the subsequent VQA update.
                feats_var[turn] = current_scene

                avg_action_length = avg_action_length + len(actions)

                # Fraction of actions in this route that repeat an earlier
                # action (each earlier action counted at most once).
                repeated_actions = 0
                for a1 in range(len(actions) - 1):
                    for a2 in range(a1 + 1, len(actions)):
                        if actions[a1] == actions[a2]:
                            repeated_actions = repeated_actions + 1
                            break
                total_repeated_rate = total_repeated_rate + (
                    repeated_actions) / (float)(len(actions))
                count_turn = count_turn + 1

            # Only the last example's s2a loss / action route is logged.
            writer.add_scalar('scene2action_loss', loss_current_s2a.item(), t)

            ### Add avg length, no_repeated rate
            writer.add_scalar('avg_route_length',
                              avg_action_length / (float)(count_turn), t)
            writer.add_scalar('avg_repeat_rate',
                              total_repeated_rate / (float)(count_turn), t)

            ### Turn actions to image ###
            image_ = torch.zeros(3, 13, 13)
            print("actions ----------------------- ")
            print(actions)

            # NOTE(review): channel 0 is assigned 0 to an already-zero
            # tensor (no-op); only channels 1 and 2 are marked — confirm
            # the intended visualization color.
            for ii_ in range(len(actions)):
                image_[0][actions[ii_]][ii_] = 0
                image_[1][actions[ii_]][ii_] = 1
                image_[2][actions[ii_]][ii_] = 1

            writer.add_image('selected_viewpoints', image_, t)

            set_mode('train', [
                scene2action_model, program_generator, execution_engine,
                baseline_model
            ])
            #exit()
            ### Preprocess batch ended ###
            ##### Modified #####---------------------------------------------------------

            if args.model_type == 'PG':
                # Train program generator with ground-truth programs
                pg_optimizer.zero_grad()
                loss = program_generator(questions_var, programs_var)
                loss.backward()
                pg_optimizer.step()
            elif args.model_type == 'EE':
                # Train execution engine with ground-truth programs
                ee_optimizer.zero_grad()
                scores = execution_engine(feats_var, programs_var)
                loss = loss_fn(scores, answers_var)
                loss.backward()
                ee_optimizer.step()
            elif args.model_type in ['LSTM', 'CNN+LSTM', 'CNN+LSTM+SA']:
                baseline_optimizer.zero_grad()
                baseline_model.zero_grad()
                scores = baseline_model(questions_var, feats_var)
                loss = loss_fn(scores, answers_var)
                loss.backward()
                baseline_optimizer.step()
            elif args.model_type == 'PG+EE':
                programs_pred = program_generator.reinforce_sample(
                    questions_var)
                scores = execution_engine(feats_var, programs_pred)

                loss = loss_fn(scores, answers_var)
                _, preds = scores.data.cpu().max(1)
                raw_reward = (preds == answers).float()
                reward_moving_average *= args.reward_decay
                reward_moving_average += (
                    1.0 - args.reward_decay) * raw_reward.mean()
                centered_reward = raw_reward - reward_moving_average

                if args.train_execution_engine == 1:
                    ee_optimizer.zero_grad()
                    loss.backward()
                    ee_optimizer.step()

                if args.train_program_generator == 1:
                    pg_optimizer.zero_grad()
                    program_generator.reinforce_backward(
                        centered_reward.cuda())
                    pg_optimizer.step()
            elif args.model_type == 'FiLM':
                if args.set_execution_engine_eval == 1:
                    set_mode('eval', [execution_engine])
                programs_pred = program_generator(questions_var)
                scores = execution_engine(feats_var, programs_pred)
                loss = loss_fn(scores, answers_var)

                pg_optimizer.zero_grad()
                ee_optimizer.zero_grad()
                if args.debug_every <= -2:
                    pdb.set_trace()
                loss.backward()
                if args.debug_every < float('inf'):
                    check_grad_num_nans(execution_engine, 'FiLMedNet')
                    check_grad_num_nans(program_generator, 'FiLMGen')

                if args.train_program_generator == 1:
                    if args.grad_clip > 0:
                        torch.nn.utils.clip_grad_norm(
                            program_generator.parameters(), args.grad_clip)
                    pg_optimizer.step()
                if args.train_execution_engine == 1:
                    if args.grad_clip > 0:
                        torch.nn.utils.clip_grad_norm(
                            execution_engine.parameters(), args.grad_clip)
                    ee_optimizer.step()

            if t % args.record_loss_every == 0:
                running_loss += loss.data.item()
                avg_loss = running_loss / args.record_loss_every
                print(t, avg_loss)
                stats['train_losses'].append(avg_loss)
                stats['train_losses_ts'].append(t)
                ##### Modified #####
                stats['train_losses_s2a'].append(loss_current_s2a)
                if reward is not None:
                    stats['train_rewards'].append(reward)
                running_loss = 0.0
            else:
                running_loss += loss.data.item()

            writer.add_scalar('train_losses', loss.data.item(), t)
            if t % args.checkpoint_every == 0:
                num_checkpoints += 1
                print('Checking training accuracy ... ')
                start = time.time()
                """
        ##### Modified #####
        train_acc = check_accuracy(args, scene2action_model, program_generator, execution_engine,
                                   baseline_model, train_loader)
        if args.time == 1:
          train_pass_time = (time.time() - start)
          train_pass_total_time += train_pass_time
          print(colored('TRAIN PASS AVG TIME: ' + str(train_pass_total_time / num_checkpoints), 'red'))
          print(colored('Train Pass Time      : ' + str(train_pass_time), 'red'))
        print('train accuracy is', train_acc)
        print('Checking validation accuracy ...')
        start = time.time()
        ##### Modified #####
        """
                val_acc = check_accuracy(args, scene2action_model,
                                         program_generator, execution_engine,
                                         baseline_model, val_loader)
                if args.time == 1:
                    val_pass_time = (time.time() - start)
                    val_pass_total_time += val_pass_time
                    print(
                        colored(
                            'VAL PASS AVG TIME:   ' +
                            str(val_pass_total_time / num_checkpoints),
                            'cyan'))
                    print(
                        colored('Val Pass Time        : ' + str(val_pass_time),
                                'cyan'))
                print('val accuracy is ', val_acc)

                ### Val_acc
                writer.add_scalar('val_acc', val_acc, t)
                """
        stats['train_accs'].append(train_acc)
        stats['val_accs'].append(val_acc)
        stats['val_accs_ts'].append(t)

        #if val_acc > stats['best_val_acc']:
        stats['best_val_acc'] = val_acc
        stats['model_t'] = t
        """
                ##### Modified #####
                # States are saved every checkpoint (no best-val gating in
                # this modified loop).
                best_scene2action_state = get_state(scene2action_model)
                best_pg_state = get_state(program_generator)
                best_ee_state = get_state(execution_engine)
                best_baseline_state = get_state(baseline_model)

                ##### Modified #####
                checkpoint = {
                    'args': args.__dict__,
                    'scene2action_kwargs': s2a_kwargs,
                    'scene2action_state': best_scene2action_state,
                    'program_generator_kwargs': pg_kwargs,
                    'program_generator_state': best_pg_state,
                    'execution_engine_kwargs': ee_kwargs,
                    'execution_engine_state': best_ee_state,
                    'baseline_kwargs': baseline_kwargs,
                    'baseline_state': best_baseline_state,
                    'baseline_type': baseline_type,
                    'vocab': vocab
                }
                for k, v in stats.items():
                    checkpoint[k] = v
                print('Saving checkpoint to %s' % args.checkpoint_path)
                torch.save(checkpoint,
                           args.checkpoint_path + '_' + str(t) + '.pt')
                ##### Modified #####
                del checkpoint['scene2action_state']
                del checkpoint['program_generator_state']
                del checkpoint['execution_engine_state']
                del checkpoint['baseline_state']
                #with open(args.checkpoint_path + '.json', 'w') as f:
                #  json.dump(checkpoint, f)

            if t == args.num_iterations:
                break
def main(args):
    """Set up determinism, TensorBoard logging, and data loaders, then train.

    Checkpoints are written to ``args.checkpoint_path`` (optionally suffixed
    with a random number) and TensorBoard event files go under
    ``<root_log_dir>/<log_dir>/runs``.
    """
    torch.autograd.set_detect_anomaly(True)
    # for reproducibility
    # NOTE(review): `device_idd` looks like a typo for `device_id`; kept
    # as-is because the argument parser (not visible here) may really
    # define it under this name — confirm before renaming.
    torch.cuda.set_device(args.device_idd)
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    # Fix: the original assigned `cudnn.benckmark` (typo), which silently
    # created a new attribute and left cuDNN benchmarking at its default,
    # undermining the reproducibility setup above.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    if args.randomize_checkpoint_path == 1:
        # Append a random 6-digit suffix so concurrent runs don't clobber
        # each other's checkpoints.
        name, ext = os.path.splitext(args.checkpoint_path)
        num = random.randint(1, 1000000)
        args.checkpoint_path = '%s_%06d%s' % (name, num, ext)
    print('Will save checkpoints to %s' % args.checkpoint_path)

    vocab = utils.load_vocab(args.vocab_json)

    ##### Modified #####
    ### For Tensorboard ###
    #log_interval_num = args.log_interval
    #save_interval_num = args.save_interval
    log_dir = os.path.join(args.root_log_dir, args.log_dir)
    # os.mkdir (not makedirs) deliberately raises if the run directory
    # already exists, preventing accidental overwrite of earlier logs.
    os.mkdir(log_dir)
    os.mkdir(os.path.join(log_dir, 'runs'))

    writer = SummaryWriter(log_dir=os.path.join(log_dir, 'runs'))

    ##### Modified #####
    if args.use_local_copies == 1:
        # Stage the HDF5 data on local disk for faster reads, then point
        # the args at the copies.
        shutil.copy(args.train_question_h5, '/tmp/train_questions.h5')
        shutil.copy(args.train_features_h5, '/tmp/train_features.h5')
        shutil.copy(args.train_features_h5_aux, '/tmp/train_features_aux.h5')

        shutil.copy(args.val_question_h5, '/tmp/val_questions.h5')
        shutil.copy(args.val_features_h5, '/tmp/val_features.h5')
        shutil.copy(args.val_features_h5_aux, '/tmp/val_features_aux.h5')

        args.train_question_h5 = '/tmp/train_questions.h5'
        args.train_features_h5 = '/tmp/train_features.h5'
        args.train_features_h5_aux = '/tmp/train_features_aux.h5'

        args.val_question_h5 = '/tmp/val_questions.h5'
        args.val_features_h5 = '/tmp/val_features.h5'
        args.val_features_h5_aux = '/tmp/val_features_aux.h5'

    question_families = None
    if args.family_split_file is not None:
        with open(args.family_split_file, 'r') as f:
            question_families = json.load(f)

    ##### Modified #####
    train_loader_kwargs = {
        'question_h5': args.train_question_h5,
        'feature_h5': args.train_features_h5,
        'feature_h5_aux': args.train_features_h5_aux,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'shuffle': args.shuffle_train_data == 1,
        'question_families': question_families,
        'max_samples': args.num_train_samples,
        'num_workers': args.loader_num_workers,
    }
    val_loader_kwargs = {
        'question_h5': args.val_question_h5,
        'feature_h5': args.val_features_h5,
        'feature_h5_aux': args.val_features_h5_aux,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'question_families': question_families,
        'max_samples': args.num_val_samples,
        'num_workers': args.loader_num_workers,
    }

    with ClevrDataLoader(**train_loader_kwargs) as train_loader, \
         ClevrDataLoader(**val_loader_kwargs) as val_loader:
        train_loop(args, train_loader, val_loader, writer)

    ##### Modified #####
    if args.use_local_copies == 1 and args.cleanup_local_copies == 1:
        # Remove the staged copies created above.
        os.remove('/tmp/train_questions.h5')
        os.remove('/tmp/train_features.h5')
        os.remove('/tmp/train_features_aux.h5')
        os.remove('/tmp/val_questions.h5')
        os.remove('/tmp/val_features.h5')
        os.remove('/tmp/val_features_aux.h5')
Example #18
0
def main(args):
    """Resolve data paths, optionally stage node-local copies, then train.

    When ``args.use_local_copies`` is set, the HDF5 files are mirrored via
    rsync into a scratch directory under /Tmpfast or /Tmp; when
    ``args.cleanup_local_copies`` is also set, those copies are removed
    after training.
    """
    if args.randomize_checkpoint_path == 1:
        name, ext = os.path.splitext(args.checkpoint_path)
        num = random.randint(1, 1000000)
        args.checkpoint_path = '%s_%06d%s' % (name, num, ext)
    print('Will save checkpoints to %s' % args.checkpoint_path)
    if args.data_dir:
        # Make all dataset paths relative to --data_dir.
        args.train_question_h5 = os.path.join(args.data_dir,
                                              args.train_question_h5)
        args.train_features_h5 = os.path.join(args.data_dir,
                                              args.train_features_h5)
        args.val_question_h5 = os.path.join(args.data_dir,
                                            args.val_question_h5)
        args.val_features_h5 = os.path.join(args.data_dir,
                                            args.val_features_h5)
        args.vocab_json = os.path.join(args.data_dir, args.vocab_json)
    if not args.checkpoint_path:
        # NOTE(review): this fallback runs after checkpoint_path was already
        # used above; an empty path with randomization enabled would crash
        # earlier at os.path.splitext. Kept as-is.
        if 'SLURM_JOB_ID' in os.environ:
            args.checkpoint_path = os.environ['SLURM_JOB_ID'] + '.pt'
        else:
            raise NotImplementedError()

    vocab = utils.load_vocab(args.vocab_json)

    if args.use_local_copies == 1:
        # Prefer the faster scratch mount when available.
        if os.path.exists('/Tmpfast'):
            tmp = '/Tmpfast/'
        else:
            tmp = '/Tmp/'
        if not os.path.exists(tmp + 'bahdanau'):
            os.mkdir(tmp + 'bahdanau')
        if not os.path.exists(tmp + 'bahdanau/clevr'):
            os.mkdir(tmp + 'bahdanau/clevr')
        root = tmp + 'bahdanau/clevr/'

        def rsync_copy_if_not_exists(src, dst):
            # Skip the (slow) copy if a previous run already staged the file.
            if not os.path.exists(dst):
                os.system("rsync -vrz --progress {} {}".format(src, dst))

        rsync_copy_if_not_exists(args.train_question_h5,
                                 root + 'train_questions.h5')
        rsync_copy_if_not_exists(args.train_features_h5,
                                 root + 'train_features.h5')
        rsync_copy_if_not_exists(args.val_question_h5,
                                 root + 'val_questions.h5')
        rsync_copy_if_not_exists(args.val_features_h5,
                                 root + 'val_features.h5')
        args.train_question_h5 = root + 'train_questions.h5'
        args.train_features_h5 = root + 'train_features.h5'
        args.val_question_h5 = root + 'val_questions.h5'
        args.val_features_h5 = root + 'val_features.h5'

    question_families = None
    if args.family_split_file is not None:
        with open(args.family_split_file, 'r') as f:
            question_families = json.load(f)

    train_loader_kwargs = {
        'question_h5': args.train_question_h5,
        'feature_h5': args.train_features_h5,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'shuffle': args.shuffle_train_data == 1,
        'question_families': question_families,
        'max_samples': args.num_train_samples,
        'num_workers': args.loader_num_workers,
    }
    val_loader_kwargs = {
        'question_h5': args.val_question_h5,
        'feature_h5': args.val_features_h5,
        'vocab': vocab,
        'batch_size': args.batch_size,
        'question_families': question_families,
        'max_samples': args.num_val_samples,
        'num_workers': args.loader_num_workers,
    }

    with ClevrDataLoader(**train_loader_kwargs) as train_loader, \
         ClevrDataLoader(**val_loader_kwargs) as val_loader:
        # The loaders expose the maximum program arity/depth observed in the
        # data, which the model construction needs.
        args.max_program_module_arity = max(train_loader.max_arity,
                                            val_loader.max_arity)
        args.max_program_tree_depth = max(train_loader.max_depth,
                                          val_loader.max_depth)
        train_loop(args, train_loader, val_loader)

    if args.use_local_copies == 1 and args.cleanup_local_copies == 1:
        # Fix: the original removed hard-coded '/tmp/*.h5' files that this
        # function never creates. The staged copies actually live under the
        # rsync target directory, which the args now point at — remove those.
        os.remove(args.train_question_h5)
        os.remove(args.train_features_h5)
        os.remove(args.val_question_h5)
        os.remove(args.val_features_h5)
Example #19
0
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--learning_rate', default=5e-4, type=float)
    parser.add_argument('--reward_decay', default=0.9, type=float)
    parser.add_argument('--film_gen_weight_decay', default=0, type=float)
    parser.add_argument('--filmed_net_weight_decay', default=0, type=float)

    args = parser.parse_args()

    exp_dir = os.path.join(args.exp_dir, args.exp_name)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)

    logger = create_logger(os.path.join(exp_dir, 'log.txt'))
    logger.info(args)

    vocab = utils.load_vocab(os.path.join(args.data_dir, 'vocab.json'))

    film_gen = FiLMGen(encoder_vocab_size=len(vocab['question_token_to_idx']),
                       wordvec_dim=args.rnn_wordvec_dim,
                       hidden_dim=args.rnn_hidden_dim,
                       rnn_num_layers=args.rnn_num_layers,
                       rnn_dropout=0,
                       output_batchnorm=False,
                       bidirectional=False,
                       encoder_type=args.encoder_type,
                       decoder_type=args.decoder_type,
                       gamma_option=args.gamma_option,
                       gamma_baseline=1,
                       num_modules=args.num_modules,
                       module_num_layers=args.module_num_layers,
                       module_dim=args.module_dim,
Example #20
0
      language=args.sw_language, config=args.sw_config)
    print('ShapeWorld dataset: {} (variant: {})'.format(dataset, args.sw_variant))
    print('Config: ' + str(args.sw_config))

    dataset = torch_util.ShapeWorldDataset(dataset=dataset,  # include_model=True
      mode=(None if args.sw_mode == 'none' else args.sw_mode), epoch=(args.num_samples is None))

    loader = ShapeWorldDataLoader(dataset=dataset, batch_size=args.batch_size)  # num_workers=1

  model = None
  if args.baseline_model is not None:
    assert args.model_type in ('LSTM', 'CNN+LSTM', 'CNN+LSTM+SA')
    print('Loading baseline model from', args.baseline_model)
    model, _ = utils.load_baseline(args.baseline_model)
    if args.vocab_json is not None:
      new_vocab = utils.load_vocab(args.vocab_json)
      model.rnn.expand_vocab(new_vocab['question_token_to_idx'])
  elif args.program_generator is not None and args.execution_engine is not None:
    pg, _ = utils.load_program_generator(args.program_generator, args.model_type)
    ee, _ = utils.load_execution_engine(
      args.execution_engine, verbose=False, model_type=args.model_type)
    if args.vocab_json is not None:
      new_vocab = utils.load_vocab(args.vocab_json)
      pg.expand_encoder_vocab(new_vocab['question_token_to_idx'])
    model = (pg, ee)
  else:
    print('Must give either --baseline_model or --program_generator and --execution_engine')
    return

  if torch.cuda.is_available():
    dtype = torch.cuda.FloatTensor
Example #21
0
def main(args):
    """Run inference on a single example, interactively, in batch over a
    ShapeWorld dataset, or in batch over CLEVR HDF5 data.

    The branch taken depends on which of --question/--image,
    --sw_name/--sw_config, or --input_question_h5/--input_features_h5 is
    supplied. Loads a baseline model, a PG+EE pair, or a FiLM model
    according to --model_type.
    """
    if args.debug_every <= 1:
        pdb.set_trace()

    if args.sw_name is not None or args.sw_config is not None:
        # ShapeWorld branch: single-example and interactive modes are
        # mutually exclusive with ShapeWorld batch evaluation.
        assert args.image is None and args.question is None

        from shapeworld import Dataset, torch_util
        from shapeworld.datasets import clevr_util

        class ShapeWorldDataLoader(torch_util.ShapeWorldDataLoader):
            def __iter__(self):
                # Adapt ShapeWorld batches to the (question, image, feats,
                # answer, program_seq, program_json) tuple the rest of the
                # pipeline expects. Caption-style datasets use "caption"/
                # "agreement" keys; question-style ones use "question"/
                # "answer".
                for batch in super(ShapeWorldDataLoader, self).__iter__():
                    if "caption" in batch:
                        question = batch["caption"].long()
                    else:
                        question = batch["question"].long()
                    if args.sw_features == 1:
                        image = batch["world_features"]
                    else:
                        image = batch["world"]
                    feats = image
                    if "agreement" in batch:
                        answer = batch["agreement"].long()
                    else:
                        answer = batch["answer"].long()
                    if "caption_model" in batch:
                        # Fix: guard against args.sw_name being None when
                        # only --sw_config was given (original raised
                        # AttributeError on .startswith).
                        assert (args.sw_name is not None
                                and args.sw_name.startswith("clevr")
                                ) or args.sw_program == 3
                        program_seq = batch["caption_model"]
                        # .apply_(callable=(lambda model: clevr_util.parse_program(mode=0, model=model)))
                    elif "question_model" in batch:
                        program_seq = batch["question_model"]
                    elif "caption" in batch:
                        if args.sw_program == 1:
                            program_seq = batch["caption_pn"].long()
                        elif args.sw_program == 2:
                            program_seq = batch["caption_rpn"].long()
                        else:
                            program_seq = [None]
                    else:
                        program_seq = [None]
                    # program_seq = torch.IntTensor([0 for _ in batch['question']])
                    program_json = dict()
                    yield question, image, feats, answer, program_seq, program_json

        dataset = Dataset.create(
            dtype=args.sw_type,
            name=args.sw_name,
            variant=args.sw_variant,
            language=args.sw_language,
            config=args.sw_config,
        )
        print("ShapeWorld dataset: {} (variant: {})".format(
            dataset, args.sw_variant))
        print("Config: " + str(args.sw_config))

        # Load the vocab saved alongside whichever model checkpoint was
        # given (checkpoints store a companion "<path>.vocab" JSON file).
        if args.program_generator is not None:
            with open(args.program_generator + ".vocab", "r") as filehandle:
                vocab = json.load(filehandle)
        elif args.execution_engine is not None:
            with open(args.execution_engine + ".vocab", "r") as filehandle:
                vocab = json.load(filehandle)
        elif args.baseline_model is not None:
            with open(args.baseline_model + ".vocab", "r") as filehandle:
                vocab = json.load(filehandle)
        else:
            # Fix: the original fell through with `vocab` unbound and
            # crashed with a NameError on the next line.
            print("Must give one of --program_generator, "
                  "--execution_engine or --baseline_model")
            return
        program_token_to_idx = vocab["program_token_to_idx"]

        # Fix: same None-guard on args.sw_name as above.
        include_model = args.model_type in ("PG", "EE", "PG+EE") and (
            (args.sw_name is not None and args.sw_name.startswith("clevr"))
            or args.sw_program == 3)
        if include_model:

            def preprocess(model):
                # Encode a structured program into a fixed-length index
                # sequence for the execution engine.
                if args.sw_name is not None and args.sw_name.startswith(
                        "clevr"):
                    program_prefix = vr.programs.list_to_prefix(
                        model["program"])
                else:
                    program_prefix = clevr_util.parse_program(mode=0,
                                                              model=model)
                program_str = vr.programs.list_to_str(program_prefix)
                program_tokens = tokenize(program_str)
                program_encoded = encode(program_tokens, program_token_to_idx)
                # Pad with <NULL> to length 27 — NOTE(review): magic
                # constant, presumably the maximum program length; confirm.
                program_encoded += [
                    program_token_to_idx["<NULL>"]
                    for _ in range(27 - len(program_encoded))
                ]
                return np.asarray(program_encoded, dtype=np.int64)

            if args.sw_name is not None and args.sw_name.startswith("clevr"):
                preprocessing = dict(question_model=preprocess)
            else:
                preprocessing = dict(caption_model=preprocess)

        elif args.sw_program in (1, 2):

            def preprocess(caption_pn):
                # Shift non-padding symbols by 2 and wrap the sequence with
                # start (1) / end (2) markers.
                caption_pn += (caption_pn > 0) * 2
                for n, symbol in enumerate(caption_pn):
                    if symbol == 0:
                        caption_pn[n] = 2
                        break
                caption_pn = np.concatenate(([1], caption_pn))
                return caption_pn

            if args.sw_program == 1:
                preprocessing = dict(caption_pn=preprocess)
            else:
                preprocessing = dict(caption_rpn=preprocess)

        else:
            preprocessing = None

        dataset = torch_util.ShapeWorldDataset(
            dataset=dataset,
            mode=(None if args.sw_mode == "none" else args.sw_mode),
            include_model=include_model,
            epoch=(args.num_samples is None),
            preprocessing=preprocessing,
        )

        loader = ShapeWorldDataLoader(dataset=dataset,
                                      batch_size=args.batch_size)

    model = None
    if args.model_type in ("CNN", "LSTM", "CNN+LSTM", "CNN+LSTM+SA"):
        assert args.baseline_model is not None
        print("Loading baseline model from", args.baseline_model)
        model, _ = utils.load_baseline(args.baseline_model)
        if args.vocab_json is not None:
            new_vocab = utils.load_vocab(args.vocab_json)
            model.rnn.expand_vocab(new_vocab["question_token_to_idx"])
    elif args.program_generator is not None and args.execution_engine is not None:
        pg, _ = utils.load_program_generator(args.program_generator,
                                             args.model_type)
        ee, _ = utils.load_execution_engine(args.execution_engine,
                                            verbose=False,
                                            model_type=args.model_type)
        if args.vocab_json is not None:
            new_vocab = utils.load_vocab(args.vocab_json)
            pg.expand_encoder_vocab(new_vocab["question_token_to_idx"])
        model = (pg, ee)
    elif args.model_type == "FiLM":
        # FiLM checkpoints bundle generator and network in one file.
        assert args.baseline_model is not None
        pg, _ = utils.load_program_generator(args.baseline_model,
                                             args.model_type)
        ee, _ = utils.load_execution_engine(args.baseline_model,
                                            verbose=False,
                                            model_type=args.model_type)
        if args.vocab_json is not None:
            new_vocab = utils.load_vocab(args.vocab_json)
            pg.expand_encoder_vocab(new_vocab["question_token_to_idx"])
        model = (pg, ee)
    else:
        print(
            "Must give either --baseline_model or --program_generator and --execution_engine"
        )
        return

    if torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor
    if args.question is not None and args.image is not None:
        run_single_example(args, model, dtype, args.question)
    # Interactive mode
    elif (args.image is not None and args.input_question_h5 is None
          and args.input_features_h5 is None):
        feats_var = extract_image_features(args, dtype)
        print(colored("Ask me something!", "cyan"))
        while True:
            # Get user question
            question_raw = input(">>> ")
            run_single_example(args, model, dtype, question_raw, feats_var)
    elif args.sw_name is not None or args.sw_config is not None:
        # ShapeWorld batch evaluation, with optional prediction dump and
        # HTML visualization.
        predictions, visualization = run_batch(args, model, dtype, loader)
        if args.sw_pred_dir is not None:
            assert args.sw_pred_name is not None
            pred_dir = os.path.join(
                args.sw_pred_dir,
                dataset.dataset.type,
                dataset.dataset.name,
                dataset.dataset.variant,
            )
            if not os.path.isdir(pred_dir):
                os.makedirs(pred_dir)
            id2word = dataset.dataset.vocabulary(value_type="language")
            with open(
                    os.path.join(
                        pred_dir,
                        args.sw_pred_name + "-" + args.sw_mode + ".txt"),
                    "w",
            ) as filehandle:
                # One line per example: correctness, agreement score, and
                # the decoded caption.
                filehandle.write("".join(
                    "{} {} {}\n".format(correct, agreement, " ".join(
                        id2word[c] for c in caption))
                    for correct, agreement, caption in zip(
                        predictions["correct"],
                        predictions["agreement"],
                        predictions["caption"],
                    )))
            print("Predictions saved")
        if args.sw_vis_dir is not None:
            assert args.sw_vis_name is not None
            from io import BytesIO
            from shapeworld.world import World

            vis_dir = os.path.join(
                args.sw_vis_dir,
                dataset.dataset.type,
                dataset.dataset.name,
                dataset.dataset.variant,
            )
            image_dir = os.path.join(vis_dir, args.sw_mode, "images")
            if not os.path.isdir(image_dir):
                os.makedirs(image_dir)
            # Convert CHW tensors back to HWC images for rendering.
            worlds = np.transpose(visualization["world"], (0, 2, 3, 1))
            for n in range(worlds.shape[0]):
                image = World.get_image(world_array=worlds[n])
                image_bytes = BytesIO()
                image.save(image_bytes, format="png")
                with open(os.path.join(image_dir, "world-{}.png".format(n)),
                          "wb") as filehandle:
                    filehandle.write(image_bytes.getvalue())
                image_bytes.close()
            with open(
                    os.path.join(
                        vis_dir,
                        args.sw_vis_name + "-" + args.sw_mode + ".html"),
                    "w",
            ) as filehandle:
                html = dataset.dataset.get_html(
                    generated=visualization,
                    image_format="png",
                    image_dir=(args.sw_mode + "/images/"),
                )
                filehandle.write(html)
            print("Visualization saved")
    else:
        # CLEVR HDF5 batch evaluation.
        vocab = load_vocab(args)
        loader_kwargs = {
            "question_h5": args.input_question_h5,
            "feature_h5": args.input_features_h5,
            "vocab": vocab,
            "batch_size": args.batch_size,
        }
        if args.family_split_file is not None:
            with open(args.family_split_file, "r") as f:
                loader_kwargs["question_families"] = json.load(f)
        with ClevrDataLoader(**loader_kwargs) as loader:
            run_batch(args, model, dtype, loader)
Example #22
0
def main(args):
  """Train on either a ShapeWorld dataset (--sw_name/--sw_config) or on CLEVR
  HDF5 data, delegating to ``train_loop`` in both cases.

  For ShapeWorld, the vocab is either built fresh from the dataset's language
  vocabulary or extended from a previous checkpoint's vocab, and is written
  next to the checkpoint as ``<checkpoint_path>.vocab``.
  """
  if args.randomize_checkpoint_path == 1:
    # Append a random 6-digit suffix so concurrent runs don't clobber
    # each other's checkpoints.
    name, ext = os.path.splitext(args.checkpoint_path)
    num = random.randint(1, 1000000)
    args.checkpoint_path = '%s_%06d%s' % (name, num, ext)
  print('Will save checkpoints to %s' % args.checkpoint_path)

  if args.sw_name is not None or args.sw_config is not None:
    # ShapeWorld branch: stream batches straight from the dataset generator.
    from shapeworld import Dataset, torch_util
    from shapeworld.datasets import clevr_util

    class ShapeWorldDataLoader(torch_util.ShapeWorldDataLoader):

      def __init__(self, **kwargs):
        super(ShapeWorldDataLoader, self).__init__(**kwargs)

      def __iter__(self):
        # Adapt ShapeWorld batches to the (question, image, feats, answer,
        # program_seq, program_json) tuple expected by train_loop.
        for batch in super(ShapeWorldDataLoader, self).__iter__():
          question = batch['caption'].long()
          image = batch['world']
          feats = batch['world']
          answer = batch['agreement'].long()
          if 'caption_model' in batch:
            program_seq = batch['caption_model'].apply_(callable=(lambda model: clevr_util.parse_program(mode=0, model=model)))
          else:
            # No structured program available — use a dummy all-zero sequence.
            program_seq = torch.IntTensor([0 for _ in batch['caption']])
          program_json = dict()
          yield question, image, feats, answer, program_seq, program_json

    dataset = Dataset.create(dtype='agreement', name=args.sw_name, variant=args.sw_variant,
      language=args.sw_language, config=args.sw_config)
    print('ShapeWorld dataset: {} (variant: {})'.format(dataset, args.sw_variant))
    print('Config: ' + str(args.sw_config))

    if args.program_generator_start_from is None:
      # Build a fresh question vocab: raw word indices > 0 are shifted by 2
      # to make room for <START>/<END>.
      # NOTE(review): a word with raw index 0 is mapped onto id 0, colliding
      # with <NULL>, and the first shifted word (raw index 1 -> 3) leaves id
      # 2 shared with <END> — presumably index 0 is already padding in the
      # ShapeWorld vocabulary; confirm against shapeworld's vocabularies.
      question_token_to_idx = {
        word: index + 2 if index > 0 else 0
        for word, index in dataset.vocabularies['language'].items()
      }
      question_token_to_idx['<NULL>'] = 0
      question_token_to_idx['<START>'] = 1
      question_token_to_idx['<END>'] = 2
      vocab = dict(
        question_token_to_idx=question_token_to_idx,
        program_token_to_idx={'<NULL>': 0, '<START>': 1, '<END>': 2},  # missing!!! only special tokens — real program tokens are never added
        answer_token_to_idx={'false': 0, 'true': 1}
      )
      with open(args.checkpoint_path + '.vocab', 'w') as filehandle:
        json.dump(vocab, filehandle)

    else:
      # Resume case: extend the previous checkpoint's question vocab with any
      # new words from this dataset, appending them after the existing ids.
      with open(args.program_generator_start_from + '.vocab', 'r') as filehandle:
        vocab = json.load(filehandle)
      question_token_to_idx = vocab['question_token_to_idx']
      index = len(question_token_to_idx)
      for word in dataset.vocabularies['language']:
        if word not in question_token_to_idx:
          question_token_to_idx[word] = index
          index += 1
      with open(args.checkpoint_path + '.vocab', 'w') as filehandle:
        json.dump(vocab, filehandle)

    # Feature dim is the world shape reversed (channels-first), serialized as
    # a comma-separated string for downstream parsing.
    args.feature_dim = ','.join(str(n) for n in reversed(dataset.world_shape()))
    args.vocab_json = args.checkpoint_path + '.vocab'

    train_dataset = torch_util.ShapeWorldDataset(dataset=dataset, mode='train')  # , include_model=True)
    train_loader = ShapeWorldDataLoader(dataset=train_dataset, batch_size=args.batch_size)  # num_workers=1

    if args.sw_mixer == 1:
      # Mixer datasets are validated per sub-dataset: one loader each.
      val_loader = list()
      for d in dataset.datasets:
        val_dataset = torch_util.ShapeWorldDataset(dataset=d, mode='validation', epoch=(args.num_val_samples is None))
        val_loader.append(ShapeWorldDataLoader(dataset=val_dataset, batch_size=args.batch_size))  # num_workers=1
    else:
      val_dataset = torch_util.ShapeWorldDataset(dataset=dataset, mode='validation', epoch=(args.num_val_samples is None))
      val_loader = ShapeWorldDataLoader(dataset=val_dataset, batch_size=args.batch_size)  # num_workers=1

    train_loop(args, train_loader, val_loader)

  else:
    # CLEVR HDF5 branch.
    vocab = utils.load_vocab(args.vocab_json)

    if args.use_local_copies == 1:
      # Stage the HDF5 data on local disk for faster reads, then point the
      # args at the copies.
      shutil.copy(args.train_question_h5, '/tmp/train_questions.h5')
      shutil.copy(args.train_features_h5, '/tmp/train_features.h5')
      shutil.copy(args.val_question_h5, '/tmp/val_questions.h5')
      shutil.copy(args.val_features_h5, '/tmp/val_features.h5')
      args.train_question_h5 = '/tmp/train_questions.h5'
      args.train_features_h5 = '/tmp/train_features.h5'
      args.val_question_h5 = '/tmp/val_questions.h5'
      args.val_features_h5 = '/tmp/val_features.h5'

    question_families = None
    if args.family_split_file is not None:
      with open(args.family_split_file, 'r') as f:
        question_families = json.load(f)

    train_loader_kwargs = {
      'question_h5': args.train_question_h5,
      'feature_h5': args.train_features_h5,
      'vocab': vocab,
      'batch_size': args.batch_size,
      'shuffle': args.shuffle_train_data == 1,
      'question_families': question_families,
      'max_samples': args.num_train_samples,
      'num_workers': args.loader_num_workers,
    }
    val_loader_kwargs = {
      'question_h5': args.val_question_h5,
      'feature_h5': args.val_features_h5,
      'vocab': vocab,
      'batch_size': args.batch_size,
      'question_families': question_families,
      'max_samples': args.num_val_samples,
      'num_workers': args.loader_num_workers,
    }

    with ClevrDataLoader(**train_loader_kwargs) as train_loader, \
         ClevrDataLoader(**val_loader_kwargs) as val_loader:
      train_loop(args, train_loader, val_loader)

    if args.use_local_copies == 1 and args.cleanup_local_copies == 1:
      # Remove the staged copies created above.
      os.remove('/tmp/train_questions.h5')
      os.remove('/tmp/train_features.h5')
      os.remove('/tmp/val_questions.h5')
      os.remove('/tmp/val_features.h5')