Exemple #1
0
def evaluate(args):
  """Evaluate a VGG16-based grounding network on a referring-expression split.

  Builds the loader and network from `args`, restores weights from a snapshot
  (skipping parameters whose shapes do not match), runs eval_split, prints the
  comprehension accuracy and appends it to experiments/det_results.txt.
  """
  opt = vars(args)

  # make other options
  opt['dataset_splitBy'] = opt['dataset'] + '_' + opt['splitBy']

  # optional config overrides from a file and/or key=value list
  if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
  if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

  print('Using config:')
  pprint.pprint(cfg)

  # set up loader
  data_json = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.json')
  data_h5 = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.h5')
  loader = GtMRCNLoader(data_json, data_h5)

  # set up model
  opt['vocab_size'] = loader.vocab_size
  opt['C4_feat_dim'] = 512  # VGG16 conv4 feature dimension
  net = vgg16(opt, batch_size=1)

  net.create_architecture(81, tag='default',
                          anchor_scales=cfg.ANCHOR_SCALES,
                          anchor_ratios=cfg.ANCHOR_RATIOS)

  # restore weights, copying only parameters whose shapes match the snapshot
  sfile = osp.join(opt['dataset_splitBy'], 'output_{}'.format(opt['output_postfix']), 'vgg16_faster_rcnn_iter_{}.pth'.format(opt['model_iter']))
  print('Restoring model snapshots from {:s}'.format(sfile))
  saved_state_dict = torch.load(str(sfile))
  count_1 = 0  # number of parameters that could not be restored
  new_params = net.state_dict().copy()
  for name, param in new_params.items():
    if name in saved_state_dict and param.size() == saved_state_dict[name].size():
      new_params[name].copy_(saved_state_dict[name])
    else:
      print(name, '----')
      count_1 += 1
  print('size not match:', count_1)
  net.load_state_dict(new_params)

  net.eval()
  net.cuda()

  split = opt['split']

  crit = None
  acc, num_sent = eval_split(loader, net, crit, split, opt)
  print('Comprehension on %s\'s %s (%s sents) is %.2f%%' % \
        (opt['dataset_splitBy'], split, num_sent, acc*100.))

  # write to results.txt; a context manager closes the handle
  # (the original left the file open, risking a lost buffered write)
  with open('experiments/det_results.txt', 'a') as f:
    f.write('[%s][%s], id[%s]\'s acc is %.2f%%\n' % \
            (opt['dataset_splitBy'], opt['split'], opt['id'], acc*100.0))
Exemple #2
0
def main(args):
    """Train a VGG16-based grounding network on the chosen dataset split.

    Builds the GtMRCNLoader and vgg16 network from `args`, applies optional
    config overrides, then hands off to train_net with a COCO-pretrained
    Faster R-CNN snapshot.
    """
    opt = vars(args)

    # initialize
    opt['dataset_splitBy'] = opt['dataset'] + '_' + opt['splitBy']
    checkpoint_dir = osp.join(opt['checkpoint_path'], opt['dataset_splitBy'])
    if not osp.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # set random seed for reproducibility
    torch.manual_seed(opt['seed'])
    random.seed(opt['seed'])

    # set up loader
    data_json = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.json')
    data_h5 = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.h5')

    loader = GtMRCNLoader(data_json, data_h5)

    # set up model
    opt['vocab_size'] = loader.vocab_size
    opt['C4_feat_dim'] = 512  # VGG16 conv4 feature dimension
    net = vgg16(opt,
                batch_size=opt['batch_size'])  # determine batch size in opt.py

    # output directory where the models are saved
    output_dir = osp.join(opt['dataset_splitBy'],
                          'output_{}'.format(opt['output_postfix']))
    print('Output will be saved to `{:s}`'.format(output_dir))

    # tensorboard directory where the summaries are saved during training
    tb_dir = osp.join(opt['dataset_splitBy'],
                      'tb_{}'.format(opt['output_postfix']))
    print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir))

    # NOTE(review): the original toggled cfg.TRAIN.USE_FLIPPED off and back on
    # with no intervening use — a no-op — so that dead code was removed.

    # optional config overrides from a file and/or key=value list
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    train_net(
        net,
        loader,
        output_dir,
        tb_dir,
        pretrained_model=
        'pyutils/mask-faster-rcnn/output/vgg16/coco_2014_train+coco_2014_valminusminival/vgg16_faster_rcnn_iter_1190000.pth',
        max_iters=args.max_iters)
Exemple #3
0
def evaluate(params):
    """Evaluate a saved comprehension model on a referring-expression split.

    Loads <id>.json / <id>.pth from output/<dataset_splitBy>, prepares the
    loader's Mask R-CNN features, runs eval_utils.eval_split, prints accuracy
    and attribute P/R/F1, saves predictions under cache/results/.../easy, and
    appends a summary line to experiments/easy_results.txt.
    """
    # set up loader
    data_json = osp.join(
        'cache/prepro', params['dataset_splitBy'], 'data.json')
    data_h5 = osp.join('cache/prepro', params['dataset_splitBy'], 'data.h5')
    loader = GtMRCNLoader(data_h5=data_h5, data_json=data_json)

    # load model info; close the handle (the original leaked it)
    model_prefix = osp.join('output', params['dataset_splitBy'], params['id'])
    with open(model_prefix + '.json') as fh:
        infos = json.load(fh)
    model_opt = infos['opt']
    model_path = model_prefix + '.pth'
    model = load_model(model_path, model_opt)

    # loader's feats
    feats_dir = '%s_%s_%s' % (
        model_opt['net_name'], model_opt['imdb_name'], model_opt['tag'])
    # NOTE(review): this mutates the module-level `args` namespace as a side
    # channel into prepare_mrcn — kept for compatibility with callers
    args.imdb_name = model_opt['imdb_name']
    args.net_name = model_opt['net_name']
    args.tag = model_opt['tag']
    args.iters = model_opt['iters']
    loader.prepare_mrcn(head_feats_dir=osp.join('cache/feats/', model_opt['dataset_splitBy'], 'mrcn', feats_dir),
                        args=args)
    ann_feats = osp.join('cache/feats', model_opt['dataset_splitBy'], 'mrcn',
                         '%s_%s_%s_ann_feats.h5' % (model_opt['net_name'], model_opt['imdb_name'], model_opt['tag']))
    loader.loadFeats({'ann': ann_feats})

    # check model_info and params agree on the dataset
    assert model_opt['dataset'] == params['dataset']
    assert model_opt['splitBy'] == params['splitBy']

    # evaluate on the split,
    # predictions = [{sent_id, sent, gd_ann_id, pred_ann_id, pred_score, sub_attn, loc_attn, weights}]
    split = params['split']
    model_opt['num_sents'] = params['num_sents']
    model_opt['verbose'] = params['verbose']
    crit = None
    val_loss, acc, predictions, overall = eval_utils.eval_split(
        loader, model, crit, split, model_opt)
    print('Comprehension on %s\'s %s (%s sents) is %.2f%%' %
          (params['dataset_splitBy'], params['split'], len(predictions), acc*100.))
    print('attribute precision : %.2f%%' % (overall['precision']*100.0))
    print('attribute recall    : %.2f%%' % (overall['recall']*100.0))
    print('attribute f1        : %.2f%%' % (overall['f1']*100.0))

    # save predictions
    out_dir = osp.join('cache', 'results', params['dataset_splitBy'], 'easy')
    if not osp.isdir(out_dir):
        os.makedirs(out_dir)
    out_file = osp.join(out_dir, params['id']+'_'+params['split']+'.json')
    with open(out_file, 'w') as of:
        json.dump({'predictions': predictions,
                   'acc': acc, 'overall': overall}, of)

    # write to results.txt; context manager closes the handle (orig leaked it)
    with open('experiments/easy_results.txt', 'a') as f:
        f.write('[%s][%s], id[%s]\'s acc is %.2f%%\n' %
                (params['dataset_splitBy'], params['split'], params['id'], acc*100.0))
Exemple #4
0
def main(args):
  """Train a JointMatching model with max-margin (optionally erase) criteria.

  Builds the loader, features and model from `args`, optionally resumes from
  a previous checkpoint, then trains until `max_epochs`: logs losses to
  tensorboard every `losses_log_every` iters, decays the learning rate per
  epoch, evaluates on 'val' at each epoch boundary, and keeps the
  best-scoring checkpoint plus a JSON report of the training history.
  """
  opt = vars(args)
  tb_logger.configure('tb_logs/'+opt['id'], flush_secs=2)

  # initialize
  opt['dataset_splitBy'] = opt['dataset'] + '_' + opt['splitBy']
  # dataset-specific <unk> token index — presumably precomputed vocabulary
  # ids; TODO confirm against the preprocessing step
  if opt['dataset'] == 'refcocog':
    opt['unk_token'] = 3346
  elif opt['dataset'] == 'refcoco':
    opt['unk_token'] = 1996
  elif opt['dataset'] == 'refcoco+':
    opt['unk_token'] = 2629
  checkpoint_dir = osp.join(opt['checkpoint_path'], opt['dataset_splitBy'])
  if not osp.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

  # set random seed for reproducibility
  torch.manual_seed(opt['seed'])
  random.seed(opt['seed'])

  # set up loader
  data_json = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.json')
  data_h5 = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.h5')
  loader = GtMRCNLoader(data_h5=data_h5, data_json=data_json)
  # prepare Mask R-CNN head / annotation features
  feats_dir = '%s_%s_%s' % (args.net_name, args.imdb_name, args.tag)
  head_feats_dir = osp.join('cache/feats/', opt['dataset_splitBy'], 'mrcn', feats_dir)
  loader.prepare_mrcn(head_feats_dir, args)
  ann_feats = osp.join('cache/feats', opt['dataset_splitBy'], 'mrcn',
                       '%s_%s_%s_ann_feats.h5' % (opt['net_name'], opt['imdb_name'], opt['tag']))
  loader.loadFeats({'ann': ann_feats})

  # set up model
  opt['vocab_size'] = loader.vocab_size
  opt['fc7_dim'] = loader.fc7_dim
  opt['pool5_dim'] = loader.pool5_dim
  opt['num_atts'] = loader.num_atts
  model = JointMatching(opt)

  # resume from previous checkpoint
  infos = {}
  if opt['start_from'] is not None:
    checkpoint = torch.load(os.path.join('output', opt['dataset_splitBy'], opt['start_from']+'.pth'))
    model.load_state_dict(checkpoint['model'].state_dict())
    # close the handle (the original leaked it)
    with open(os.path.join('output', opt['dataset_splitBy'], opt['start_from']+'.json'), 'r') as fh:
      infos = json.load(fh)
    print('start from model %s, best val score %.2f%%\n' % (opt['start_from'], infos['best_val_score']*100))

  if opt['resume']:
    # restore the full training state from the loaded infos
    iter = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_accuracies = infos.get('val_accuracies', [])
    val_loss_history = infos.get('val_loss_history', {})
    val_result_history = infos.get('val_result_history', {})
    loss_history = infos.get('loss_history', {})
    loader.iterators = infos.get('iterators', loader.iterators)
    best_val_score = infos.get('best_val_score', None)
  else:
    iter = 0
    epoch = 0
    val_accuracies = []
    val_loss_history = {}
    val_result_history = {}
    loss_history = {}
    best_val_score = None

  # set up criterion; the original built the identical MaxMarginEraseCriterion
  # in two separate branches, so they are collapsed into one
  if opt['erase_lang_weight'] > 0 or opt['erase_allvisual_weight'] > 0:
    mm_crit = MaxMarginEraseCriterion(opt['visual_rank_weight'], opt['lang_rank_weight'],
      opt['erase_lang_weight'], opt['erase_allvisual_weight'], opt['margin'], opt['erase_margin'])
  else:
    mm_crit = MaxMarginCriterion(opt['visual_rank_weight'], opt['lang_rank_weight'], opt['margin'])

  att_crit = nn.BCEWithLogitsLoss(loader.get_attribute_weights())

  # move to GPU
  if opt['gpuid'] >= 0:
    model.cuda()
    mm_crit.cuda()
    att_crit.cuda()

  # set up optimizer (only over trainable parameters)
  optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                               lr=opt['learning_rate'],
                               betas=(opt['optim_alpha'], opt['optim_beta']),
                               eps=opt['optim_epsilon'])

  # start training
  data_time, model_time = 0, 0
  lr = opt['learning_rate']
  best_predictions, best_overall = None, None
  if opt['shuffle']:
    loader.shuffle('train')

  while True:
    # run one iteration
    loss, loss1, loss2, T, wrapped = lossFun(loader, optimizer, model, mm_crit, att_crit, opt, iter)
    data_time += T['data']
    model_time += T['model']

    # write the training loss summary
    if iter % opt['losses_log_every'] == 0:
      loss_history[iter] = loss
      # print stats
      log_toc = time.time()
      print('iter[%s](epoch[%s]), train_loss=%.3f, lr=%.2E, data:%.2fs/iter, model:%.2fs/iter' \
            % (iter, epoch, loss, lr, data_time/opt['losses_log_every'], model_time/opt['losses_log_every']))
      # write tensorboard logger
      tb_logger.log_value('epoch', epoch, step=iter)
      tb_logger.log_value('iter', iter, step=iter)
      tb_logger.log_value('training_loss', loss, step=iter)
      tb_logger.log_value('training_loss1', loss1, step=iter)
      tb_logger.log_value('training_loss2', loss2, step=iter)
      tb_logger.log_value('learning_rate', lr, step=iter)
      data_time, model_time = 0, 0

    # decay the learning rate (per epoch in this variant)
    if opt['learning_rate_decay_start'] > 0 and epoch > opt['learning_rate_decay_start']:
      frac = (epoch - opt['learning_rate_decay_start']) / opt['learning_rate_decay_every']
      decay_factor = 0.1 ** frac
      lr = opt['learning_rate'] * decay_factor
      # update optimizer's learning rate
      model_utils.set_lr(optimizer, lr)

    # update iter and epoch
    iter += 1
    if wrapped:
      # one full pass over 'train' finished
      if opt['shuffle']:
        loader.shuffle('train')
      epoch += 1
      # eval loss and save checkpoint
      val_loss, acc, predictions, overall = eval_utils.eval_split(loader, model, None, 'val', opt)
      val_loss_history[iter] = val_loss
      val_result_history[iter] = {'loss': val_loss, 'accuracy': acc}
      val_accuracies += [(iter, acc)]
      print('val loss: %.2f' % val_loss)
      print('val acc : %.2f%%\n' % (acc*100.0))
      print('val precision : %.2f%%' % (overall['precision']*100.0))
      print('val recall    : %.2f%%' % (overall['recall']*100.0))
      print('val f1        : %.2f%%' % (overall['f1']*100.0))
      # write tensorboard logger
      tb_logger.log_value('val_loss', val_loss, step=iter)
      tb_logger.log_value('val_acc', acc, step=iter)
      tb_logger.log_value('val precision', overall['precision']*100.0, step=iter)
      tb_logger.log_value('val recall', overall['recall']*100.0, step=iter)
      tb_logger.log_value('val f1', overall['f1']*100.0, step=iter)

      # save model if best
      current_score = acc
      if best_val_score is None or current_score > best_val_score:
        best_val_score = current_score
        best_predictions = predictions
        best_overall = overall
        checkpoint_path = osp.join(checkpoint_dir, opt['id'] + '.pth')
        checkpoint = {}
        checkpoint['model'] = model
        checkpoint['opt'] = opt
        torch.save(checkpoint, checkpoint_path)
        print('model saved to %s' % checkpoint_path)

      # write json report
      infos['iter'] = iter
      infos['epoch'] = epoch
      infos['iterators'] = loader.iterators
      infos['loss_history'] = loss_history
      infos['val_accuracies'] = val_accuracies
      infos['val_loss_history'] = val_loss_history
      infos['best_val_score'] = best_val_score
      infos['best_predictions'] = predictions if best_predictions is None else best_predictions
      infos['best_overall'] = overall if best_overall is None else best_overall
      infos['opt'] = opt
      infos['val_result_history'] = val_result_history
      infos['word_to_ix'] = loader.word_to_ix
      infos['att_to_ix'] = loader.att_to_ix
      # open in text mode: json.dump writes str, so the original 'wb' raises
      # TypeError on Python 3 (the sibling trainer in this file uses 'w')
      with open(osp.join(checkpoint_dir, opt['id']+'.json'), 'w') as io:
        json.dump(infos, io)

      if epoch >= opt['max_epochs'] and opt['max_epochs'] > 0:
        break
Exemple #5
0
def evaluate(args):
    """Evaluate a ResNet-101 Mask R-CNN grounding model on a referring split.

    Restores the snapshot (skipping shape-mismatched parameters), runs
    eval_split for both detection comprehension and segmentation IoU metrics,
    prints them, and appends summaries to experiments/det_results.txt and
    experiments/mask_results.txt.
    """
    opt = vars(args)

    # make other options
    opt['dataset_splitBy'] = opt['dataset'] + '_' + opt['splitBy']

    # optional config overrides from a file and/or key=value list
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    # set up loader
    data_json = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.json')
    data_h5 = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.h5')
    loader = GtMRCNLoader(data_json, data_h5)

    # set up model
    opt['vocab_size'] = loader.vocab_size
    opt['C4_feat_dim'] = 1024  # ResNet C4 feature dimension
    net = resnetv1(opt, batch_size=1, num_layers=101)

    net.create_architecture(81,
                            tag='default',
                            anchor_scales=cfg.ANCHOR_SCALES,
                            anchor_ratios=cfg.ANCHOR_RATIOS)

    # restore weights, copying only parameters whose shapes match the snapshot
    sfile = osp.join(opt['dataset_splitBy'],
                     'output_{}'.format(opt['output_postfix']),
                     'res101_mask_rcnn_iter_{}.pth'.format(opt['model_iter']))
    print('Restoring model snapshots from {:s}'.format(sfile))
    saved_state_dict = torch.load(str(sfile))
    count_1 = 0  # number of parameters that could not be restored
    new_params = net.state_dict().copy()
    for name, param in new_params.items():
        if name in saved_state_dict and param.size(
        ) == saved_state_dict[name].size():
            new_params[name].copy_(saved_state_dict[name])
        else:
            print(name, '----')
            count_1 += 1
    print('size not match:', count_1)
    net.load_state_dict(new_params)

    net.eval()
    net.cuda()

    split = opt['split']

    crit = None
    acc, eval_seg_iou_list, seg_correct, seg_total, cum_I, cum_U, num_sent = eval_split(
        loader, net, crit, split, opt)
    print('Comprehension on %s\'s %s (%s sents) is %.2f%%' % \
          (opt['dataset_splitBy'], split, num_sent, acc*100.))

    # write to results.txt; context managers close the handles
    # (the original left both result files open)
    with open('experiments/det_results.txt', 'a') as f:
        f.write('[%s][%s], id[%s]\'s acc is %.2f%%\n' % \
                (opt['dataset_splitBy'], opt['split'], opt['id'], acc*100.0))

    # print segmentation metrics
    print('Segmentation results on [%s][%s]' % (opt['dataset_splitBy'], split))
    results_str = ''
    for n_eval_iou in range(len(eval_seg_iou_list)):
        results_str += '    precision@%s = %.2f\n' % \
          (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou]*100./seg_total)
    results_str += '    overall IoU = %.2f\n' % (cum_I * 100. / cum_U)
    print(results_str)

    # write to mask results.txt
    with open('experiments/mask_results.txt', 'a') as f:
        f.write('[%s][%s]\'s iou is:\n%s' % \
                (opt['dataset_splitBy'], split, results_str))
Exemple #6
0
def main(args):
    """Train a JointMatching model with max-margin + attribute criteria.

    Iteration-based variant: evaluates on 'val' and checkpoints every
    `save_checkpoint_every` iterations and stops at `max_iters`, keeping the
    best-scoring checkpoint plus a JSON report of the training history.
    """
    opt = vars(args)

    # initialize
    opt['dataset_splitBy'] = opt['dataset'] + '_' + opt['splitBy']
    checkpoint_dir = osp.join(opt['checkpoint_path'], opt['dataset_splitBy'])
    if not osp.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # set random seed for reproducibility
    torch.manual_seed(opt['seed'])
    random.seed(opt['seed'])

    # set up loader
    data_json = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.json')
    data_h5 = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.h5')
    loader = GtMRCNLoader(data_h5=data_h5, data_json=data_json)
    # prepare Mask R-CNN head / annotation features
    feats_dir = '%s_%s_%s' % (args.net_name, args.imdb_name, args.tag)
    head_feats_dir = osp.join('cache/feats/', opt['dataset_splitBy'], 'mrcn',
                              feats_dir)
    loader.prepare_mrcn(head_feats_dir, args)
    ann_feats = osp.join(
        'cache/feats', opt['dataset_splitBy'], 'mrcn',
        '%s_%s_%s_ann_feats.h5' %
        (opt['net_name'], opt['imdb_name'], opt['tag']))
    loader.loadFeats({'ann': ann_feats})

    # set up model
    opt['vocab_size'] = loader.vocab_size
    opt['fc7_dim'] = loader.fc7_dim
    opt['pool5_dim'] = loader.pool5_dim
    opt['num_atts'] = loader.num_atts
    model = JointMatching(opt)

    # resume from previous checkpoint (not implemented in this variant,
    # so infos stays empty and all state below falls back to its default)
    infos = {}
    if opt['start_from'] is not None:
        pass
    iter = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_accuracies = infos.get('val_accuracies', [])
    val_loss_history = infos.get('val_loss_history', {})
    val_result_history = infos.get('val_result_history', {})
    loss_history = infos.get('loss_history', {})
    loader.iterators = infos.get('iterators', loader.iterators)
    # FIX: always define best_val_score — the original assigned it only when
    # opt['load_best_score'] == 1, causing a NameError at checkpoint time
    best_val_score = None
    if opt['load_best_score'] == 1:
        best_val_score = infos.get('best_val_score', None)

    # set up criterion
    mm_crit = MaxMarginCriterion(opt['visual_rank_weight'],
                                 opt['lang_rank_weight'], opt['margin'])
    att_crit = nn.BCEWithLogitsLoss(loader.get_attribute_weights())

    # move to GPU
    if opt['gpuid'] >= 0:
        model.cuda()
        mm_crit.cuda()
        att_crit.cuda()

    # set up optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt['learning_rate'],
                                 betas=(opt['optim_alpha'], opt['optim_beta']),
                                 eps=opt['optim_epsilon'])

    # start training
    data_time, model_time = 0, 0
    lr = opt['learning_rate']
    best_predictions, best_overall = None, None
    while True:
        # run one iteration
        loss, T, wrapped = lossFun(loader, optimizer, model, mm_crit, att_crit,
                                   opt, iter)
        data_time += T['data']
        model_time += T['model']

        # write the training loss summary
        if iter % opt['losses_log_every'] == 0:
            loss_history[iter] = loss
            # print stats
            log_toc = time.time()
            print('iter[%s](epoch[%s]), train_loss=%.3f, lr=%.2E, data:%.2fs/iter, model:%.2fs/iter' \
                  % (iter, epoch, loss, lr, data_time/opt['losses_log_every'], model_time/opt['losses_log_every']))
            data_time, model_time = 0, 0

        # decay the learning rate (per iteration in this variant)
        if opt['learning_rate_decay_start'] > 0 and iter > opt[
                'learning_rate_decay_start']:
            frac = (iter - opt['learning_rate_decay_start']
                    ) / opt['learning_rate_decay_every']
            decay_factor = 0.1**frac
            lr = opt['learning_rate'] * decay_factor
            # update optimizer's learning rate
            model_utils.set_lr(optimizer, lr)

        # eval loss and save checkpoint
        if iter % opt['save_checkpoint_every'] == 0 or iter == opt['max_iters']:
            val_loss, acc, predictions, overall = eval_utils.eval_split(
                loader, model, None, 'val', opt)
            val_loss_history[iter] = val_loss
            val_result_history[iter] = {'loss': val_loss, 'accuracy': acc}
            val_accuracies += [(iter, acc)]
            print('validation loss: %.2f' % val_loss)
            print('validation acc : %.2f%%\n' % (acc * 100.0))
            print('validation precision : %.2f%%' %
                  (overall['precision'] * 100.0))
            print('validation recall    : %.2f%%' %
                  (overall['recall'] * 100.0))
            print('validation f1        : %.2f%%' % (overall['f1'] * 100.0))

            # save model if best
            current_score = acc
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_predictions = predictions
                best_overall = overall
                checkpoint_path = osp.join(checkpoint_dir, opt['id'] + '.pth')
                checkpoint = {}
                checkpoint['model'] = model
                checkpoint['opt'] = opt
                torch.save(checkpoint, checkpoint_path)
                print('model saved to %s' % checkpoint_path)

            # write json report
            infos['iter'] = iter
            infos['epoch'] = epoch
            infos['iterators'] = loader.iterators
            infos['loss_history'] = loss_history
            infos['val_accuracies'] = val_accuracies
            infos['val_loss_history'] = val_loss_history
            infos['best_val_score'] = best_val_score
            infos['best_predictions'] = predictions if best_predictions is None else best_predictions
            infos['best_overall'] = overall if best_overall is None else best_overall
            infos['opt'] = opt
            infos['val_result_history'] = val_result_history
            infos['word_to_ix'] = loader.word_to_ix
            infos['att_to_ix'] = loader.att_to_ix
            # text mode: json.dump writes str, not bytes
            with open(osp.join(checkpoint_dir, opt['id'] + '.json'),
                      'w') as io:
                json.dump(infos, io)

        # update iter and epoch
        iter += 1
        if wrapped:
            epoch += 1
        if iter >= opt['max_iters'] and opt['max_iters'] > 0:
            break