Example #1
            os.path.join('data', vocab_source, 'explain_dictionary.pkl'))

    #train_dset = VQAFeatureDataset('train', q_dict, c_dict, 'cache/VQAE2',args.att_thr)
    #eval_dset = VQAFeatureDataset('val', q_dict, c_dict, 'cache/VQAE2',args.att_thr)
    train_dset = VQAEDataset('train', q_dict, c_dict, 'cache/VQAE2')
    eval_dset = VQAEDataset('val', q_dict, c_dict, 'cache/VQAE2')
    #train_dset = VQAEVQA2Dataset('train', q_dict, c_dict, 'cache')
    #eval_dset = VQAEVQA2Dataset('val', q_dict, c_dict, 'cache')
    batch_size = args.batch_size

    constructor = 'build_%s' % args.model
    model = utils.model_factory(constructor, train_dset, args.num_hid,
                                args.att_dim, args.decode_dim).cuda()

    model_path = os.path.join(args.output, 'model.pth')
    model_state = torch.load(model_path)
    model.load_state_dict(model_state)

    print('Model has {} parameters in total'.format(utils.params_count(model)))
    #model = nn.DataParallel(model).cuda()

    eval_loader = DataLoader(eval_dset,
                             batch_size,
                             shuffle=False,
                             num_workers=1)
    model.train(False)
    vqa_score, results = evaluate(model, eval_loader, q_dict, c_dict)
    save_obj = {'vqa_score': vqa_score, 'results': results}
    save_results(save_obj, args.output)
    #save_results(results, args.output)
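Every example here reports its model size through utils.params_count(model). The helper itself is not part of these snippets; a minimal sketch, assuming a standard PyTorch nn.Module, could look like this:

def params_count(model):
    # Count all parameters of a PyTorch module (sum of element counts).
    return sum(p.numel() for p in model.parameters())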
Example #2
def run(config, is_train, eval_name):
    torch.manual_seed(config['training_parameters']['seed'])
    args.gpu = config['training_parameters']['gpu']
    output = config['logs']['dir_logs']
    batch_size = config['training_parameters']['batch_size']
    if args.gpu:
        torch.cuda.manual_seed(config['training_parameters']['seed'])
        torch.backends.cudnn.benchmark = True

    if is_train:
        '''
        When eval_name is "test", the test dataset is loaded as well.
        '''
        print("training . . .")
        model, train_dset, eval_dset, embedding_weight, test_dset = load_model_data(
            config, is_train=is_train, eval_name=eval_name)
    else:
        print("testing . . .")
        model, eval_dset = load_model_data(config,
                                           is_train=is_train,
                                           eval_name=eval_name)
        if args.gpu:
            #             model = model.cuda()
            model = nn.DataParallel(model).cuda()
        model_dir = os.path.join(output, "model_epoch16.pth")
        eval_loader = DataLoader(
            eval_dset,
            batch_size,
            shuffle=False,
            num_workers=config['training_parameters']['num_workers'],
            collate_fn=utils.trim_collate)
        utils.compute_result(eval_name, model, model_dir, eval_loader, output)
        return

    logger = utils.logger(os.path.join(output, 'log.json'))
    model_size = utils.params_count(model)

    print("nParams:", model_size)

    logger.add("model size(Params)", model_size)
    logger.add("train set", len(train_dset))
    logger.add("val set", len(eval_dset))

    with open(os.path.join(output, "config.yaml"), "w") as yaml_file:
        yaml.dump(config, yaml_file)

#     model.embedding.init_embedding(embedding_weight)

    if args.gpu:
        #         model = model.cuda()
        model = nn.DataParallel(model).cuda()

    print("sucees to create model.")
    #     use_vg = config['data']['use_vg']
    evaluation = (eval_name == "val")  # config['data']['evaluation']

    if evaluation:
        print("train with train dataset")
        eval_loader = DataLoader(
            eval_dset,
            batch_size,
            shuffle=False,
            num_workers=config['training_parameters']['num_workers'],
            collate_fn=utils.trim_collate)
        train_loader = DataLoader(
            train_dset,
            batch_size,
            shuffle=True,
            num_workers=config['training_parameters']['num_workers'],
            collate_fn=utils.trim_collate)
    else:
        print("train with train and val dataset")
        eval_loader = None
        train_dset = ConcatDataset([train_dset, eval_dset])
        train_loader = DataLoader(
            train_dset,
            batch_size,
            shuffle=True,
            num_workers=config['training_parameters']['num_workers'],
            collate_fn=utils.trim_collate)


#     model_data = torch.load(output+'model_epoch8.pth')
#     model.load_state_dict(model_data.get('model_state', model_data))
#     print("success to load model!")

# Initialize the optimizer
#     ignored_params = list(map(id, model.module.bert.parameters()))
#     base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
#     optim = torch.optim.Adamax([
#         {'params': base_params},
#         {'params': model.module.bert.parameters(), 'lr': 1e-6}  # the FC layers use a larger learning rate
#         ],
#         lr = 0.0015
#     )

    optim = torch.optim.Adamax(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=0.0015)

    #     optim = torch.optim.Adam(
    #         filter(lambda p:p.requires_grad, model.parameters()),
    #         lr=0.00015,
    #         betas = (0.9, 0.98),
    #         eps = 1e-9
    # #         weight_decay=0.001
    #     )

    train(model, train_loader, eval_loader, logger, optim, output,
          **config['training_parameters'])

    if eval_name == "val":
        model_dir = os.path.join(output, "model_best.pth")
        utils.compute_result(eval_name, model, model_dir, eval_loader, output)
    else:  # test
        model_dir = os.path.join(output, "model_epoch5.pth")
        test_loader = DataLoader(
            test_dset,
            batch_size,
            shuffle=False,
            num_workers=config['training_parameters']['num_workers'],
            collate_fn=utils.trim_collate)
        utils.compute_result(eval_name, model, model_dir, test_loader, output)
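The loaders in this example pass collate_fn=utils.trim_collate because the number of detected object features differs from image to image. The real trim_collate is not included in the snippet; a simplified sketch, assuming each sample is a tuple whose first field is a [num_objects, feat_dim] tensor and whose remaining fields are fixed-size, might pad the features to the largest object count in the batch:

import torch
from torch.utils.data.dataloader import default_collate

def trim_collate(batch):
    # Pad the variable-length object features (first field of each sample)
    # to the largest object count in the batch; default-collate the rest.
    features = [item[0] for item in batch]
    rest = list(zip(*[item[1:] for item in batch]))
    max_objs = max(f.size(0) for f in features)
    padded = features[0].new_zeros(len(batch), max_objs, features[0].size(1))
    for i, f in enumerate(features):
        padded[i, :f.size(0)] = f
    return (padded,) + tuple(default_collate(list(field)) for field in rest)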
Example #3
    constructor = 'build_%s' % args.model
    model1 = utils.model_factory(constructor, train_dset, args.num_hid,
                                 args.att_dim, args.decode_dim).cuda()
    model2 = utils.model_factory(constructor, train_dset, args.num_hid,
                                 args.att_dim, args.decode_dim).cuda()

    model1_path = os.path.join(args.source1, 'model.pth')
    model1_state = torch.load(model1_path)
    model1.load_state_dict(model1_state)
    model2_path = os.path.join(args.source2, 'model.pth')
    model2_state = torch.load(model2_path)
    model2.load_state_dict(model2_state)

    print('Model has {} parameters in total'.format(
        utils.params_count(model1)))

    eval_loader = DataLoader(eval_dset,
                             batch_size,
                             shuffle=False,
                             num_workers=1)
    model1.train(False)
    model2.train(False)
    compare(model1, model2, eval_loader, q_dict, c_dict, label2ans,
            args.output)
    evaluate(model1, eval_loader, q_dict, c_dict, True)
    evaluate(model2, eval_loader, q_dict, c_dict, False)

    #vqa_score, results = evaluate(model1, eval_loader, q_dict, c_dict)
    #save_obj = {'vqa_score': vqa_score, 'results': results}
    #save_results(save_obj, args.output)
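The evaluate calls in this example are defined elsewhere; a minimal sketch of such a loop, assuming the loader yields (features, question, target) batches with soft VQA answer scores as targets and the model maps (features, question) to answer logits, could be:

import torch

def evaluate(model, dataloader):
    # Accumulate soft VQA accuracy: credit the annotator score assigned
    # to the predicted answer, averaged over the split.
    score, total = 0.0, 0
    with torch.no_grad():
        for v, q, target in dataloader:
            v, q, target = v.cuda(), q.cuda(), target.cuda()
            logits = model(v, q)
            pred = logits.argmax(dim=1, keepdim=True)
            one_hot = torch.zeros_like(target).scatter_(1, pred, 1)
            score += (one_hot * target).sum().item()
            total += v.size(0)
    return score / total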
Example #4
    #     exp_logger = load_checkpoint(model.module, optimizer, cf.resume)
    # else:
    exp_logger = utils.Experiment(os.path.basename(cf.log_dir))

    meters = {
        'loss': utils.AvgMeter(),
        'acc1': utils.AvgMeter(),
        'acc5': utils.AvgMeter(),
        'batch_time': utils.AvgMeter(),
        'data_time': utils.AvgMeter(),
        'epoch_time': utils.SumMeter()
    }

    for split in vqa.data['qa'].keys():
        exp_logger.add_meters(split, meters)
    exp_logger.info['model_params'] = utils.params_count(model)
    # print('Model has {} parameters'.format(exp_logger.info['model_params']))

    print('<train.py> Start training...')

    max_step = None

    if cf.debug:
        # max_step = 5
        print('<train.py>: You are in debugging mode...')

    auto_find = {
        'train': ['train'] + [False] * cf.epochs,
        'test_dev': ['test_dev'] + [False] * cf.epochs,
        'test': ['test'] + [False] * cf.epochs,
        'test_local': ['test_local'] + [False] * cf.epochs
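The meters above track running statistics during training; AvgMeter and SumMeter are not defined in the snippet. Minimal sketches following the usual running-average pattern might be:

class AvgMeter(object):
    # Running average of a scalar such as loss or batch accuracy.
    def __init__(self):
        self.sum, self.count = 0.0, 0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n

    @property
    def avg(self):
        return self.sum / max(self.count, 1)


class SumMeter(AvgMeter):
    # Same bookkeeping, reported as a running total (e.g. epoch time).
    @property
    def value(self):
        return self.sum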
Example #5
def main():
    # Hyper Parameters setting
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        default='/mnt/data/linkaiyi/scan/data/f30k_precomp',
                        help='path to datasets')
    parser.add_argument('--path_opt',
                        default='option/FusionNoattn_baseline.yaml',
                        type=str,
                        help='path to a yaml options file')
    parser.add_argument('--data_name',
                        default='flickr30k_splits',
                        help='{coco,f30k}_splits')
    parser.add_argument('--logger_name',
                        default='./log_2',
                        help='Path to save Tensorboard log.')
    parser.add_argument(
        '--vocab_path',
        default=
        '/home/linkaiyi/fusion_wangtan/Fusion_flickr/Fusion_10.28/vocab',
        help='Path to saved vocabulary json files.')
    parser.add_argument(
        '--model_name',
        default='/mnt/data/linkaiyi/mscoco/fusion/Fusion_flic/runs/checkpoint',
        help='Path to save the model.')
    parser.add_argument('--num_epochs',
                        default=120,
                        type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size',
                        default=128,
                        type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--workers',
                        default=2,
                        type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr_update',
                        default=20,
                        type=int,
                        help='Number of epochs to update the learning rate.')

    opt = parser.parse_args()
    if os.path.isdir(opt.logger_name):
        if click.confirm('Logs directory already exists in {}. Erase?'.format(
                opt.logger_name), default=False):
            os.system('rm -r ' + opt.logger_name)
    tb_logger.configure(opt.logger_name, flush_secs=5)
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    #########################################################################################
    # Create options
    #########################################################################################

    options = {'logs': {}, 'coco': {}, 'model': {'seq2vec': {}}, 'optim': {}}
    if opt.path_opt is not None:
        with open(opt.path_opt, 'r') as handle:
            options_yaml = yaml.safe_load(handle)
        options = utils.update_values(options, options_yaml)

    vocab = deserialize_vocab(
        os.path.join(opt.vocab_path, '%s_vocab.json' % opt.data_name))
    vocab_word = sorted(vocab.word2idx.items(),
                        key=lambda x: x[1],
                        reverse=False)
    vocab_word = [tup[0] for tup in vocab_word]
    opt.vocab_size = len(vocab)

    # Create dataset, model, criterion and optimizer

    train_loader, val_loader = data.get_loaders(opt.data_path, vocab,
                                                opt.batch_size, opt.workers,
                                                opt)
    model = models.factory(options['model'],
                           vocab_word,
                           cuda=True,
                           data_parallel=False)

    criterion = nn.CrossEntropyLoss(weight=torch.Tensor([1, 128])).cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=options['optim']['lr'])

    print('Model has {} parameters'.format(utils.params_count(model)))

    # optionally resume from a checkpoint; initialize defaults first so a
    # missing checkpoint file still leaves start_epoch/best_rsum defined
    start_epoch = 0
    best_rsum = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            engine.validate(val_loader, model, criterion, optimizer,
                            opt.batch_size)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    for epoch in range(start_epoch, opt.num_epochs):

        adjust_learning_rate(opt, options, optimizer, epoch)

        # train for one epoch

        engine.train(train_loader,
                     model,
                     criterion,
                     optimizer,
                     epoch,
                     print_freq=10)

        # evaluate on validation set
        rsum = engine.validate(val_loader, model, criterion, optimizer,
                               opt.batch_size)

        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        if not os.path.exists(opt.model_name):
            os.mkdir(opt.model_name)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': 'baseline',
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'options': options,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint_{}{}.pth.tar'.format(epoch, best_rsum),
            prefix=opt.model_name + '/')
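adjust_learning_rate is called once per epoch but not defined in this snippet. Given the --lr_update help text ("Number of epochs to update the learning rate"), a plausible sketch, assuming the common decay-by-10x schedule (the 0.1 factor is an assumption, not taken from this code), is:

def adjust_learning_rate(opt, options, optimizer, epoch):
    # Decay the base learning rate by 10x every opt.lr_update epochs.
    lr = options['optim']['lr'] * (0.1 ** (epoch // opt.lr_update))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr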