Example #1
0
def main():
    """Entry point: build options, datasets, model and optimizer, then either
    evaluate a checkpoint (--evaluate) or run the training loop.

    Relies on the module-level globals ``args`` (parsed CLI namespace) and
    ``best_acc1`` (best validation top-1 accuracy so far), which are shared
    with the checkpoint helpers.
    """
    global args, best_acc1
    args = parser.parse_args()

    #########################################################################################
    # Create options
    #########################################################################################
    # Select the pickle of pre-extracted BERT question features matching the
    # requested BERT variant.
    if args.bert_model == "bert-base-uncased":
        question_features_path = BASE_EXTRACTED_QUES_FEATURES_PATH
    elif args.bert_model == "bert-base-multilingual-cased":
        question_features_path = CASED_EXTRACTED_QUES_FEATURES_PATH
    else:
        question_features_path = EXTRACTED_QUES_FEATURES_PATH

    # CLI defaults; a YAML file passed via --path_opt overrides these values.
    options = {
        'vqa': {
            'trainsplit': args.vqa_trainsplit
        },
        'logs': {
            'dir_logs': args.dir_logs
        },
        'model': {
            'arch': args.arch,
            'seq2vec': {
                'type': args.st_type,
                'dropout': args.st_dropout,
                'fixed_emb': args.st_fixed_emb
            }
        },
        'optim': {
            'lr': args.learning_rate,
            'batch_size': args.batch_size,
            'epochs': args.epochs
        }
    }
    if args.path_opt is not None:
        with open(args.path_opt, 'r') as handle:
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        options = utils.update_values(options, options_yaml)
    print('## args')
    pprint(vars(args))
    print('## options')
    pprint(options)
    if args.help_opt:
        return

    # Set datasets options
    if 'vgenome' not in options:
        options['vgenome'] = None

    #########################################################################################
    # Create needed datasets
    #########################################################################################

    trainset = datasets.factory_VQA(options['vqa']['trainsplit'],
                                    options['vqa'], options['coco'],
                                    options['vgenome'])
    train_loader = trainset.data_loader(
        batch_size=options['optim']['batch_size'],
        num_workers=args.workers,
        shuffle=True)

    # A held-out validation split only exists when training on 'train' alone.
    if options['vqa']['trainsplit'] == 'train':
        valset = datasets.factory_VQA('val', options['vqa'], options['coco'])
        val_loader = valset.data_loader(
            batch_size=options['optim']['batch_size'],
            num_workers=args.workers)

    if options['vqa']['trainsplit'] == 'trainval' or args.evaluate:
        testset = datasets.factory_VQA('test', options['vqa'], options['coco'])
        test_loader = testset.data_loader(
            batch_size=options['optim']['batch_size'],
            num_workers=args.workers)

    #########################################################################################
    # Create model, criterion and optimizer
    #########################################################################################

    model = models.factory(options['model'],
                           trainset.vocab_words(),
                           trainset.vocab_answers(),
                           cuda=True,
                           data_parallel=True)
    criterion = criterions.factory(options['vqa'], cuda=True)
    # Only optimize parameters that require gradients (frozen parts of the
    # model, e.g. fixed embeddings, are excluded).
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        options['optim']['lr'])

    #########################################################################################
    # args.resume: resume from a checkpoint OR create logs directory
    #########################################################################################

    exp_logger = None
    if args.resume:
        args.start_epoch, best_acc1, exp_logger = load_checkpoint(
            model.module, optimizer,
            os.path.join(options['logs']['dir_logs'], args.resume))
    else:
        # Or create logs directory
        if os.path.isdir(options['logs']['dir_logs']):
            # BUGFIX: 'default=False' was previously passed to str.format()
            # (where it was silently ignored) instead of click.confirm().
            if click.confirm(
                    'Logs directory already exists in {}. Erase?'.format(
                        options['logs']['dir_logs']),
                    default=False):
                os.system('rm -r ' + options['logs']['dir_logs'])
            else:
                return
        os.system('mkdir -p ' + options['logs']['dir_logs'])
        # Snapshot the effective options/args into the logs dir for
        # reproducibility.  Guard: --path_opt may be absent, in which case
        # os.path.basename(None) would raise TypeError.
        if args.path_opt is not None:
            path_new_opt = os.path.join(options['logs']['dir_logs'],
                                        os.path.basename(args.path_opt))
            with open(path_new_opt, 'w') as f:
                yaml.dump(options, f, default_flow_style=False)
        path_args = os.path.join(options['logs']['dir_logs'], 'args.yaml')
        with open(path_args, 'w') as f:
            yaml.dump(vars(args), f, default_flow_style=False)

    if exp_logger is None:
        # Set loggers (only when not restored from a checkpoint).
        exp_name = os.path.basename(
            options['logs']['dir_logs'])  # add timestamp
        exp_logger = logger.Experiment(exp_name, options)
        exp_logger.add_meters('train', make_meters())
        exp_logger.add_meters('test', make_meters())
        if options['vqa']['trainsplit'] == 'train':
            exp_logger.add_meters('val', make_meters())
        exp_logger.info['model_params'] = utils.params_count(model)
        print('Model has {} parameters'.format(
            exp_logger.info['model_params']))

    #########################################################################################
    # args.evaluate: on valset OR/AND on testset
    #########################################################################################

    if args.evaluate:
        path_logger_json = os.path.join(options['logs']['dir_logs'],
                                        'logger.json')

        if options['vqa']['trainsplit'] == 'train':
            acc1, val_results = engine.validate(val_loader, model, criterion,
                                                exp_logger, args.start_epoch,
                                                args.print_freq)
            # save results and compute OpenEnd accuracy
            exp_logger.to_json(path_logger_json)
            save_results(val_results, args.start_epoch, valset.split_name(),
                         options['logs']['dir_logs'], options['vqa']['dir'])

        test_results, testdev_results = engine.test(test_loader, model,
                                                    exp_logger,
                                                    args.start_epoch,
                                                    args.print_freq)
        # save results and DOES NOT compute OpenEnd accuracy
        exp_logger.to_json(path_logger_json)
        save_results(test_results, args.start_epoch, testset.split_name(),
                     options['logs']['dir_logs'], options['vqa']['dir'])
        save_results(testdev_results, args.start_epoch,
                     testset.split_name(testdev=True),
                     options['logs']['dir_logs'], options['vqa']['dir'])
        return

    #########################################################################################
    # Begin training on train/val or trainval/test
    #########################################################################################

    # PERF: load the question-features pickle once, instead of re-reading it
    # from disk every epoch (previously twice per epoch on the 'train' split).
    # NOTE(review): assumes engine.train/validate/test do not mutate this dict
    # across epochs -- confirm the per-epoch reloads were not intentional.
    question_features = io_utils.read_pickle(question_features_path)

    for epoch in range(args.start_epoch + 1, options['optim']['epochs']):
        # train for one epoch
        engine.train(train_loader,
                     model,
                     criterion,
                     optimizer,
                     exp_logger,
                     epoch,
                     args.print_freq,
                     dict=question_features,
                     bert_dim=options["model"]["dim_q"])

        if options['vqa']['trainsplit'] == 'train':
            # evaluate on validation set
            acc1, val_results = engine.validate(
                val_loader,
                model,
                criterion,
                exp_logger,
                epoch,
                args.print_freq,
                topk=5,
                dict=question_features,
                bert_dim=options["model"]["dim_q"])
            # remember best prec@1 and save checkpoint
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': options['model']['arch'],
                    'best_acc1': best_acc1,
                    'exp_logger': exp_logger
                }, model.module.state_dict(), optimizer.state_dict(),
                options['logs']['dir_logs'], args.save_model,
                args.save_all_from, is_best)

            # save results and compute OpenEnd accuracy
            save_results(val_results, epoch, valset.split_name(),
                         options['logs']['dir_logs'], options['vqa']['dir'])
        else:
            # trainval split: no val set, so test every epoch instead.
            test_results, testdev_results = engine.test(
                test_loader,
                model,
                exp_logger,
                epoch,
                args.print_freq,
                topk=5,
                dict=question_features,
                bert_dim=options["model"]["dim_q"])

            # save checkpoint at every timestep
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': options['model']['arch'],
                    'best_acc1': best_acc1,
                    'exp_logger': exp_logger
                }, model.module.state_dict(), optimizer.state_dict(),
                options['logs']['dir_logs'], args.save_model,
                args.save_all_from)

            # save results and DOES NOT compute OpenEnd accuracy
            save_results(test_results, epoch, testset.split_name(),
                         options['logs']['dir_logs'], options['vqa']['dir'])
            save_results(testdev_results, epoch,
                         testset.split_name(testdev=True),
                         options['logs']['dir_logs'], options['vqa']['dir'])
Example #2
0
def main():
    """Entry point: build options, datasets, model and optimizer, then either
    evaluate a checkpoint (--evaluate) or run the training loop.

    Relies on the module-level globals ``args`` (parsed CLI namespace) and
    ``best_acc1`` (best validation top-1 accuracy so far), which are shared
    with the checkpoint helpers.
    """
    global args, best_acc1
    args = parser.parse_args()

    # Set options: CLI defaults, overridden by a YAML file given via --path_opt.
    options = {
        'vqa' : {
            'trainsplit': args.vqa_trainsplit,
            'dropout': args.emb_drop
        },
        'logs': {
            'dir_logs': args.dir_logs
        },
        'model': {
            'arch': args.arch,
            'seq2vec': {
                'type': args.st_type,
                'dropout': args.st_dropout,
                'fixed_emb': args.st_fixed_emb
            }
        },
        'optim': {
            'lr': args.learning_rate,
            'batch_size': args.batch_size,
            'epochs': args.epochs
        }
    }
    if args.path_opt is not None:
        with open(args.path_opt, 'r') as handle:
            # BUGFIX: yaml.load without an explicit Loader is deprecated
            # (unsafe by default on PyYAML < 5.1); FullLoader matches the
            # trusted-config use case here.
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        options = utils.update_values(options, options_yaml)
    print('## args'); pprint(vars(args))
    print('## options'); pprint(options)
    if args.help_opt:
        return

    # Set datasets
    trainset = datasets.factory_VQA(options['vqa']['trainsplit'], options['vqa'], options['coco'])
    train_loader = trainset.data_loader(batch_size=options['optim']['batch_size'],
                                        num_workers=args.workers,
                                        shuffle=True)

    # A held-out validation split only exists when training on 'train' alone.
    if options['vqa']['trainsplit'] == 'train':
        valset = datasets.factory_VQA('val', options['vqa'], options['coco'])
        val_loader = valset.data_loader(batch_size=options['optim']['batch_size'],
                                        num_workers=args.workers)
    if options['vqa']['trainsplit'] == 'trainval' or args.evaluate:
        testset = datasets.factory_VQA('test', options['vqa'], options['coco'])
        test_loader = testset.data_loader(batch_size=options['optim']['batch_size'],
                                          num_workers=args.workers)

    # Set model, criterion and optimizer
    model = getattr(models, options['model']['arch'])(
        options['model'], trainset.vocab_words(), trainset.vocab_answers())

    model = nn.DataParallel(model).cuda()
    criterion = criterions.factory_loss(options['vqa'], cuda=True)
    # Only optimize parameters that require gradients (frozen parts of the
    # model, e.g. fixed embeddings, are excluded).
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), options['optim']['lr'])

    # Optionally resume from a checkpoint
    exp_logger = None
    if args.resume:
        args.start_epoch, best_acc1, exp_logger = load_checkpoint(model.module, optimizer,
            os.path.join(options['logs']['dir_logs'], args.resume))
    else:
        # Or create logs directory
        if os.path.isdir(options['logs']['dir_logs']):
            # BUGFIX: 'default=False' was previously passed to str.format()
            # (where it was silently ignored) instead of click.confirm().
            if click.confirm('Logs directory already exists in {}. Erase?'
                             .format(options['logs']['dir_logs']),
                             default=False):
                os.system('rm -r ' + options['logs']['dir_logs'])
            else:
                return
        os.system('mkdir -p ' + options['logs']['dir_logs'])
        # Snapshot the effective options/args into the logs dir for
        # reproducibility.  Guard: --path_opt may be absent, in which case
        # os.path.basename(None) would raise TypeError.
        if args.path_opt is not None:
            path_new_opt = os.path.join(options['logs']['dir_logs'],
                                        os.path.basename(args.path_opt))
            with open(path_new_opt, 'w') as f:
                yaml.dump(options, f, default_flow_style=False)
        path_args = os.path.join(options['logs']['dir_logs'], 'args.yaml')
        with open(path_args, 'w') as f:
            yaml.dump(vars(args), f, default_flow_style=False)

    if exp_logger is None:
        # Set loggers (only when not restored from a checkpoint).
        exp_name = os.path.basename(options['logs']['dir_logs']) # add timestamp
        exp_logger = logger.Experiment(exp_name, options)
        exp_logger.add_meters('train', make_meters())
        exp_logger.add_meters('test', make_meters())
        if options['vqa']['trainsplit'] == 'train':
            exp_logger.add_meters('val', make_meters())
        exp_logger.info['model_params'] = utils.params_count(model)
        print('Model has {} parameters'.format(exp_logger.info['model_params']))

    # Begin evaluation and training
    if args.evaluate:
        path_logger_json = os.path.join(options['logs']['dir_logs'], 'logger.json')

        if options['vqa']['trainsplit'] == 'train':
            acc1, val_results = engine.validate(val_loader, model, criterion,
                                                 exp_logger, args.start_epoch, args.print_freq)
            # save results and compute OpenEnd accuracy
            exp_logger.to_json(path_logger_json)
            save_results(val_results, args.start_epoch, valset.split_name(),
                         options['logs']['dir_logs'], options['vqa']['dir'])

        test_results, testdev_results = engine.test(test_loader, model, exp_logger,
                                                    args.start_epoch, args.print_freq)
        # save results and DOES NOT compute OpenEnd accuracy
        exp_logger.to_json(path_logger_json)
        save_results(test_results, args.start_epoch, testset.split_name(),
                     options['logs']['dir_logs'], options['vqa']['dir'])
        save_results(testdev_results, args.start_epoch, testset.split_name(testdev=True),
                     options['logs']['dir_logs'], options['vqa']['dir'])
        return

    for epoch in range(args.start_epoch+1, options['optim']['epochs']):
        # train for one epoch
        engine.train(train_loader, model, criterion, optimizer,
                     exp_logger, epoch, args.print_freq)

        if options['vqa']['trainsplit'] == 'train':
            # evaluate on validation set
            acc1, val_results = engine.validate(val_loader, model, criterion,
                                                exp_logger, epoch, args.print_freq)
            # remember best prec@1 and save checkpoint
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)
            save_checkpoint({
                    'epoch': epoch,
                    'arch': options['model']['arch'],
                    'best_acc1': best_acc1,
                    'exp_logger': exp_logger
                },
                model.module.state_dict(),
                optimizer.state_dict(),
                options['logs']['dir_logs'],
                args.save_model,
                args.save_all_from,
                is_best)

            # save results and compute OpenEnd accuracy
            save_results(val_results, epoch, valset.split_name(),
                         options['logs']['dir_logs'], options['vqa']['dir'])
        else:
            # trainval split: no val set, so test every epoch instead.
            test_results, testdev_results = engine.test(test_loader, model, exp_logger,
                                                        epoch, args.print_freq)

            # save checkpoint at every timestep
            save_checkpoint({
                    'epoch': epoch,
                    'arch': options['model']['arch'],
                    'best_acc1': best_acc1,
                    'exp_logger': exp_logger
                },
                model.module.state_dict(),
                optimizer.state_dict(),
                options['logs']['dir_logs'],
                args.save_model,
                args.save_all_from)

            # save results and DOES NOT compute OpenEnd accuracy
            save_results(test_results, epoch, testset.split_name(),
                         options['logs']['dir_logs'], options['vqa']['dir'])
            save_results(testdev_results, epoch, testset.split_name(testdev=True),
                         options['logs']['dir_logs'], options['vqa']['dir'])
Example #3
0
def main():
    """Entry point: build options, datasets, model and optimizer, then train.

    Relies on the module-level globals ``args`` (parsed CLI namespace) and
    ``best_acc1``.  In this variant the validation/evaluation paths are
    commented out and training runs on a custom FLIR image dataset.
    NOTE(review): the function may continue past the end of this chunk.
    """
    global args, best_acc1
    args = parser.parse_args()

    #########################################################################################
    # Create options
    #########################################################################################

    # CLI defaults; a YAML file passed via --path_opt overrides these values.
    options = {
        'vqa' : {
            'trainsplit': args.vqa_trainsplit
        },
        'logs': {
            'dir_logs': args.dir_logs
        },
        'model': {
            'arch': args.arch,
            'seq2vec': {
                'type': args.st_type,
                'dropout': args.st_dropout,
                'fixed_emb': args.st_fixed_emb
            }
        },
        'optim': {
            'lr': args.learning_rate,
            'batch_size': args.batch_size,
            'epochs': args.epochs
        }
    }
    if args.path_opt is not None:
        with open(args.path_opt, 'r') as handle:
            # NOTE(review): yaml.load without a Loader is deprecated/unsafe on
            # older PyYAML; consider Loader=yaml.FullLoader as in Example #1.
            options_yaml = yaml.load(handle)
        options = utils.update_values(options, options_yaml)
    # print('## args'); pprint(vars(args))
    # print('## options'); pprint(options)
    if args.help_opt:
        return

    # Set datasets options
    if 'vgenome' not in options:
        options['vgenome'] = None

    #########################################################################################
    # Create needed datasets
    #########################################################################################

    trainset = datasets.factory_VQA(options['vqa']['trainsplit'],
                                    options['vqa'],
                                    options['coco'],
                                    options['vgenome'])
    train_loader = trainset.data_loader(batch_size=options['optim']['batch_size'],
                                        num_workers=args.workers,
                                        shuffle=True)

    # Custom image dataset used for training below instead of train_loader.
    # NOTE(review): presumably FLIR thermal images; hard-coded root dir and
    # batch size 4 ignore options['optim']['batch_size'] -- confirm intended.
    imgs_trainset = CustomDataset(root_dir='FLIR/', transform=None)
    imgs_train_loader = DataLoader(imgs_trainset, batch_size=4)

    # if options['vqa']['trainsplit'] == 'train':
        # valset = datasets.factory_VQA('val', options['vqa'], options['coco'])
        # val_loader = valset.data_loader(batch_size=options['optim']['batch_size'],
        #                                 num_workers=args.workers)

    if options['vqa']['trainsplit'] == 'trainval' or args.evaluate:
        testset = datasets.factory_VQA('test', options['vqa'], options['coco'])
        test_loader = testset.data_loader(batch_size=options['optim']['batch_size'],
                                          num_workers=args.workers)

    #########################################################################################
    # Create model, criterion and optimizer
    #########################################################################################

    model = models.factory(options['model'],
                           trainset.vocab_words(), trainset.vocab_answers(),
                           cuda=True, data_parallel=True)
    criterion = criterions.factory(options['vqa'], cuda=True)
    # Only optimize parameters that require gradients.
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                            options['optim']['lr'])

    #########################################################################################
    # args.resume: resume from a checkpoint OR create logs directory
    #########################################################################################

    exp_logger = None
    if args.resume:
        args.start_epoch, best_acc1, exp_logger = load_checkpoint(model.module, optimizer,
            os.path.join(options['logs']['dir_logs'], args.resume))
    # else:
    #     # Or create logs directory
    #     if os.path.isdir(options['logs']['dir_logs']):
    #         if click.confirm('Logs directory already exists in {}. Erase?'
    #             .format(options['logs']['dir_logs'], default=False)):
    #             os.system('rm -r ' + options['logs']['dir_logs'])
    #         else:
    #             return
    #     os.system('mkdir -p ' + options['logs']['dir_logs'])
    #     path_new_opt = os.path.join(options['logs']['dir_logs'],
    #                    os.path.basename(args.path_opt))
    #     path_args = os.path.join(options['logs']['dir_logs'], 'args.yaml')
    #     with open(path_new_opt, 'w') as f:
    #         yaml.dump(options, f, default_flow_style=False)
    #     with open(path_args, 'w') as f:
    #         yaml.dump(vars(args), f, default_flow_style=False)
    if exp_logger is None:
        # Set loggers (only when not restored from a checkpoint).
        exp_name = os.path.basename(options['logs']['dir_logs']) # add timestamp
        exp_logger = logger.Experiment(exp_name, options)
        exp_logger.add_meters('train', make_meters())
        exp_logger.add_meters('test', make_meters())
        if options['vqa']['trainsplit'] == 'train':
            exp_logger.add_meters('val', make_meters())
        exp_logger.info['model_params'] = utils.params_count(model)
        print('Model has {} parameters'.format(exp_logger.info['model_params']))

    #########################################################################################
    # args.evaluate: on valset OR/AND on testset
    #########################################################################################

    # if args.evaluate:
    #     path_logger_json = os.path.join(options['logs']['dir_logs'], 'logger.json')

    #     if options['vqa']['trainsplit'] == 'train':
    #         acc1, val_results = engine.validate(val_loader, model, criterion,
    #                                             exp_logger, args.start_epoch, args.print_freq)
    #         # save results and compute OpenEnd accuracy
    #         exp_logger.to_json(path_logger_json)
    #         save_results(val_results, args.start_epoch, valset.split_name(),
    #                      options['logs']['dir_logs'], options['vqa']['dir'])

    #     test_results, testdev_results = engine.test(test_loader, model, exp_logger,
    #                                                 args.start_epoch, args.print_freq)
    #     # save results and DOES NOT compute OpenEnd accuracy
    #     exp_logger.to_json(path_logger_json)
    #     save_results(test_results, args.start_epoch, testset.split_name(),
    #                  options['logs']['dir_logs'], options['vqa']['dir'])
    #     save_results(testdev_results, args.start_epoch, testset.split_name(testdev=True),
    #                  options['logs']['dir_logs'], options['vqa']['dir'])
    #     return

    #########################################################################################
    # Begin training on train/val or trainval/test
    #########################################################################################

    print("Started training")

    for epoch in range(args.start_epoch+1, options['optim']['epochs']):
        #adjust_learning_rate(optimizer, epoch)

        # train for one epoch on the custom image loader (not train_loader)
        engine.train(imgs_train_loader, model, criterion, optimizer,
                     exp_logger, epoch, args.print_freq)