Code example #1
File: train.py Project: dfan/vqa-baseline
def train(num_epochs, exp_dir, eval_interval, learning_rate, batch_size):
    train_dataset = VQADataset(split='train')
    test_dataset = VQADataset(split='val')
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   num_workers=3,
                                   drop_last=True)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=2)
    criterion = nn.CrossEntropyLoss()

    total_steps = len(train_loader)
    model = Classifier(vocab_size=train_dataset.get_embedding_dim(),
                       embedding_dim=300,
                       hidden_dim=2048,
                       dim_input=2048,
                       dim_output=2048,
                       top_ans=3000).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    global_iter = 0  # renamed from "iter" to avoid shadowing the built-in
    for epoch in range(num_epochs):
        for i, (images, questions, answers, q_ids,
                lengths) in enumerate(train_loader):
            images = images.to(device)
            questions = questions.to(device)
            answers = answers.to(device)
            model.train()

            output = model(images, questions, lengths)
            loss = criterion(output, answers)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % 50 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Batch Loss: {:.4f}'.format(
                    epoch + 1, num_epochs, i + 1, total_steps, loss.item()))
                sys.stdout.flush()
            # Do some evaluations
            if global_iter > 0 and global_iter % eval_interval == 0:
                print('Evaluating at iter {}:'.format(global_iter))
                curr_acc = evaluate(model, exp_dir, test_loader,
                                    train_dataset.inverse_top_answers,
                                    global_iter)
                print('Epoch [{}/{}] Approx. training accuracy: {}'.format(
                    epoch + 1, num_epochs, curr_acc))
                os.makedirs('models/{}'.format(exp_dir), exist_ok=True)
                torch.save(model.state_dict(),
                           'models/{}/model_iter_{}.bin'.format(
                               exp_dir, global_iter))
                torch.save(
                    optimizer.state_dict(),
                    'models/{}/optimizer_iter_{}.bin'.format(
                        exp_dir, global_iter))
            global_iter += 1
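The loop above calls an `evaluate` helper that is not shown in this snippet. A minimal sketch of what such a helper might look like, assuming the validation loader yields the same tuple layout as the training loader; `device` comes from the surrounding script, and the unused `exp_dir` / `inverse_top_answers` arguments are presumably for writing per-question predictions:

import torch

def evaluate(model, exp_dir, test_loader, inverse_top_answers, iteration):
    # Hypothetical sketch: run the model over the validation split and
    # return top-1 accuracy.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, questions, answers, q_ids, lengths in test_loader:
            images = images.to(device)
            questions = questions.to(device)
            output = model(images, questions, lengths)
            pred = output.argmax(dim=1).cpu()
            correct += (pred == answers).sum().item()
            total += answers.size(0)
    return correct / total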
Code example #2
File: evaluate_hw2.py Project: Kudlech/VQA
def evaluate_hw2(cfg: DictConfig) -> float:

    main_utils.init(cfg)

    load_v2()

    # Load dataset

    path_image_train = '/datashare/train2014/COCO_train2014_'
    path_question_train = '/datashare/v2_OpenEnded_mscoco_train2014_questions.json'
    train_dataset = VQADataset(path_answers=cfg['main']['paths']['train'],
                               path_image=path_image_train,
                               path_questions=path_question_train)
    path_image_val = '/datashare/val2014/COCO_val2014_'
    path_question_val = '/datashare/v2_OpenEnded_mscoco_val2014_questions.json'
    val_dataset = VQADataset(path_answers=cfg['main']['paths']['validation'],
                             path_image=path_image_val,
                             path_questions=path_question_val,
                             word_dict=train_dataset.word_dict)

    eval_loader = DataLoader(val_dataset,
                             cfg['train']['batch_size'],
                             shuffle=True,
                             num_workers=cfg['main']['num_workers'])

    image_dim = train_dataset.pic_size
    output_dim = 2410

    model = VQAModel(batch_size=cfg['train']['batch_size'],
                     word_vocab_size=train_dataset.vocab_size,
                     lstm_hidden=cfg['train']['num_hid'],
                     output_dim=output_dim,
                     dropout=cfg['train']['dropout'],
                     word_embedding_dim=cfg['train']['word_embedding_dim'],
                     question_output_dim=cfg['train']['question_output_dim'],
                     image_dim=image_dim,
                     last_hidden_fc_dim=cfg['train']['last_hidden_fc_dim'])
    if torch.cuda.is_available():
        model = model.cuda()
    model.load_state_dict(
        torch.load('model.pkl',
                   map_location=lambda storage, loc: storage)['model_state'])
    model.eval()
    eval_score, eval_loss = evaluate(model, eval_loader)

    print(f"The evaluation score is {eval_score}")

    return eval_score
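For context, a hedged sketch of how `evaluate_hw2` might be invoked; the Hydra decorator arguments (`config_path`, `config_name`) are assumptions, not taken from this project:

import hydra
from omegaconf import DictConfig

@hydra.main(config_path='config', config_name='config')  # hypothetical location
def run(cfg: DictConfig) -> None:
    score = evaluate_hw2(cfg)
    print(f'final score: {score}')

if __name__ == '__main__':
    run()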
Code example #3
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)
    print('[Info] called with: ' + args_str)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # checkpoint directory
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, args.checkpoint)

    # select device
    torch.cuda.set_device(args.gpu_id)
    print('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # data
    print('[Info] init dataset')
    model_group_name, model_name = args.model.split('/')
    val_set = VQADataset('test', model_group_name)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.bs,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    print('sample count: {}'.format(len(val_set)))

    # model
    print('[Info] construct model')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(num_words=val_set.num_words,
                                             num_ans=val_set.num_ans,
                                             emb_size=get_emb_size())
    model.cuda()
    cudnn.benchmark = True
    print('[Info] model name: ' + args.model)

    # predict
    fnames = [(i, 'checkpoint-{:03}.pth.tar'.format(i))
              for i in range(args.start_epoch, args.end_epoch, args.epoch_freq)
              ]
    cp_files = [(i, os.path.join(cfg.LOG_DIR, fname)) for i, fname in fnames]
    for epoch, cp_file in cp_files:
        if os.path.isfile(cp_file):
            print("[Info] loading checkpoint '{}'".format(cp_file))
            checkpoint = torch.load(cp_file)
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print("[Info] no checkpoint found at '{}'".format(cp_file))
            continue

        results = predict(val_loader, model)
        result_file = os.path.join(cfg.LOG_DIR,
                                   'result-{:03}.json'.format(epoch))
        with open(result_file, 'w') as f:
            json.dump(results, f)
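The `predict` helper called in the loop is not shown. A minimal sketch under the assumption that each batch starts with a question id followed by the model inputs (the "(que_id, img, que, [obj])" layout and the `codebook['itoa']` mapping appear in other snippets on this page):

import torch

def predict(loader, model):
    # Hypothetical sketch: collect {question_id, answer} records for the split.
    model.eval()
    itoa = loader.dataset.codebook['itoa']  # answer id -> answer string
    results = []
    with torch.no_grad():
        for que_ids, *inputs in loader:
            score = model(*[d.cuda() for d in inputs])
            ans_ids = score.argmax(dim=1).cpu().tolist()
            for qid, aid in zip(que_ids.tolist(), ans_ids):
                results.append({'question_id': qid, 'answer': itoa[aid]})
    return results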
Code example #4
def load_datasets(config, phases):
    config = config['data']

    # Not executed on later runs (preprocessing only needs to happen once)
    if 'preprocess' in config and config['preprocess']:
        print('Preprocessing datasets')
        # Preprocess the training and validation data
        preprocess(data_dir=config['dir'],
                   train_ques_file=config['train']['ques'],
                   train_ans_file=config['train']['ans'],
                   val_ques_file=config['val']['ques'],
                   val_ans_file=config['val']['ans'])

    # Preprocessing done; load the data
    print('Loading preprocessed datasets')
    datafiles = {x: '{}.pkl'.format(x)
                 for x in phases
                 }  # datafile = {'train': 'train.pkl', 'val': 'val.pkl'}
    raw_images = ('preprocess' in config['images']
                  and config['images']['preprocess'])

    if raw_images:
        img_dir = {x: config[x]['img_dir'] for x in phases}
    else:
        img_dir = {x: config[x]['emb_dir'] for x in phases}

    datasets = {
        x: VQADataset(data_dir=config['dir'],
                      qafile=datafiles[x],
                      img_dir=img_dir[x],
                      phase=x,
                      img_scale=config['images']['scale'],
                      img_crop=config['images']['crop'],
                      raw_images=raw_images)
        for x in phases
    }
    print(datasets['train'][0])

    batch_samplers = {
        x: VQABatchSampler(datasets[x], config[x]['batch_size'])
        for x in phases
    }

    # Use the DataLoader API directly
    dataloaders = {
        x: DataLoader(datasets[x],
                      batch_sampler=batch_samplers[x],
                      num_workers=config['loader']['workers'])
        for x in phases
    }
    dataset_sizes = {x: len(datasets[x]) for x in phases}

    print("ques vocab size: {}".format(len(VQADataset.ques_vocab)))
    print("ans vocab size: {}".format(len(VQADataset.ans_vocab)))
    return dataloaders, VQADataset.ques_vocab, VQADataset.ans_vocab
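`VQABatchSampler` is project code that is not shown. A minimal sketch of the interface `DataLoader` expects from a `batch_sampler`; the real class may bucket questions by length, but this version just emits fixed-size batches in dataset order:

from torch.utils.data.sampler import Sampler

class VQABatchSampler(Sampler):
    # Hypothetical minimal version: yield lists of indices, one per batch.
    def __init__(self, dataset, batch_size):
        self.dataset = dataset
        self.batch_size = batch_size

    def __iter__(self):
        batch = []
        for idx in range(len(self.dataset)):
            batch.append(idx)
            if len(batch) == self.batch_size:
                yield batch
                batch = []
        if batch:
            yield batch

    def __len__(self):
        return (len(self.dataset) + self.batch_size - 1) // self.batch_size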
Code example #5
def _sample(indices, config):
    dataset = VQADataset(config.dataset_dir,
                         output_shape=[256, 256],
                         train=False)

    if config.action == "stage1":
        stage1_generator = Generator()
        _load(stage1_generator, os.path.join(config.model_dir, "stage1"))
    else:
        stage1_generator = Stage1Generator()
        stage2_generator = Stage2Generator()
        _load(stage1_generator, os.path.join(config.model_dir, "stage1"))
        _load(stage2_generator, os.path.join(config.model_dir, "stage2"))

    ims, embeds, captions = [], [], []
    for idx in indices:
        im, embed, caption = dataset[idx]
        ims.append(im)
        embeds.append(embed)
        captions.append(caption)

    ims = torch.stack(ims, 0)
    embeds = torch.stack(embeds, 0)
    noise = Variable(torch.randn(len(indices), 100))

    if config.cuda:
        noise = noise.cuda()
        embeds = Variable(embeds).cuda()
    else:
        embeds = Variable(embeds)
    embeds = embeds.view(len(indices), -1)

    fake_ims_stage1 = stage1_generator(noise, embeds)
    torchvision.utils.save_image(ims,
                                 "{}/real.png".format(config.sample_dir),
                                 normalize=True)
    torchvision.utils.save_image(fake_ims_stage1.data,
                                 "{}/fake_stage1.png".format(
                                     config.sample_dir),
                                 normalize=True)

    if config.action == "stage2":
        fake_ims_stage2 = stage2_generator(fake_ims_stage1, embeds)
        torchvision.utils.save_image(fake_ims_stage2.data,
                                     "{}/fake_stage2.png".format(
                                         config.sample_dir),
                                     normalize=True)

    with open("{}/captions.txt".format(config.sample_dir), "w") as f:
        for i, caption in enumerate(captions):
            f.write("index: {}\n".format(indices[i]))
            for c in caption:
                f.write(c + "\n")
            f.write("\n")
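The `_load` helper used above is not shown. A hedged sketch, assuming each stage directory holds a single state-dict file saved with `torch.save(module.state_dict(), ...)`; the file name below is made up:

import os
import torch

def _load(module, model_dir):
    # Hypothetical: restore weights from an assumed checkpoint file name.
    path = os.path.join(model_dir, 'generator.pth')
    module.load_state_dict(torch.load(path, map_location='cpu'))
    module.eval()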
Code example #6
def load_datasets(config, phases):
    datasets = {
        x: VQADataset(mode=x, preprocess=config['data']['preprocess'])
        for x in phases
    }
    batch_samplers = {
        x: VQABatchSampler(datasets[x], config['data']['batch_size'])
        for x in phases
    }
    num_workers = config['data']['num_workers']
    dataloaders = {
        x: DataLoader(datasets[x],
                      batch_sampler=batch_samplers[x],
                      num_workers=num_workers)
        for x in phases
    }
    print("dataset size", {x: len(datasets[x]) for x in phases})
    print("ques vocab size: {}".format(len(VQADataset.ques_vocab)))
    print("ans vocab size: {}".format(len(VQADataset.ans_vocab)))
    return dataloaders, VQADataset.ques_vocab, VQADataset.ans_vocab
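A hypothetical call site for this variant, with a config dict mirroring the keys read above; the batch layout the loader yields is an assumption:

config = {'data': {'preprocess': False, 'batch_size': 32, 'num_workers': 4}}
dataloaders, ques_vocab, ans_vocab = load_datasets(config,
                                                   phases=['train', 'val'])
for batch in dataloaders['train']:
    # inspect one batch, then stop
    print(type(batch), len(batch))
    break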
Code example #7
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # use timestamp as log subdirectory
    timestamp = args.timestamp
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, timestamp)
    model_group_name, model_name = args.model.split('/')

    # setting log handlers
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)
    logger.addHandler(sh)
    logger.debug('[Info] called with: ' + args_str)
    logger.debug('[Info] timestamp: ' + timestamp)

    # select device
    torch.cuda.set_device(args.gpu_id)
    logger.debug('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # data
    assert len(cfg.TEST.SPLITS) == 1 and cfg.TEST.SPLITS[0] == 'val2014'
    logger.debug('[Info] init dataset')
    val_set = VQADataset('test', model_group_name)
    RES_DIR = '/home/lyt/code/bert-as-service-test/result'
    queIds, queFea, _ = load_data(split_name='val2014', RES_DIR=RES_DIR)
    assert queIds.tolist() == val_set.que_id.tolist()
    logger.debug('[Info] Clustering using {}, {} clusters'.format(
        args.cluster_alg, args.n_clusters))
    clusfilename = '{}/{}/{}_{}_n{}.pkl'.format(RES_DIR, 'v2', 'train2014',
                                                args.cluster_alg,
                                                args.n_clusters)
    logger.debug('[Info] cluster file: {}'.format(clusfilename))
    val_qTypeLabels = clustering(queFea,
                                 clu_num=args.n_clusters,
                                 clu_alg=args.cluster_alg,
                                 savefname=clusfilename)

    # model
    logger.debug('[Info] construct model')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(num_words=val_set.num_words,
                                             num_ans=val_set.num_ans,
                                             emb_size=get_emb_size())
    logger.debug('[Info] model name: ' + args.model)
    total_param = 0
    for param in model.parameters():
        total_param += param.nelement()
    logger.debug('[Info] total parameters: {}M'.format(
        math.ceil(total_param / 2**20)))
    model.cuda()
    cudnn.benchmark = True

    # load best model, predict
    logger.debug('[Info] load model ...')
    best_path = os.path.join(cfg.LOG_DIR, 'model-best.pth.tar')
    assert os.path.isfile(best_path)
    logger.debug("[Info] loading checkpoint '{}'".format(best_path))
    cp_state = torch.load(best_path)
    best_acc = cp_state['best_acc']
    logger.debug('[Info] best model with best acc {}'.format(best_acc))
    model.load_state_dict(cp_state['state_dict'])

    for i in range(args.n_clusters):
        logger.debug('[Info] choose cluster ID: {}'.format(i))
        sel = val_qTypeLabels == i
        val_quesIds = queIds[sel].tolist()
        logger.debug(
            '[Info] #Val set before/after clustering and choosing {}/{}'.
            format(queIds.shape[0], len(val_quesIds)))
        val_set_sub = select_subset(val_set, sel)
        val_loader = torch.utils.data.DataLoader(val_set_sub,
                                                 batch_size=args.bs,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
        logger.debug('sample count: {}'.format(len(val_set_sub)))
        acc = validate(val_loader, model, None, None, quesIds=val_quesIds)
        logger.debug('Evaluate Result:\tAcc  {0}'.format(acc))
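`select_subset` is project code that is not shown here. One way to implement it, assuming the mask is a boolean array over dataset indices, is a thin `Subset` wrapper; this is a sketch, not the project's implementation (the real one likely also filters per-sample attributes such as `que_id`):

import numpy as np
from torch.utils.data import Subset

def select_subset(dataset, mask):
    # Keep only the samples where the boolean mask is True.
    indices = np.nonzero(mask)[0].tolist()
    return Subset(dataset, indices)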
Code example #8
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)

    # use timestamp as log subdirectory
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, timestamp)
    os.mkdir(cfg.LOG_DIR)
    json.dump(cfg, open(cfg.LOG_DIR + '/config.json', 'w'), indent=2)
    model_group_name, model_name = args.model.split('/')
    shutil.copy('models/' + model_group_name + '.py', cfg.LOG_DIR)

    # init ploter
    ploter = Ploter(timestamp)

    # setting log handlers
    fh = logging.FileHandler(os.path.join(cfg.LOG_DIR, 'log'))
    fh.setLevel(logging.DEBUG)
    fhc = logging.FileHandler('current.log')
    fhc.setLevel(logging.DEBUG)
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)

    fmt = '[%(asctime)-15s] %(message)s'
    datefmt = '%Y-%m-%d %H:%M:%S'
    formatter = logging.Formatter(fmt, datefmt)

    fh.setFormatter(formatter)
    fhc.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(fhc)
    logger.addHandler(sh)
    logger.debug('[Info] called with: ' + args_str)

    logger.debug('[Info] timestamp: ' + timestamp)
    logger.debug('[Info] CPU random seed: {}'.format(torch.initial_seed()))
    logger.debug('[Info] GPU random seed: {}'.format(torch.cuda.initial_seed()))

    # select device
    torch.cuda.set_device(args.gpu_id)
    logger.debug('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # display some information
    train_pattern = '[Info] Training pattern: {}\n'\
        '\t[train_subset by atype, no finetune, train_all_ft_atype]'
    if args.ft_epoch == 0:
        logger.debug(train_pattern.format('train_subset by atype'))
    elif args.ft_epoch > args.epochs:
        logger.debug(train_pattern.format('no finetune'))
    else:  # 0 < ft_epoch <= epochs
        logger.debug(train_pattern.format('train_all_ft_atype'))
    resume_train = '[Info] Resume train: {}'
    if args.resume:
        logger.debug(resume_train.format('resume train from previous best model'))
    else:
        logger.debug(resume_train.format('normal train'))
    pred_sub = '[Info] Predict subset: {}'
    if args.pred_subset:
        logger.debug(pred_sub.format('validate on subset by atype'))
    else:
        logger.debug(pred_sub.format('validate on all val set'))

    # load data
    logger.debug('[Info] init dataset')
    do_test = (len(cfg.TEST.SPLITS) == 1 and cfg.TEST.SPLITS[0] in ('train2014', 'val2014'))
    trn_set = VQADataset('train', model_group_name)
    train_loader = gen_dataloader(args, trn_set, shuffle=True)
    if do_test:
        val_set = VQADataset('test', model_group_name)
        val_loader = gen_dataloader(args, val_set, shuffle=False)

    # model
    emb_size = 300
    if cfg.WORD_EMBEDDINGS:
        word_vec = merge_embeddings(cfg.WORD_EMBEDDINGS)
        aword = next(iter(word_vec))
        emb_size = len(word_vec[aword])
        logger.debug('[Info] embedding size: {}'.format(emb_size))

    logger.debug('[Info] construct model, criterion and optimizer')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(
            num_words=trn_set.num_words,
            num_ans=trn_set.num_ans,
            emb_size=emb_size)
    logger.debug('[Info] model name: ' + args.model)
    total_param = 0
    for param in model.parameters():
        total_param += param.nelement()
    logger.debug('[Info] total parameters: {}M'.format(math.ceil(total_param / 2**20)))

    # initialize word embedding with pretrained
    if cfg.WORD_EMBEDDINGS:
        emb = model.we.weight.data.numpy()
        words = trn_set.codebook['itow']
        assert '<PAD>' not in word_vec
        fill_cnt = 0
        for i, w in enumerate(words):
            if w in word_vec:
                emb[i] = word_vec[w]
                fill_cnt += 1
        logger.debug('[debug] word embedding filling count: {}/{}'
                .format(fill_cnt, len(words)))
        model.we.weight = nn.Parameter(torch.from_numpy(emb))
        if model_group_name in ('onehot_label', 'prob_label'):
            # initialize object embedding with pretrained
            obj_emb = model.obj_net[0].weight.data.numpy()
            if model_group_name == 'prob_label':
                obj_emb = obj_emb.T
            fill_cnt = 0
            for i, line in enumerate(trn_set.objects_vocab):
                avail, vec = get_class_embedding(line, word_vec, emb_size)
                if avail:
                    obj_emb[i] = vec
                    fill_cnt += 1
            logger.debug('[debug] class embedding filling count: {}/{}'
                    .format(fill_cnt, len(trn_set.objects_vocab)))
            if model_group_name == 'prob_label':
                obj_emb = obj_emb.T
            model.obj_net[0].weight = nn.Parameter(torch.from_numpy(obj_emb))

    model.cuda()

    if cfg.SOFT_LOSS:
        criterion = nn.BCEWithLogitsLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss().cuda()
    logger.debug('[Info] criterion name: ' + criterion.__class__.__name__)
    optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.wd)
    cudnn.benchmark = True

    # resume
    if args.resume:
        ckpt = torch.load(os.path.join(cfg.LOG_DIR.split('/')[0],
                            args.ts, 'model-best.pth.tar'))
        best_acc = ckpt['best_acc']
        start_epoch = best_epoch = ckpt['best_epoch']
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
    else:
        ckpt = None
        best_acc = 0
        best_epoch = -1
        start_epoch = args.start_epoch  # -1
    # guard: only defined once the finetune block below selects a subset
    val_quesIds = None


    # train
    logger.debug('[Info] start training...')
    for epoch in range(start_epoch+1, args.epochs):
        is_best = False
        lr = adjust_learning_rate(optimizer, epoch)
        ploter.append(epoch, lr, 'lr')

        if epoch == args.ft_epoch:
            logger.debug('[Info] finetune using atype_id {} data, at epoch {}'
                .format(args.atype_id, args.ft_epoch))
            # load atypes
            RES_DIR = '/home/lyt/code/bert-as-service-test/result'
            queIds, aTypeIds = load_data(split_name='train2014', RES_DIR=RES_DIR)
            assert queIds.tolist() == trn_set.que_id.tolist()
            # select specified data for training
            sel = aTypeIds == args.atype_id
            trn_quesIds = queIds[sel].tolist()
            logger.debug('[Info] #Train set before/after selecting {}/{}'
                .format(queIds.shape[0], len(trn_quesIds)))
            trn_set = select_subset(trn_set, sel)
            # set train loader
            train_loader = gen_dataloader(args, trn_set, shuffle=True)
            # validation set
            if do_test and args.pred_subset:
                # load atypes
                queIds, aTypeIds = load_data(split_name='val2014', RES_DIR=RES_DIR)
                assert queIds.tolist() == val_set.que_id.tolist()
                # select specified data for training
                sel = aTypeIds == args.atype_id
                val_quesIds = queIds[sel].tolist()
                logger.debug('[Info] #Val set before/after selecting {}/{}'
                    .format(queIds.shape[0], len(val_quesIds)))
                val_set = select_subset(val_set, sel)
                # set val loader
                val_loader = gen_dataloader(args, val_set, shuffle=False)


        loss = train(train_loader, model, criterion, optimizer, epoch)
        ploter.append(epoch, loss, 'train-loss')

        if do_test:
            if args.pred_subset:
                acc = validate(val_loader, model, criterion, epoch, quesIds=val_quesIds)
            else:
                acc = validate(val_loader, model, criterion, epoch)
            ploter.append(epoch, acc, 'val-acc')
            if acc > best_acc:
                is_best = True
                best_acc = acc
                best_epoch = epoch
            logger.debug('Evaluate Result:\tAcc  {0}\tBest {1} ({2})'
                .format(acc, best_acc, best_epoch))

        # save checkpoint
        state = {
            'epoch': epoch,
            'best_acc': best_acc,
            'best_epoch': best_epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        if epoch % args.save_freq == 0:
            cp_fname = 'checkpoint-{:03}.pth.tar'.format(epoch)
            cp_path = os.path.join(cfg.LOG_DIR, cp_fname)
            torch.save(state, cp_path)
        if is_best:
            best_path = os.path.join(cfg.LOG_DIR, 'model-best.pth.tar')
            torch.save(state, best_path)
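The `adjust_learning_rate` helper used in the loop is not shown. A common step-decay sketch; the base rate and decay schedule here are placeholders, and the original likely reads them from `args` or `cfg`:

def adjust_learning_rate(optimizer, epoch, base_lr=1e-3,
                         decay_factor=0.5, decay_freq=10):
    # Step decay: scale the rate down every decay_freq epochs (values assumed).
    lr = base_lr * (decay_factor ** (epoch // decay_freq))
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr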
Code example #9
File: ensemble.py Project: yourtone/concept_vqa
def main():
    args = parser.parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # select device
    torch.cuda.set_device(args.gpu_id)
    print('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # get parameters
    sys.path.insert(0, args.model_dir)
    from params import params
    assert len(params) > 1

    last_cfg = params[0][-1]
    last_cfg()
    get_data.main()
    dataset = VQADataset('test', params[0][1])
    itoa = dataset.codebook['itoa']

    vote_buff = [{} for i in range(len(dataset))]
    conf_buff = np.zeros((len(dataset), len(itoa)))
    sm_conf_buff = np.zeros((len(dataset), len(itoa)))
    l2_conf_buff = np.zeros((len(dataset), len(itoa)))
    que_ids = dataset.que_id
    for fpath, mgrp, mname, acc, cfg_func in params:
        # data
        if cfg_func != last_cfg:
            cfg_func()
            get_data.main()
            last_cfg = cfg_func
            dataset = VQADataset('test', mgrp)
            itoa = dataset.codebook['itoa']

        dataset.reload_obj(mgrp)
        dataloader = torch.utils.data.DataLoader(
                dataset, batch_size=args.bs, shuffle=False,
                num_workers=2, pin_memory=True)

        # model
        model_group = import_module('models.' + mgrp)
        model = getattr(model_group, mname)(
                num_words=dataset.num_words,
                num_ans=dataset.num_ans,
                emb_size=get_emb_size())
        cp_file = os.path.join(args.model_dir, fpath)
        checkpoint = torch.load(cp_file, map_location=lambda s, l: s.cuda(0))
        model.load_state_dict(checkpoint['state_dict'])
        model.cuda()
        model.eval()

        # predict
        bar = progressbar.ProgressBar()
        start = 0
        # sample: (que_id, img, que, [obj])
        for sample in bar(dataloader):
            sample_var = [Variable(d).cuda() for d in list(sample)[1:]]
            score = model(*sample_var)
            sm_score = torch.nn.functional.softmax(score, dim=1)
            l2_score = torch.nn.functional.normalize(score, dim=1)

            bs = score.size(0)
            conf_buff[start:start+bs] += score.data.cpu().numpy()
            sm_conf_buff[start:start+bs] += sm_score.data.cpu().numpy()
            l2_conf_buff[start:start+bs] += l2_score.data.cpu().numpy()

            _, ans_ids = torch.max(score.data, dim=1)
            for i, ans_id in enumerate(ans_ids):
                ans = itoa[ans_id]
                ans_score = acc + vote_buff[start + i].get(ans, 0)
                vote_buff[start + i][ans] = ans_score

            start += bs
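The excerpt stops after filling the buffers. A hedged sketch of a final aggregation step, using the accuracy-weighted votes accumulated above; this step is not part of the original excerpt:

# Pick, for each question, the answer with the highest accumulated vote weight.
results = []
for i, que_id in enumerate(que_ids):
    best_ans = max(vote_buff[i], key=vote_buff[i].get)
    results.append({'question_id': int(que_id), 'answer': best_ans})
# conf_buff / sm_conf_buff / l2_conf_buff could be argmaxed the same way to
# compare raw-score-sum, softmax-sum, and L2-normalized-sum ensembling.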
Code example #10
File: vqa_model.py Project: Tomas-Perez/a2ndl-hw
        model = create_model(img_dim, img_dim, 
            num_classes=num_classes, 
            max_seq_length=max_seq_length, 
            embedding_matrix=get_embeddings(),
        )

        if i == 0:
            model.summary()

        print(f"Training for k index={i}/{num_k}")

        dataset = VQADataset(dataset_dir, 'training', text_inputs, num_classes, 
            img_out_shape=[img_dim, img_dim], 
            validation_split=VALIDATION_SPLIT,
            img_preprocessing_function=preprocess_input,
            img_generator=img_data_gen,
            k_idx=i,
        )
        dataset_valid = VQADataset(dataset_dir, 'validation', text_inputs, num_classes, 
            img_out_shape=[img_dim, img_dim], 
            validation_split=VALIDATION_SPLIT,
            img_preprocessing_function=preprocess_input,
            img_generator=img_data_gen,
            k_idx=i,
        )

        train_dataset = tf.data.Dataset.from_generator(
            lambda: dataset,
            output_types=((tf.float32, tf.float32), tf.int32),
            output_shapes=(([img_dim, img_dim, 3], [max_seq_length]), []),
        )
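The excerpt cuts off inside the `from_generator` call (the closing parenthesis above is restored). A hedged sketch of how such a dataset is typically batched and fed to Keras on recent TensorFlow versions; the batch size and epoch count are placeholders:

        train_dataset = train_dataset.batch(32).prefetch(tf.data.AUTOTUNE)
        # dataset_valid would be wrapped the same way and passed as
        # validation_data.
        model.fit(train_dataset, epochs=10)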
Code example #11
File: train.py Project: SunnyWay/concept_vqa
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # use timestamp as log subdirectory
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, timestamp)
    os.mkdir(cfg.LOG_DIR)
    json.dump(cfg, open(cfg.LOG_DIR + '/config.json', 'w'), indent=2)
    model_group_name, model_name = args.model.split('/')
    shutil.copy('models/' + model_group_name + '.py', cfg.LOG_DIR)

    # init ploter
    ploter = Ploter(timestamp)

    # setting log handlers
    fh = logging.FileHandler(os.path.join(cfg.LOG_DIR, 'log'))
    fh.setLevel(logging.DEBUG)
    fhc = logging.FileHandler('current.log')
    fhc.setLevel(logging.DEBUG)
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)

    fmt = '[%(asctime)-15s] %(message)s'
    datefmt = '%Y-%m-%d %H:%M:%S'
    formatter = logging.Formatter(fmt, datefmt)

    fh.setFormatter(formatter)
    fhc.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(fhc)
    logger.addHandler(sh)
    logger.debug('[Info] called with: ' + args_str)

    logger.debug('[Info] timestamp: ' + timestamp)
    logger.debug('[Info] CPU random seed: {}'.format(torch.initial_seed()))
    logger.debug('[Info] GPU random seed: {}'.format(
        torch.cuda.initial_seed()))

    # select device
    torch.cuda.set_device(args.gpu_id)
    logger.debug('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # data
    logger.debug('[Info] init dataset')
    do_test = (len(cfg.TEST.SPLITS) == 1
               and cfg.TEST.SPLITS[0] in ('train2014', 'val2014'))
    trn_set = VQADataset('train', model_group_name)
    train_loader = torch.utils.data.DataLoader(trn_set,
                                               batch_size=args.bs,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    if do_test:
        val_set = VQADataset('test', model_group_name)
        val_loader = torch.utils.data.DataLoader(val_set,
                                                 batch_size=args.bs,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)

    # model
    emb_size = 300
    if cfg.WORD_EMBEDDINGS:
        word_vec = merge_embeddings(cfg.WORD_EMBEDDINGS)
        aword = next(iter(word_vec))
        emb_size = len(word_vec[aword])
        logger.debug('[Info] embedding size: {}'.format(emb_size))

    logger.debug('[Info] construct model, criterion and optimizer')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(num_words=trn_set.num_words,
                                             num_ans=trn_set.num_ans,
                                             emb_size=emb_size)
    logger.debug('[Info] model name: ' + args.model)
    total_param = 0
    for param in model.parameters():
        total_param += param.nelement()
    logger.debug('[Info] total parameters: {}M'.format(
        math.ceil(total_param / 2**20)))

    # initialize word embedding with pretrained
    if cfg.WORD_EMBEDDINGS:
        emb = model.we.weight.data.numpy()
        words = trn_set.codebook['itow']
        assert '<PAD>' not in word_vec
        fill_cnt = 0
        for i, w in enumerate(words):
            if w in word_vec:
                emb[i] = word_vec[w]
                fill_cnt += 1
        logger.debug('[debug] word embedding filling count: {}/{}'.format(
            fill_cnt, len(words)))
        model.we.weight = nn.Parameter(torch.from_numpy(emb))

        if model_group_name in ('onehot_label', 'prob_label'):
            # initialize object embedding with pretrained
            obj_emb = model.obj_net[0].weight.data.numpy()

            if model_group_name == 'prob_label':
                obj_emb = obj_emb.T

            fill_cnt = 0
            for i, line in enumerate(trn_set.objects_vocab):
                avail, vec = get_class_embedding(line, word_vec, emb_size)
                if avail:
                    obj_emb[i] = vec
                    fill_cnt += 1
            logger.debug('[debug] class embedding filling count: {}/{}'.format(
                fill_cnt, len(trn_set.objects_vocab)))

            if model_group_name == 'prob_label':
                obj_emb = obj_emb.T

            model.obj_net[0].weight = nn.Parameter(torch.from_numpy(obj_emb))

    model.cuda()

    if cfg.SOFT_LOSS:
        criterion = nn.BCEWithLogitsLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss().cuda()
    logger.debug('[Info] criterion name: ' + criterion.__class__.__name__)
    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 weight_decay=args.wd)
    cudnn.benchmark = True

    # train
    logger.debug('[Info] start training...')
    best_acc = 0
    best_epoch = -1
    for epoch in range(args.start_epoch, args.epochs):
        is_best = False
        lr = adjust_learning_rate(optimizer, epoch)
        ploter.append(epoch, lr, 'lr')

        loss = train(train_loader, model, criterion, optimizer, epoch)
        ploter.append(epoch, loss, 'train-loss')

        if do_test:
            acc = validate(val_loader, model, criterion, epoch)
            ploter.append(epoch, acc, 'val-acc')
            if acc > best_acc:
                is_best = True
                best_acc = acc
                best_epoch = epoch

            logger.debug('Evaluate Result:\t'
                         'Acc  {0}\t'
                         'Best {1} ({2})'.format(acc, best_acc, best_epoch))

        # save checkpoint
        cp_fname = 'checkpoint-{:03}.pth.tar'.format(epoch)
        cp_path = os.path.join(cfg.LOG_DIR, cp_fname)
        state = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict()
        }
        if epoch % args.save_freq == 0:
            torch.save(state, cp_path)
        if is_best:
            best_path = os.path.join(cfg.LOG_DIR, 'model-best.pth.tar')
            torch.save(state, best_path)
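The two criteria selected above expect different target formats; a standalone sketch (not from the original code) showing the shapes each one takes. `CrossEntropyLoss` takes one class index per sample, while `BCEWithLogitsLoss` takes per-class soft scores, e.g. VQA answer-agreement values in [0, 1]:

import torch
import torch.nn as nn

logits = torch.randn(4, 3000)               # (batch, num_ans)
hard_target = torch.randint(0, 3000, (4,))  # one class index per sample
soft_target = torch.rand(4, 3000)           # per-answer soft scores in [0, 1]

hard_loss = nn.CrossEntropyLoss()(logits, hard_target)
soft_loss = nn.BCEWithLogitsLoss()(logits, soft_target)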
Code example #12
File: run.py Project: chiefpi/co-attention-vqa
            ibatch = batch['image'].permute(0, 3, 1, 2).float().to(device)
            qbatch = token2id(batch['question'], vocab.qvocab).to(device)
            output = model(ibatch, qbatch)
            aids = output.argmax(-1).tolist()
            answers = id2answer(aids, vocab.avocab)

            for qid, answer in zip(idbatch, answers):
                results.append({'question_id': qid, 'answer': answer})

    with open('results.json', 'w') as f:
        json.dump(results, f)
    print('Finished evaluation!')


if __name__ == "__main__":
    dataset_train = VQADataset(data_dir, 'train')
    dataset_val = VQADataset(data_dir, 'val')
    dataset_test = VQADataset(data_dir, 'test')

    vocab = VQAVocab(data_dir)

    model = CoNet(len(vocab.qvocab),
                  emb_dim,
                  len(vocab.avocab),
                  useco=use_coatt).to(device)

    for name, param in model.named_parameters():
        print(name, param.requires_grad, param.is_cuda, param.size())
        # assert param.is_cuda

#     train(model, dataset_train, dataset_val, vocab)
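The `token2id` / `id2answer` helpers used in the evaluation loop are not shown. Minimal sketches under assumed vocabulary layouts (a token-to-id dict for `qvocab`, an id-indexed list for `avocab`; the padding convention is a guess):

import torch

def token2id(questions, qvocab, max_len=20):
    # Map each question string to a fixed-length tensor of token ids;
    # index 0 is assumed to double as <pad>/<unk>.
    batch = torch.zeros(len(questions), max_len, dtype=torch.long)
    for i, q in enumerate(questions):
        for j, tok in enumerate(q.lower().split()[:max_len]):
            batch[i, j] = qvocab.get(tok, 0)
    return batch

def id2answer(aids, avocab):
    # Map predicted answer ids back to answer strings.
    return [avocab[a] for a in aids]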
Code example #13
File: main.py Project: Kudlech/VQA
def main(cfg: DictConfig) -> None:
    """
    Run the code following a given configuration
    :param cfg: configuration file retrieved from hydra framework
    """
    main_utils.init(cfg)
    logger = TrainLogger(exp_name_prefix=cfg['main']['experiment_name_prefix'],
                         logs_dir=cfg['main']['paths']['logs'])
    logger.write(OmegaConf.to_yaml(cfg))

    # Set seed for results reproduction
    main_utils.set_seed(cfg['main']['seed'])

    # Load dataset
    path_image_train = '/datashare/train2014/COCO_train2014_'
    path_question_train = '/datashare/v2_OpenEnded_mscoco_train2014_questions.json'
    train_dataset = VQADataset(path_answers=cfg['main']['paths']['train'],
                               path_image=path_image_train,
                               path_questions=path_question_train)
    path_image_val = '/datashare/val2014/COCO_val2014_'
    path_question_val = '/datashare/v2_OpenEnded_mscoco_val2014_questions.json'
    val_dataset = VQADataset(path_answers=cfg['main']['paths']['validation'],
                             path_image=path_image_val,
                             path_questions=path_question_val,
                             word_dict=train_dataset.word_dict)

    train_loader = DataLoader(train_dataset,
                              cfg['train']['batch_size'],
                              shuffle=True,
                              num_workers=cfg['main']['num_workers'])
    eval_loader = DataLoader(val_dataset,
                             cfg['train']['batch_size'],
                             shuffle=True,
                             num_workers=cfg['main']['num_workers'])

    image_dim = train_dataset.pic_size
    output_dim = 2410  # possible answers
    model = VQAModel(batch_size=cfg['train']['batch_size'],
                     word_vocab_size=train_dataset.vocab_size,
                     lstm_hidden=cfg['train']['num_hid'],
                     output_dim=output_dim,
                     dropout=cfg['train']['dropout'],
                     word_embedding_dim=cfg['train']['word_embedding_dim'],
                     question_output_dim=cfg['train']['question_output_dim'],
                     image_dim=image_dim,
                     last_hidden_fc_dim=cfg['train']['last_hidden_fc_dim'])

    if cfg['main']['parallel']:
        model = torch.nn.DataParallel(model)

    if torch.cuda.is_available():
        model = model.cuda()

    logger.write(main_utils.get_model_string(model))

    # Run model
    train_params = train_utils.get_train_params(cfg)

    # Report metrics and hyper parameters to tensorboard
    metrics = train(model, train_loader, eval_loader, train_params, logger)
    hyper_parameters = main_utils.get_flatten_dict(cfg['train'])

    logger.report_metrics_hyper_params(hyper_parameters, metrics)
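`train_utils.get_train_params` is not shown; a hedged sketch of the kind of object it might return. The field names below are invented for illustration and are not taken from the project:

from dataclasses import dataclass

@dataclass
class TrainParams:
    num_epochs: int
    lr: float
    grad_clip: float

def get_train_params(cfg):
    # Hypothetical: pull training hyper-parameters out of the Hydra config.
    return TrainParams(num_epochs=cfg['train']['num_epochs'],
                       lr=cfg['train']['lr'],
                       grad_clip=cfg['train']['grad_clip'])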
Code example #14
    def __init__(self, model_info, split, save_dir):
        assert len(model_info) > 0
        assert len(cfg.TEST.SPLITS) == 1 and cfg.TEST.SPLITS[0] == split

        model_info = sorted(model_info, key=itemgetter(0))

        self._split = split
        self.model_info = model_info
        self.save_dir = save_dir

        # load model
        self._pred_ans = []
        self._scores = []
        self._att_weights = []
        dataset = VQADataset('test', model_info[0][0])
        emb_size = get_emb_size()
        for model_group_name, model_name, cp_file in model_info:
            cache_file = cp_file + '.cache'
            if os.path.isfile(cache_file):
                print("load from cache: '{}".format(cache_file))
                cache = pickle.load(open(cache_file, 'rb'))
                self._pred_ans.append(cache['pred_ans'])
                self._scores.append(cache['scores'])
                self._att_weights.append(cache['att_weights'])
                continue

            # dataset
            dataset.reload_obj(model_group_name)
            dataloader = torch.utils.data.DataLoader(dataset,
                                                     batch_size=args.bs,
                                                     shuffle=False,
                                                     num_workers=2,
                                                     pin_memory=True)
            # model
            model_group = import_module('models.' + model_group_name)
            model = getattr(model_group,
                            model_name)(num_words=dataset.num_words,
                                        num_ans=dataset.num_ans,
                                        emb_size=emb_size)
            checkpoint = torch.load(cp_file,
                                    map_location=lambda s, l: s.cuda(0))
            model.load_state_dict(checkpoint['state_dict'])
            model.cuda()
            model.eval()

            # predicting
            itoa = dataloader.dataset.codebook['itoa']
            batch_att_weight = []
            pred_ans = []
            bar = progressbar.ProgressBar()
            print('predicting answers...')
            # sample: (que_id, img, que, [obj])
            for sample in bar(dataloader):
                # setting hook
                att_weight_buff = torch.FloatTensor(len(sample[0]), 36)

                def get_weight(module, input, output):
                    att_weight_buff.copy_(output.data.view_as(att_weight_buff))

                hook = model.att_net.register_forward_hook(get_weight)

                # forward
                sample_var = [Variable(d).cuda() for d in list(sample)[1:]]
                score = model(*sample_var)
                att_weight = F.softmax(Variable(att_weight_buff),
                                       dim=1).data.numpy()
                batch_att_weight.append(att_weight)
                pred_ans.extend(format_result(sample[0], score, itoa))

                hook.remove()
            att_weights = np.vstack(batch_att_weight)

            # evaluation
            print('evaluating results...')
            if split in ('train2014', 'val2014'):
                vqa_eval = get_eval(pred_ans, split)
                scores = []
                for i in range(len(dataset)):
                    qid = int(dataset[i][0])
                    score = vqa_eval.evalQA.get(qid)
                    scores.append(score)
            else:
                scores = None

            self._pred_ans.append(pred_ans)
            self._scores.append(scores)
            self._att_weights.append(att_weights)

            # save cache
            cache = {}
            cache['pred_ans'] = pred_ans
            cache['scores'] = scores
            cache['att_weights'] = att_weights
            pickle.dump(cache, open(cache_file, 'wb'))

        print('done.')

        # load data
        print('load raw data...')
        split_fname = '{}/raw-{}.json'.format(cfg.DATA_DIR, split)
        self._data = json.load(open(split_fname))
        print('load boxes...')
        self._boxes = self._load_box()

        # query key
        self._question = None
        self._answer = None
        self._condition = None

        # query result
        self._r_question = None
        self._r_answer = None
        self._r_condition = None

        # dirty flag
        self._d_question = True
        self._d_answer = True
        self._d_condition = True

        self.last_results = None
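The forward-hook trick above, capturing an intermediate module's output without touching the model code, is worth seeing in isolation. A small self-contained sketch of the same pattern (the toy network is, of course, not from the project):

import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2))
captured = {}

def save_output(module, inputs, output):
    # Called by PyTorch after the hooked module's forward pass.
    captured['hidden'] = output.detach()

hook = net[0].register_forward_hook(save_output)
_ = net(torch.randn(3, 8))
hook.remove()  # always remove hooks once done, as the snippet above does
print(captured['hidden'].shape)  # torch.Size([3, 4])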