Example #1
0
def test(model, data, device):
    """Run inference over `data` and collect predicted answer tokens.

    Args:
        model: network returning ``(logits, loss_time)`` for a batch.
        data: DataLoader yielding ``(coco_ids, q_ids, answers, *inputs)``;
            must expose ``vocab['answer_idx_to_token']``.
        device: target device passed to ``todevice``.

    Returns:
        Tuple ``(results, question_ids)`` — predicted answer tokens and the
        matching question ids, in batch order.
    """
    model.eval()
    results = []
    question_ids = []
    # Inference only: disable autograd so no graph is retained across batches
    # (the original version tracked gradients needlessly, wasting memory).
    with torch.no_grad():
        for batch in tqdm(data, total=len(data)):
            coco_ids, q_ids, answers, *batch_input = [todevice(x, device) for x in batch]
            logits, loss_time = model(*batch_input)
            predicts = torch.max(logits, dim=1)[1]
            results.extend(data.vocab['answer_idx_to_token'][p.item()] for p in predicts)
            question_ids.extend(q.item() for q in q_ids)
    return results, question_ids
Example #2
0
def val_with_acc(model, data, device):
    """Evaluate `model` and return per-sample accuracies with question ids.

    Args:
        model: network returning ``(logits, loss_time)`` for a batch.
        data: DataLoader yielding ``(coco_ids, q_ids, answers, *inputs)``.
        device: target device passed to ``todevice``.

    Returns:
        Tuple ``(accs, question_ids)`` — ``batch_accuracy`` score per sample
        and the matching question ids, in batch order.
    """
    model.eval()
    question_ids = []
    accs = []
    # Inference only: no autograd graph needed (the original tracked gradients).
    with torch.no_grad():
        for batch in tqdm(data, total=len(data)):
            coco_ids, q_ids, answers, *batch_input = [todevice(x, device) for x in batch]
            logits, loss_time = model(*batch_input)
            batch_acc = batch_accuracy(logits, answers)
            # Removed: unused `predicts = torch.max(logits, dim=1)[1]` and the
            # never-read `total_acc, count` accumulators from the original.
            question_ids.extend(q.item() for q in q_ids)
            accs.extend(a.item() for a in batch_acc)
    return accs, question_ids
Example #3
0
def validate(model, data, device):
    """Return the mean `batch_accuracy` of `model` over `data`.

    Args:
        model: network returning ``(logits, loss_time)`` for a batch.
        data: DataLoader yielding ``(coco_ids, q_ids, answers, *inputs)``.
        device: target device passed to ``todevice``.

    Returns:
        float: total accuracy divided by sample count; 0.0 for an empty loader
        (the original raised ZeroDivisionError in that case).
    """
    model.eval()
    print('validate...')
    # Removed duplicate init: the original set `count, correct = 0, 0` and then
    # re-initialized `count`; `correct` was never used.
    total_acc, count = 0, 0
    with torch.no_grad():
        for batch in tqdm(data, total=len(data)):
            coco_ids, q_ids, answers, *batch_input = [todevice(x, device) for x in batch]
            batch_input = [x.detach() for x in batch_input]
            logits, loss_time = model(*batch_input)
            acc = batch_accuracy(logits, answers)
            # `.data.item()` is legacy Variable-era API; `.item()` suffices.
            total_acc += acc.sum().item()
            count += answers.size(0)
    return total_acc / count if count else 0.0
Example #4
0
def validate(model, data, device, detail=False):
    """Return top-1 accuracy of `model` over `data`.

    Args:
        model: network returning ``(logits, loss_t)`` for a batch.
        data: DataLoader yielding ``(orig_idx, image_idx, answers, *inputs)``.
        device: target device passed to ``todevice``.
        detail: accepted for API compatibility; currently unused.

    Returns:
        float: fraction of samples whose argmax prediction equals the answer;
        0.0 for an empty loader (the original raised ZeroDivisionError).
    """
    count, correct = 0, 0
    # Removed: unused `beta = 1.` from the original.
    model.eval()
    print('validate...')
    # Inference only: disable autograd (the original tracked gradients).
    with torch.no_grad():
        for batch in tqdm(data, total=len(data)):
            orig_idx, image_idx, answers, *batch_input = [
                todevice(x, device) for x in batch
            ]
            logits, loss_t = model(*batch_input)
            predicts = logits.max(1)[1]
            correct += torch.eq(predicts, answers).long().sum().item()
            count += answers.size(0)

    return correct / count if count else 0.0
Example #5
0
def train(args):
    """Train the Net model on CLEVR-style data with warmup + MultiStepLR.

    Builds train (and optional val) loaders, constructs the model (DataParallel
    when multiple GPUs are visible), optionally restores a checkpoint, then
    runs the epoch loop: linear LR warmup for the first 4 epochs, MultiStepLR
    afterwards, cross-entropy plus a weighted auxiliary time loss, gradient
    value clipping, and (when `args.val` is set) validation with
    best-accuracy checkpointing.
    """
    logging.info("Create train_loader and val_loader.........")
    train_loader_kwargs = {
        'question_pt': args.train_question_pt,
        'vocab_json': args.vocab_json,
        'feature_h5': args.train_feature_h5,
        'batch_size': args.batch_size,
        'num_workers': 4,
        'shuffle': True
    }
    train_loader = CLEVRDataLoader(**train_loader_kwargs)
    if args.val:
        val_loader_kwargs = {
            'question_pt': args.val_question_pt,
            'vocab_json': args.vocab_json,
            'feature_h5': args.val_feature_h5,
            'batch_size': args.batch_size,
            'num_workers': 2,
            'shuffle': False
        }
        val_loader = CLEVRDataLoader(**val_loader_kwargs)

    logging.info("Create model.........")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_kwargs = {
        'vocab': train_loader.vocab,
        'dim_word': args.dim_word,
        'dim_hidden': args.hidden_size,
        'dim_vision': args.dim_vision,
        'state_size': args.state_size,
        'mid_size': args.mid_size,
        'dropout_prob': args.dropout,
        'glimpses': args.glimpses,
        'dim_edge': args.dim_edge
    }
    # Drop 'vocab' from the saved kwargs: it is large and reloadable from vocab_json.
    model_kwargs_tosave = { k:v for k,v in model_kwargs.items() if k != 'vocab' }
    model = Net(**model_kwargs)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model).to(device)  # Support multiple GPUS
    else:
        model = model.to(device)
    logging.info(model)
    ################################################################

    # Only trainable parameters go to the optimizer (and later to grad clipping).
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adamax(parameters, args.lr, weight_decay=0)

    start_epoch = 0
    if args.restore:
        print("Restore checkpoint and optimizer...")
        ckpt = os.path.join(args.save_dir, 'model.pt')
        ckpt = torch.load(ckpt, map_location={'cuda:0': 'cpu'})
        # NOTE(review): start_epoch is hard-coded to 4 — presumably it should
        # be read from the checkpoint (e.g. ckpt['epoch']); confirm intent.
        start_epoch = 4
        # DataParallel wraps the real model in `.module`.
        if torch.cuda.device_count() > 1:
            model.module.load_state_dict(ckpt['state_dict'])
        else:
            model.load_state_dict(ckpt['state_dict'])
        # optimizer.load_state_dict(ckpt['optimizer'])
    # scheduler = optim.lr_scheduler.ExponentialLR(optimizer, 0.5**(1 / args.lr_halflife))
    # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[8, 12, 15, 17, 19, 22], gamma=0.5)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5)
    # Linear warmup over the first 4 epochs; the scheduler takes over afterwards.
    gradual_warmup_steps = [0.25 * args.lr, 0.5 * args.lr, 0.75 * args.lr, 1.0 * args.lr]
    criterion = nn.CrossEntropyLoss().to(device)
    last_acc = 0.
    logging.info("Start training........")
    for epoch in range(start_epoch, args.num_epoch):
        model.train()
        if epoch < len(gradual_warmup_steps):
            utils.set_lr(optimizer, gradual_warmup_steps[epoch])
        else:
            scheduler.step()
        for p in optimizer.param_groups:
            lr_rate = p['lr']
            logging.info("Learning rate: %6f" % (lr_rate))
        for i, batch in enumerate(train_loader):
            # Fractional epoch progress for log messages.
            progress = epoch+i/len(train_loader)
            orig_idx, image_idx, answers, *batch_input = [todevice(x, device) for x in batch]
            batch_input = [x.detach() for x in batch_input]
            logits, loss_time = model(*batch_input)
            ##################### loss #####################
            ce_loss = criterion(logits, answers)
            # Auxiliary time loss, weighted by 0.01; .mean() reduces per-GPU
            # values under DataParallel.
            loss_time = 0.01 * loss_time.mean()
            loss = ce_loss + loss_time
            ################################################
            optimizer.zero_grad()
            loss.backward() 
            nn.utils.clip_grad_value_(parameters, clip_value=0.25)
            optimizer.step()
            # Log ~20 times per epoch.
            # NOTE(review): len(train_loader) // 20 is 0 when the loader has
            # fewer than 20 batches, which would raise ZeroDivisionError; verify.
            if (i+1) % (len(train_loader) // 20) == 0:
                logging.info("Progress %.3f  ce_loss = %.3f  time_loss = %.3f" % (progress, ce_loss.item(), loss_time.item()))
            del  answers, batch_input, logits
            torch.cuda.empty_cache()
        # save_checkpoint(epoch, model, optimizer, model_kwargs_tosave, os.path.join(args.save_dir, 'model.pt')) 
        logging.info(' >>>>>> save to %s <<<<<<' % (args.save_dir))
        if args.val:
            # NOTE(review): checkpoints are only written when args.val is set
            # and accuracy improves; without --val nothing is saved — confirm.
            if epoch % 1 ==0:
                valid_acc = validate(model, val_loader, device)
                logging.info('\n ~~~~~~ Valid Accuracy: %.4f ~~~~~~~\n' % valid_acc)
                if valid_acc >= last_acc:
                    last_acc = valid_acc
                    save_checkpoint(epoch, model, optimizer, model_kwargs_tosave, os.path.join(args.save_dir, 'model.pt'))