Example #1
def train(opt, train_loader, model, epoch, vocab, val_loader):
    # average meters to record the training statistics
    train_logger = LogCollector()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    nbatch = len(train_loader)
    # switch to train mode
    end = time.time()
    model.n_word_img = 0
    model.n_word_txt = 0
    model.n_sent = 0
    model.s_time = end
    model.all_stats_img = [[0., 0., 0.]]
    model.all_stats_txt = [[0., 0., 0.]]
    for train_data in tqdm(train_loader):
        # Always reset to train mode
        model.train()
        # measure data loading time
        data_time.update(time.time() - end)
        # make sure train logger is used
        model.logger = train_logger
        # Update the model
        info = model.forward(*train_data, epoch=epoch)
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # Print log info
        if model.niter % opt.log_step == 0:
            logger.info(
                'Epoch: [{0}] {e_log} {info}'
                .format(
                    epoch, e_log=str(model.logger), info=info
                )
            )
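All of the snippets here assume AverageMeter and LogCollector helpers that none of them define. A minimal sketch in the style of the common VSE++ training utilities (an assumption; the tb_log method presumes a tensorboard_logger-style object with a log_value method, while some examples below use a SummaryWriter-style add_scalar instead):

from collections import OrderedDict


class AverageMeter(object):
    """Tracks the latest value, sum, count, and running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        # small constant guards against n=0 updates
        self.avg = self.sum / (0.0001 + self.count)

    def __str__(self):
        return '%.4f (%.4f)' % (self.val, self.avg)


class LogCollector(object):
    """An ordered dict of AverageMeters, one per logged metric."""

    def __init__(self):
        self.meters = OrderedDict()

    def update(self, k, v, n=1):
        if k not in self.meters:
            self.meters[k] = AverageMeter()
        self.meters[k].update(v, n)

    def __str__(self):
        return '  '.join('%s %s' % (k, v) for k, v in self.meters.items())

    def tb_log(self, tb_logger, prefix='', step=None):
        for k, v in self.meters.items():
            tb_logger.log_value(prefix + k, v.val, step=step)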
Example #2
def train(opt, train_loader, model, epoch):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    progbar = Progbar(train_loader.dataset.length)
    end = time.time()
    for i, train_data in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        b_size, loss = model.train_emb(*train_data)
        # print loss
        progbar.add(b_size, values=[("loss", loss)])

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)
Example #3
def train(opt, train_loader, model, epoch, val_loader):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()
    model.epoch = epoch

    end = time.time()
    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        if not opt.use_external_captions:
            model.train_emb(*train_data)
        else:
            model.train_emb_with_extended_captions(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            if not opt.use_external_captions:
                validate(opt, val_loader, model)
            else:
                validate_caption_only(opt, val_loader, model)
Example #4
File: train.py, Project: pxu4114/CMR
def train(opt, train_loader, model, epoch, val_loader, audio):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    if audio:
        model.train2_start()
    else:
        model.train_start()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        if opt.reset_train:
            # Always reset to train mode, this is not the default behavior
            if audio:
                model.train2_start()
            else:
                model.train_start()
        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_emb(audio, *train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)
Example #5
def train(opt, train_loader, model, epoch, val_loader):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()
    best_score = 0
    end = time.time()
    for i, train_data in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        loss = model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        tb_logger.log_value('train',
                            float(loss.detach().cpu().numpy()),
                            step=model.Eiters)
        tb_logger.log_value('c2c', 0., step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)
Example #6
def train(opt, train_loader, model, epoch, val_loader, tb_logger):
    print("start to train")
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()
    # switch to train mode
    model.train_start()

    end = time.time()
    print("start loading data...")
    for i, train_data in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)
Example #7
def train(train_loader, model, epoch, tb_logger, log_step=100, val_step=500):
    # Loggers for statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    model.train_start()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        model.logger = train_logger

        # Update model
        model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print info
        if model.Eiters % log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.add_scalar('epoch', epoch, model.Eiters)
        tb_logger.add_scalar('step', i, model.Eiters)
        tb_logger.add_scalar('batch_time', batch_time.val, model.Eiters)
        tb_logger.add_scalar('data_time', data_time.val, model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step (disabled in this snippet)
        # if model.Eiters % val_step == 0:
Example #8
def train(opt, train_loader, model, epoch, val_loader, vocab):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        # Always reset to train mode
        model.train_start()

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_emb(*train_data, epoch=epoch)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logger.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    e_log=str(model.logger)))

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            validate(opt, val_loader, model, vocab)
Example #9
def train(opt, train_loader, model, epoch, val_loader):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    end = time.time()
    for i, train_data in enumerate(train_loader):

        model.train_start()

        data_time.update(time.time() - end)

        model.logger = train_logger

        model.train_emb(*train_data)

        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f}\t'
                # 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                .format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    # data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            validate(opt, val_loader, model)
Example #10
def train(opt, train_loader, model, epoch, val_loader, best_rsum):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        # if opt.reset_train:
            # Always reset to train mode, this is not the default behavior
        model.train_start()

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                .format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            # validate(opt, val_loader, model)

            # evaluate on validation set
            rsum = validate(opt, val_loader, model)

            # remember best R@ sum and save checkpoint
            is_best = rsum > best_rsum
            best_rsum = max(rsum, best_rsum)
            save_checkpoint({
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            }, is_best, prefix=opt.logger_name + '/')


    return best_rsum
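Examples #10 and #11 call save_checkpoint without defining it. A minimal sketch following the usual VSE++ convention (the filenames are assumptions):

import shutil

import torch


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar', prefix=''):
    # save the latest state; keep a separate copy of the best model so far
    torch.save(state, prefix + filename)
    if is_best:
        shutil.copyfile(prefix + filename, prefix + 'model_best.pth.tar')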
Example #11
def joint_train(opt, train_loader, model, val_loader):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()
    best_score = 0
    stop = False
    iters = 0
    langs = opt.lang.split("-")
    # switch to train mode
    model.train_start()
    # Sentencepair is always the last data loader in the list
    if opt.sentencepair:
        sentencepair_loader = train_loader.pop()
        sentencepair_loader_val = val_loader.pop()
    # Call iter() on each DataLoader to get a DatasetLoaderIterator
    train_loader_its = list(map(iter, train_loader))
    end = time.time()
    patience_count = 0
    if opt.primary:
        primary = opt.primary.split("-")
    while not stop:
        iters += 1
        # Pick a data set and batch
        ind = random.randint(0, len(train_loader) - 1)
        train_cap2cap = (random.random() < opt.sentencepair_p
                         and opt.sentencepair)
        if opt.reset_train:
            # Always reset to train mode, this is not the default behavior
            model.train_start()
        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        loss = None
        loss_c2c = None
        # Train caption-caption ranking.
        if train_cap2cap and opt.sentencepair:
            capA, capB, lenA, lenB = next(sentencepair_loader)
            captionsA = Variable(capA)
            captionsB = Variable(capB)
            if torch.cuda.is_available():
                captionsA = captionsA.cuda()
                captionsB = captionsB.cuda()
            # Create permute and inverse-permute indices that sort by length,
            # descending, to match the reverse-sorted lengths passed to txt_enc
            indsA = np.argsort(-np.array(lenA))
            indsB = np.argsort(-np.array(lenB))
            revA = np.zeros(len(lenA), dtype='int')
            revB = np.zeros(len(lenB), dtype='int')
            for i in range(len(lenA)):
                revA[indsA[i]] = i
                revB[indsB[i]] = i
            indsA, indsB = torch.LongTensor(indsA), torch.LongTensor(indsB)
            revA, revB = torch.LongTensor(revA), torch.LongTensor(revB)
            if torch.cuda.is_available():
                indsA, indsB = indsA.cuda(), indsB.cuda()
                revA, revB = revA.cuda(), revB.cuda()
            model.Eiters += 1
            model.logger.update('Eit', model.Eiters)
            # Pass length sorted captions for encoding
            capA_emb = model.txt_enc(captionsA[indsA],
                                     sorted(lenA, reverse=True))
            capB_emb = model.txt_enc(captionsB[indsB],
                                     sorted(lenB, reverse=True))
            model.optimizer.zero_grad()
            # Unsort captions for the loss computation
            loss_c2c = model.forward_loss(capA_emb[revA], capB_emb[revB])
            # compute gradient and do SGD step
            loss_c2c.backward()
            if model.grad_clip > 0:
                clip_grad_norm(model.params, model.grad_clip)
            model.optimizer.step()
            # Don't count this as an iter
        # Train image-sentence ranking.
        else:
            tloader = train_loader_its[ind]
            # Get the next batch; if the iterator is exhausted, re-init the DatasetLoaderIterators
            try:
                train_data = next(tloader)
            except StopIteration:
                train_loader_its = list(map(iter, train_loader))
                tloader = train_loader_its[ind]
                train_data = next(tloader)
            loss = model.train_emb(*train_data)
        # Train with sentence-pair ranking batch.
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    batch_time=batch_time,
                    data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('step', iters, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)

        if loss is not None:
            tb_logger.log_value('train',
                                float(loss.detach().cpu().numpy()),
                                step=model.Eiters)

        if loss_c2c is not None:
            tb_logger.log_value('c2c',
                                float(loss_c2c.detach().cpu().numpy()),
                                step=model.Eiters)

        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            total_score = 0
            for l, vloader in zip(langs, val_loader):
                with torch.no_grad():
                    score = validate(opt, vloader, model, l)

                if opt.primary:
                    if l in primary:
                        total_score += score
                else:
                    total_score += score
            # Compute val loss on sentencepair task
            if opt.sentencepair:
                # val_loss = sentencepair_eval(model, sentencepair_loader_val)
                # tb_logger.log_value('valid_c2c', val_loss, step=model.Eiters)
                # print('Sentence Pair Val Loss {}'.format(val_loss))
                tb_logger.log_value('valid_c2c', 0., step=model.Eiters)
            else:
                tb_logger.log_value('valid_c2c', 0., step=model.Eiters)

            if total_score > best_score:
                is_best = True
                print("New best: {}".format(total_score))
                best_score = total_score
                patience_count = 0
            else:
                patience_count += 1
                is_best = False
                print("No improvement in {}".format(patience_count))
                if patience_count >= opt.patience:
                    print("No improvement in {} evaluations, stopping".format(
                        patience_count))
                    break
            save_checkpoint(
                {
                    'iter': iters,
                    'model': model.state_dict(),
                    'best_rsum': best_score,
                    'opt': opt,
                    'Eiters': model.Eiters,
                },
                is_best,
                prefix=opt.logger_name + '/')

    print("Finished trained. Best score: {}".format(best_score))
Example #12
def train(opt,
          train_loader,
          model,
          optimizer,
          epoch,
          tb_logger,
          val_loader,
          measure='cosine',
          grad_clip=-1,
          scheduler=None,
          warmup_scheduler=None,
          ndcg_val_scorer=None):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        model.train()
        if scheduler is not None:
            scheduler.step(epoch)

        if warmup_scheduler is not None:
            warmup_scheduler.dampen()

        optimizer.zero_grad()

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        loss_dict = model(*train_data)
        loss = sum(loss for loss in loss_dict.values())

        # compute gradient and do SGD step
        loss.backward()
        if grad_clip > 0:
            torch.nn.utils.clip_grad.clip_grad_norm_(model.parameters(),
                                                     grad_clip)
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.add_scalar('epoch', epoch, model.Eiters)
        tb_logger.add_scalar('step', i, model.Eiters)
        tb_logger.add_scalar('batch_time', batch_time.val, model.Eiters)
        tb_logger.add_scalar('data_time', data_time.val, model.Eiters)
        tb_logger.add_scalar('lr', optimizer.param_groups[0]['lr'],
                             model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            validate(val_loader,
                     model,
                     tb_logger,
                     measure=measure,
                     log_step=opt.log_step,
                     ndcg_scorer=ndcg_val_scorer)
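Example #12 steps an optional scheduler every batch and then calls warmup_scheduler.dampen(), which matches the API of the pytorch_warmup package (an assumption; the project may define its own warmup object). A hedged setup sketch with placeholder model, rates, and step size:

import torch
import pytorch_warmup as warmup

model = torch.nn.Linear(10, 10)  # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# provides the .dampen() call used in the training loop above
warmup_scheduler = warmup.UntunedLinearWarmup(optimizer)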
Example #13
def train(train_loader, model, criterion, optimizer, epoch, print_freq,
          summary_writer):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # non_blocking replaces async, which is a reserved word in Python 3.7+
        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        # .item() replaces the deprecated loss.data[0] indexing
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))

    summary_writer.add_scalar('data/losses_avg', losses.avg, epoch)
    summary_writer.add_scalar('data/top1_avg', top1.avg, epoch)
    summary_writer.add_scalar('data/top5_avg', top5.avg, epoch)
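Examples #13 and #15 rely on an accuracy helper; the PyTorch ImageNet example defines it essentially as follows (a sketch of that convention):

import torch


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)

    # top-k predicted class indices per sample, transposed to (k, batch)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res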
Example #14
def train(opt, train_loader, adapt_loader, model, model_ema, epoch, val_loader, tb_writer):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    end = time.time()
    adapt_iter = iter(adapt_loader)
    adapt_loss = torch.nn.MSELoss()

    if opt.ramp_lr:
        adjust_learning_rate_mean_teacher(
            model.optimizer, epoch, opt.num_epochs,
            opt.initial_lr_rampup, opt.initial_lr)
    else:
        adjust_learning_rate(opt, model.optimizer, epoch)

    consistency_weight = get_current_consistency_weight(
        opt.consistency_weight, epoch, opt.consistency_rampup)


    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        model.Eiters += 1

        # switch to train mode
        model.train_start()
        model_ema.train_start()

        # make sure train logger is used
        model.logger = train_logger

        try:
            adapt_data = next(adapt_iter)
        except StopIteration:
            adapt_iter = iter(adapt_loader)
            adapt_data = next(adapt_iter)

        # Get embeddings
        img_emb, cap_emb, cap_lens = model.run_emb(*train_data)

        # Data for Domain Adaptation or SS Learning
        # Adapt loader returns different features for the same images
        adapt_imgs_ema, adapt_imgs, _, _, _ = adapt_data
        adapt_imgs = adapt_imgs.float().cuda()
        adapt_imgs_ema = adapt_imgs_ema.float().cuda()

        with torch.no_grad():
            ema_adapt_imgs_emb = model_ema.img_enc(adapt_imgs_ema)

        adapt_imgs_emb = model.img_enc(adapt_imgs)

        consistency_loss_img = adapt_loss(ema_adapt_imgs_emb, adapt_imgs_emb)
        consistency_loss = consistency_loss_img * consistency_weight

        # measure accuracy and record loss
        model.optimizer.zero_grad()
        loss = model.forward_loss(img_emb, cap_emb, cap_lens)
        total_loss = loss + consistency_loss

        # compute gradient and do SGD step
        total_loss.backward()
        if model.grad_clip > 0:
            torch.nn.utils.clip_grad_norm_(
                parameters=model.params, max_norm=model.grad_clip)

        model.optimizer.step()

        if epoch <= opt.ema_late_epoch:
            update_ema_variables(
                model=model,
                ema_model=model_ema,
                alpha=opt.consistency_alpha,
                global_step=model.Eiters,
            )
        else:
            update_ema_variables(
                model=model,
                ema_model=model_ema,
                alpha=opt.consistency_alpha_late,
                global_step=model.Eiters,
            )


        # Update the model
        # model.train_emb(*train_data)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        tb_writer.add_scalar('Iter', model.Eiters, model.Eiters)
        tb_writer.add_scalar('Lr', model.optimizer.param_groups[0]['lr'], model.Eiters)
        tb_writer.add_scalar('Consistency weight', consistency_weight, model.Eiters)

        model.logger.update('Contr Loss', loss.item())
        model.logger.update('Adapt Loss', consistency_loss.item())
        model.logger.update('Total Loss', total_loss.item())


        # Print log info
        if model.Eiters % opt.log_step == 0:
            print(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                .format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_writer.add_scalar('epoch', epoch, model.Eiters)
        tb_writer.add_scalar('step', i, model.Eiters)
        tb_writer.add_scalar('batch_time', batch_time.val, model.Eiters)
        tb_writer.add_scalar('data_time', data_time.val, model.Eiters)

        model.logger.tb_log(tb_writer, prefix='train', step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0 and model.Eiters > 0:
            validate(opt, val_loader, model, tb_writer)

            if opt.log_images:
                plot_img = vutils.make_grid(train_data[0],
                                normalize=True, scale_each=True)
                tb_writer.add_image('Labeled Images', plot_img, model.Eiters)

                plot_img = vutils.make_grid(adapt_imgs,
                                normalize=True, scale_each=True)
                tb_writer.add_image('Adapt Images', plot_img, model.Eiters)
Example #15
def test(test_loader, model, criterion, print_freq):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    #preds = np.zeros((0,7,))
    pred_labels = np.zeros([0,])
    GT_labels = np.zeros([0,])
    for i, (input, target) in enumerate(test_loader):
        target = target.cuda(non_blocking=True)  # 'async' is reserved in Python 3.7+
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output (volatile was removed in PyTorch 0.4; no_grad replaces it)
        with torch.no_grad():
            output = model(input_var)
            loss = criterion(output, target_var)

        '''
        cal_probs = torch.nn.Softmax(dim=0)
        probs = cal_probs(output)
        preds = np.concatenate([preds, probs.data.cpu().numpy()], axis=0)
        '''

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        _, pred = output.data.topk(1, 1, True, True)
        pred_labels = np.concatenate([pred_labels, pred.cpu().numpy().flatten()], axis=0)
        GT_labels = np.concatenate([GT_labels, target.cpu().numpy().flatten()], axis=0)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   i, len(test_loader), batch_time=batch_time, loss=losses,
                   top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    categories = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']
    build_confusion_mtx(GT_labels, pred_labels, categories)

    '''
    mean_score, std_score = get_inception_score(preds)
    print(' * IS: mean {mean_score:.3f} std {std_score:.3f}'.format(mean_score=mean_score, std_score=std_score))
    '''

    return top1.avg
Example #16
def train(opt, model, epoch, train_loader, val_loader):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    kmeans_features = None
    kmeans_emb = None

    end = time.time()

    if opt.cluster_loss:
        features = retrieve_features(train_loader)
        kmeans_features = get_centers(features, opt.n_clusters)

    # https://stats.stackexchange.com/questions/299013/cosine-distance-as-similarity-measure-in-kmeans
    # after L2-normalization, Euclidean distance is monotonically related to
    # cosine distance, so plain k-means approximates cosine clustering

    for j, (images, targets, lengths, ids) in enumerate(train_loader):

        if opt.cluster_loss:
            img_embs, _, _ = encode_data(model, train_loader)
            kmeans_emb = get_centers(img_embs, opt.n_clusters)

        # switch to train mode
        model.train_start()
        # if j == i:
        #     same = True
        # else:
        #     same = False

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_emb(epoch, images, targets, lengths, ids, opt.cluster_loss,
                        kmeans_features, kmeans_emb)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch,
                    j,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', j, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            validate(opt, val_loader, model)
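Example #16's retrieve_features and get_centers helpers are not shown. Given the comment above about normalization and cosine distance, a plausible get_centers sketch using scikit-learn k-means (the whole body is an assumption):

import numpy as np
from sklearn.cluster import KMeans


def get_centers(features, n_clusters):
    # L2-normalize so Euclidean k-means approximates cosine clustering
    feats = np.asarray(features)
    feats = feats / (np.linalg.norm(feats, axis=1, keepdims=True) + 1e-12)
    return KMeans(n_clusters=n_clusters, n_init=10).fit(feats).cluster_centers_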
Example #17
def train(opt, train_loader, adapt_loader, model, model_ema, epoch, val_loader,
          tb_writer):
    from model import ContrastiveLoss

    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    end = time.time()
    adapt_iter = iter(adapt_loader)
    if opt.adapt_loss == 'mse':
        adapt_loss = torch.nn.MSELoss()
    elif opt.adapt_loss == 'contrastive':
        adapt_loss = ContrastiveLoss(margin=opt.margin, measure=opt.measure)

    if opt.ramp_lr:
        adjust_learning_rate_mean_teacher(model.optimizer, epoch,
                                          opt.num_epochs,
                                          opt.initial_lr_rampup,
                                          opt.initial_lr)
    else:
        adjust_learning_rate(opt, model.optimizer, epoch)

    consistency_weight = get_current_consistency_weight(
        opt.consistency_weight, epoch, opt.consistency_rampup)

    if opt.max_violation:
        gamma = 1.
    else:
        gamma = adjust_gamma(init_gamma=0.0, epoch=epoch, increase=0.2)
        train_logger.update('hard_contr_gamma', gamma, n=0)

    for i, train_data in enumerate(train_loader):
        # measure data loading time

        data_time.update(time.time() - end)
        model.Eiters += 1

        # switch to train mode
        model.train_start()
        model_ema.train_start()

        # make sure train logger is used
        model.logger = train_logger

        try:
            adapt_data = next(adapt_iter)
        except StopIteration:
            adapt_iter = iter(adapt_loader)
            adapt_data = next(adapt_iter)

        # Get embeddings
        img_emb, cap_emb = model.run_emb(*train_data)

        # Data for Domain Adaptation or SS Learning
        # Adapt loader returns different features for the same images
        adapt_imgs_ema, adapt_imgs, adapt_caption, adapt_lens, _ = adapt_data

        adapt_imgs = adapt_imgs.float().cuda()
        adapt_imgs_ema = adapt_imgs_ema.float().cuda()

        consistency_loss_cap = 0.
        if opt.adapt_split != 'unlabeled':
            adapt_caption = adapt_caption.cuda()
            # the teacher (EMA) pass needs no gradients ...
            with torch.no_grad():
                ema_adapt_cap_emb = model_ema.txt_enc(
                    adapt_caption, adapt_lens, dropout=opt.dropout_noise)
            # ... but the student pass must keep them, otherwise the caption
            # consistency loss contributes no gradients
            adapt_cap_emb = model.txt_enc(adapt_caption,
                                          adapt_lens,
                                          dropout=opt.dropout_noise)
            consistency_loss_cap = adapt_loss(ema_adapt_cap_emb,
                                              adapt_cap_emb)

        with torch.no_grad():
            ema_adapt_imgs_emb = model_ema.img_enc(adapt_imgs_ema)

        adapt_imgs_emb = model.img_enc(adapt_imgs)

        consistency_loss_img = adapt_loss(ema_adapt_imgs_emb, adapt_imgs_emb)
        consistency_loss = (consistency_loss_img / 2. +
                            consistency_loss_cap / 2.) * consistency_weight

        # measure accuracy and record loss
        model.optimizer.zero_grad()
        loss = model.forward_loss(img_emb, cap_emb, gamma=gamma)
        total_loss = loss + consistency_loss

        # compute gradient and do SGD step
        total_loss.backward()
        if model.grad_clip > 0:
            clip_grad_norm(model.params, model.grad_clip)

        model.optimizer.step()

        if epoch <= opt.ema_late_epoch:
            update_ema_variables(
                model=model,
                ema_model=model_ema,
                alpha=opt.consistency_alpha,
                global_step=model.Eiters,
            )
        else:
            update_ema_variables(
                model=model,
                ema_model=model_ema,
                alpha=opt.consistency_alpha_late,
                global_step=model.Eiters,
            )

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        model.logger.update('Iter', model.Eiters, 0)
        model.logger.update('Lr', model.optimizer.param_groups[0]['lr'], 0)
        model.logger.update('Consistency weight', consistency_weight, 0)
        model.logger.update('Contr Loss', loss.item())
        model.logger.update('Adapt Loss', consistency_loss.item())
        model.logger.update('Total Loss', total_loss.item())

        # Print log info
        if model.Eiters % opt.log_step == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  '{e_log}\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                      epoch,
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_writer.add_scalar('epoch', epoch, model.Eiters)
        tb_writer.add_scalar('step', i, model.Eiters)
        tb_writer.add_scalar('batch_time', batch_time.val, model.Eiters)
        tb_writer.add_scalar('data_time', data_time.val, model.Eiters)

        # pass step as a keyword so it does not bind to tb_log's prefix argument
        model.logger.tb_log(tb_writer, step=model.Eiters)

        # validate at every val_step
        if model.Eiters % opt.val_step == 0:
            # print('Validate normal')
            print('Validate EMA')
            validate(opt, val_loader, model_ema, tb_writer)
            # validate(opt, val_loader, model, tb_writer)

            if opt.log_images:
                plot_img = vutils.make_grid(train_data[0],
                                            normalize=True,
                                            scale_each=True)
                tb_writer.add_image('Labeled Images', plot_img, model.Eiters)

                plot_img = vutils.make_grid(adapt_imgs,
                                            normalize=True,
                                            scale_each=True)
                tb_writer.add_image('Adapt Images', plot_img, model.Eiters)
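Examples #14 and #17 depend on update_ema_variables and get_current_consistency_weight. The mean-teacher reference code defines them roughly as sketched below (an assumption; the originals may differ in details such as the ramp-up shape):

import numpy as np


def update_ema_variables(model, ema_model, alpha, global_step):
    # ramp the EMA decay up toward alpha early in training
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)


def get_current_consistency_weight(weight, epoch, rampup_length):
    # sigmoid ramp-up of the consistency weight over the first epochs
    if rampup_length == 0:
        return weight
    phase = 1.0 - np.clip(epoch, 0.0, rampup_length) / rampup_length
    return weight * float(np.exp(-5.0 * phase * phase))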