Example #1
def train(opt, train_loader, model, epoch):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    progbar = Progbar(len(train_loader.dataset))
    end = time.time()
    for i, train_data in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        b_size, loss = model.train_emb(*train_data)

        progbar.add(b_size, values=[('loss', loss)])

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)
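The training loop above leans on several helpers that the snippet does not define (AverageMeter, LogCollector, Progbar, tb_logger, plus the time import), so it is not runnable on its own. As a point of reference, here is a minimal sketch of an AverageMeter that matches how the loop uses it (.update(value) followed by reads of .val and .avg); the actual class in the source repository may differ.

class AverageMeter(object):
    """Track the latest value, running sum, count, and average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        # `val` is the newest measurement; `n` lets one call account for n samples.
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

Any class exposing the same update/val/avg interface would drop in, since the loop only reads those attributes when logging.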
def encode_data_for_avs(model, data_loader, log_step=100, logging=print, return_ids=True):
    """Encode all videos and captions loadable by `data_loader`
    """
    batch_time = AverageMeter()
    val_logger = LogCollector()

    # switch to evaluate mode
    model.val_start()

    end = time.time()

    # numpy array to keep all the embeddings
    video_embs = None
    cap_embs = None
    errorlists = []
    diagonal = []
    cap_ids_all = []
    video_ids = [''] * len(data_loader.dataset)
    caption_ids = [''] * len(data_loader.dataset)
    diagonal_ids = [''] * len(data_loader.dataset)

    for i, (videos, captions, idxs, cap_ids, vid_ids) in enumerate(data_loader):
        # make sure val logger is used
        model.logger = val_logger

        # compute the embeddings
        vid_emb, cap_emb = model.forward_emb(videos, captions, True)

        # preserve the embeddings by copying from gpu and converting to numpy
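        # NOTE: these arrays are overwritten on every batch, so after the loop they
        # hold only the last batch; what accumulates across the whole data_loader
        # are the per-batch diagonal scores and the id lists below.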
        video_embs = vid_emb.data.cpu().numpy().copy()
        cap_embs = cap_emb.data.cpu().numpy().copy()
        errorlistList = cosine_calculate(cap_embs, video_embs)

        errorlist = np.asanyarray(errorlistList)
        diagonal = np.append(diagonal, np.diag(errorlist))

        cap_ids_all.extend(cap_ids)

        for j, idx in enumerate(idxs):
            caption_ids[idx] = cap_ids[j]
            video_ids[idx] = vid_ids[j]
            diagonal_ids[idx] = np.diag(errorlist).tolist()[j]

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % log_step == 0:
            logging('Test: [{0:2d}/{1:2d}]\t'
                    '{e_log}\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                        i, len(data_loader),
                        batch_time=batch_time,
                        e_log=str(model.logger)))
        del videos, captions

    if return_ids:
        return video_embs, cap_embs, diagonal, diagonal_ids, cap_ids_all, video_ids, caption_ids
    else:
        return video_embs, cap_embs, diagonal, diagonal_ids, cap_ids_all
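cosine_calculate is used above but not defined in the snippet. Given how its result is consumed (np.diag over a caption-by-video score matrix), it presumably returns a pairwise cosine score matrix between the caption and video embeddings. A minimal NumPy sketch under that assumption (the real helper may return a distance rather than a similarity; the diagonal bookkeeping works the same either way):

import numpy as np

def cosine_calculate(cap_embs, video_embs, eps=1e-8):
    # L2-normalize each row, then take dot products so that entry (i, j) is the
    # cosine similarity between caption i and video j in this batch.
    cap_norm = cap_embs / (np.linalg.norm(cap_embs, axis=1, keepdims=True) + eps)
    vid_norm = video_embs / (np.linalg.norm(video_embs, axis=1, keepdims=True) + eps)
    return cap_norm.dot(vid_norm.T)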
Example #3
def encode_data(model,
                data_loader,
                log_step=10,
                logging=print,
                return_ids=True):
    """Encode all videos and captions loadable by `data_loader`
    """
    batch_time = AverageMeter()
    val_logger = LogCollector()

    # switch to evaluate mode
    model.val_start()

    end = time.time()

    # numpy array to keep all the embeddings
    video_embs = None
    cap_embs = None
    video_ids = [''] * len(data_loader.dataset)
    caption_ids = [''] * len(data_loader.dataset)
    for i, (videos, captions, idxs, cap_ids,
            vid_ids) in enumerate(data_loader):
        # make sure val logger is used
        model.logger = val_logger

        # compute the embeddings
        vid_emb, cap_emb = model.forward_emb(videos, captions, True)

        # initialize the numpy arrays given the size of the embeddings
        if video_embs is None:
            video_embs = np.zeros((len(data_loader.dataset), vid_emb.size(1)))
            cap_embs = np.zeros((len(data_loader.dataset), cap_emb.size(1)))

        # preserve the embeddings by copying from gpu and converting to numpy
        video_embs[idxs] = vid_emb.data.cpu().numpy().copy()
        cap_embs[idxs] = cap_emb.data.cpu().numpy().copy()

        for j, idx in enumerate(idxs):
            caption_ids[idx] = cap_ids[j]
            video_ids[idx] = vid_ids[j]

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % log_step == 0:
            logging(
                'Test: [{0:2d}/{1:2d}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                    i,
                    len(data_loader),
                    batch_time=batch_time,
                    e_log=str(model.logger)))
        del videos, captions

    if return_ids:
        return video_embs, cap_embs, video_ids, caption_ids
    else:
        return video_embs, cap_embs
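A typical way to consume encode_data is to encode a validation split once and then score every caption against every video. The sketch below is an illustration, not part of the original code: it assumes cosine similarity as the scoring function and that caption i is paired with the video stored at row i, which is how encode_data's index bookkeeping lays the arrays out.

import numpy as np

def evaluate_recall_at_1(model, val_loader):
    # Encode the whole validation split in one pass.
    video_embs, cap_embs, video_ids, caption_ids = encode_data(model, val_loader)

    # Cosine similarity between every caption (rows) and every video (columns).
    vid_norm = video_embs / np.linalg.norm(video_embs, axis=1, keepdims=True)
    cap_norm = cap_embs / np.linalg.norm(cap_embs, axis=1, keepdims=True)
    sims = cap_norm.dot(vid_norm.T)

    # Recall@1: fraction of captions whose top-ranked video is their own row.
    top1 = np.argmax(sims, axis=1)
    return 100.0 * np.mean(top1 == np.arange(sims.shape[0]))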
Example #4
def train(opt, train_loader, model, epoch):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()
    loss_value = []
    pos_value = []
    neg_value = []

    # switch to train mode
    model.train_start()

    progbar = Progbar(len(train_loader.dataset))
    end = time.time()
    for i, train_data in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model

        b_size, loss, pos, neg = model.train_emb(*train_data)

        loss_value.append(loss)
        pos_value.append(pos)
        neg_value.append(neg)

        progbar.add(b_size, values=[('loss', loss)])

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)
    loss_value = np.array(loss_value)
    pos_value = np.array(pos_value)
    neg_value = np.array(neg_value)
    return loss_value.mean(), pos_value.mean(), neg_value.mean()
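This variant of train returns the epoch's mean loss and the mean positive/negative similarity scores so a caller can track them over time. A minimal sketch of such an outer loop follows; validate here is a hypothetical stand-in for whatever evaluation the project runs (for example encode_data followed by a retrieval metric), not a function from the snippets above.

def fit(opt, model, train_loader, val_loader, num_epochs=30):
    best_score = 0.0
    for epoch in range(num_epochs):
        # One pass over the training data; returns per-epoch averages.
        mean_loss, mean_pos, mean_neg = train(opt, train_loader, model, epoch)
        print('epoch {}: loss={:.4f} pos={:.4f} neg={:.4f}'.format(
            epoch, mean_loss, mean_pos, mean_neg))

        # Hypothetical validation hook; swap in the project's own evaluation.
        score = validate(opt, val_loader, model)
        best_score = max(best_score, score)
    return best_score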