def train(opt, train_loader, model, epoch):
    """Run a single training epoch over `train_loader`.

    Args:
        opt: experiment options (unused here; kept for interface parity).
        train_loader: DataLoader yielding training batches.
        model: wrapper exposing train_start()/train_emb() and an Eiters counter.
        epoch: current epoch index, recorded to tensorboard.
    """
    # Meters that track per-batch timing statistics.
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # Put the model into training mode.
    model.train_start()

    progbar = Progbar(len(train_loader.dataset))
    tick = time.time()
    for step, batch in enumerate(train_loader):
        # Time spent waiting on the data pipeline.
        data_time.update(time.time() - tick)

        # Route the model's logging through this epoch's collector.
        model.logger = train_logger

        # One optimization step on the batch.
        batch_size, loss = model.train_emb(*batch)
        progbar.add(batch_size, values=[('loss', loss)])

        # Total elapsed time for the batch (data loading + update).
        batch_time.update(time.time() - tick)
        tick = time.time()

        # Mirror the statistics into tensorboard, keyed by model iteration.
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', step, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)
def encode_data_for_avs(model, data_loader, log_step=100, logging=print,
                        return_ids=True):
    """Encode all videos and captions loadable by `data_loader`.

    Also computes, per batch, the cosine error matrix between caption and
    video embeddings and collects its diagonal (the score of each caption
    against its own video).

    Args:
        model: wrapper exposing val_start()/forward_emb().
        data_loader: yields (videos, captions, idxs, cap_ids, vid_ids).
        log_step: log progress every `log_step` batches.
        logging: logging callable (defaults to print).
        return_ids: if True, also return video/caption id lists.

    Returns:
        (video_embs, cap_embs, diagonal, diagonal_ids, cap_ids_all
         [, video_ids, caption_ids])
    """
    batch_time = AverageMeter()
    val_logger = LogCollector()

    # Switch to evaluate mode.
    model.val_start()

    end = time.time()

    # NOTE(review): video_embs/cap_embs are overwritten on every batch, so
    # only the LAST batch's embeddings are returned (unlike encode_data,
    # which accumulates all of them) — confirm this is intentional.
    video_embs = None
    cap_embs = None
    diagonal = []
    cap_ids_all = []
    n_items = len(data_loader.dataset)
    video_ids = [''] * n_items
    caption_ids = [''] * n_items
    diagonal_ids = [''] * n_items
    for i, (videos, captions, idxs, cap_ids, vid_ids) in enumerate(data_loader):
        # Make sure the validation logger is used.
        model.logger = val_logger

        # Compute the embeddings for this batch.
        vid_emb, cap_emb = model.forward_emb(videos, captions, True)

        # Copy from GPU and convert to numpy.
        video_embs = vid_emb.data.cpu().numpy().copy()
        cap_embs = cap_emb.data.cpu().numpy().copy()

        errorlist = np.asanyarray(cosine_calculate(cap_embs, video_embs))
        # Hoisted: the diagonal was previously recomputed for every item
        # inside the loop below.
        batch_diag = np.diag(errorlist)
        diagonal = np.append(diagonal, batch_diag)
        cap_ids_all.extend(cap_ids)
        batch_diag_list = batch_diag.tolist()

        # Scatter ids/scores into their dataset positions.
        for j, idx in enumerate(idxs):
            caption_ids[idx] = cap_ids[j]
            video_ids[idx] = vid_ids[j]
            diagonal_ids[idx] = batch_diag_list[j]

        # Measure elapsed time.
        batch_time.update(time.time() - end)
        end = time.time()

        if i % log_step == 0:
            logging('Test: [{0:2d}/{1:2d}]\t'
                    '{e_log}\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    .format(
                        i, len(data_loader), batch_time=batch_time,
                        e_log=str(model.logger)))
        del videos, captions

    if return_ids:
        return (video_embs, cap_embs, diagonal, diagonal_ids, cap_ids_all,
                video_ids, caption_ids)
    return video_embs, cap_embs, diagonal, diagonal_ids, cap_ids_all
def encode_data(model, data_loader, log_step=10, logging=print,
                return_ids=True):
    """Encode all videos and captions loadable by `data_loader`.

    Args:
        model: wrapper exposing val_start()/forward_emb().
        data_loader: yields (videos, captions, idxs, cap_ids, vid_ids).
        log_step: log progress every `log_step` batches.
        logging: logging callable (defaults to print).
        return_ids: if True, also return video/caption id lists.

    Returns:
        (video_embs, cap_embs[, video_ids, caption_ids]) where the embedding
        arrays cover the whole dataset, ordered by dataset index.
    """
    batch_time = AverageMeter()
    val_logger = LogCollector()

    # Switch to evaluate mode.
    model.val_start()

    end = time.time()

    # Numpy arrays to keep all the embeddings; allocated lazily once the
    # embedding dimensionality is known from the first batch.
    video_embs = None
    cap_embs = None
    n_items = len(data_loader.dataset)
    video_ids = [''] * n_items
    caption_ids = [''] * n_items
    for i, (videos, captions, idxs, cap_ids, vid_ids) in enumerate(data_loader):
        # Make sure the validation logger is used.
        model.logger = val_logger

        # Compute the embeddings for this batch.
        vid_emb, cap_emb = model.forward_emb(videos, captions, True)

        # Initialize the numpy arrays given the size of the embeddings.
        if video_embs is None:
            video_embs = np.zeros((n_items, vid_emb.size(1)))
            cap_embs = np.zeros((n_items, cap_emb.size(1)))

        # Preserve the embeddings by copying from GPU into numpy, scattered
        # into their dataset positions.
        video_embs[idxs] = vid_emb.data.cpu().numpy().copy()
        cap_embs[idxs] = cap_emb.data.cpu().numpy().copy()

        for j, idx in enumerate(idxs):
            caption_ids[idx] = cap_ids[j]
            video_ids[idx] = vid_ids[j]

        # Measure elapsed time.
        batch_time.update(time.time() - end)
        end = time.time()

        if i % log_step == 0:
            logging(
                'Test: [{0:2d}/{1:2d}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                    i, len(data_loader), batch_time=batch_time,
                    e_log=str(model.logger)))
        del videos, captions

    if return_ids:
        return video_embs, cap_embs, video_ids, caption_ids
    return video_embs, cap_embs
def train(opt, train_loader, model, epoch):
    """Run a single training epoch and return mean loss statistics.

    Args:
        opt: experiment options (unused here; kept for interface parity).
        train_loader: DataLoader yielding training batches.
        model: wrapper whose train_emb() returns (batch_size, loss, pos, neg).
        epoch: current epoch index, recorded to tensorboard.

    Returns:
        Tuple of epoch means: (mean loss, mean pos score, mean neg score).
    """
    # Average meters to record the training statistics.
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()
    loss_value = []
    pos_value = []
    neg_value = []

    # Switch to train mode.
    model.train_start()

    progbar = Progbar(len(train_loader.dataset))
    end = time.time()
    for i, train_data in enumerate(train_loader):
        # Measure data loading time.
        data_time.update(time.time() - end)

        # Make sure the train logger is used.
        model.logger = train_logger

        # Update the model; accumulate per-batch statistics for the epoch
        # means returned below.
        b_size, loss, pos, neg = model.train_emb(*train_data)
        loss_value.append(loss)
        pos_value.append(pos)
        neg_value.append(neg)
        progbar.add(b_size, values=[('loss', loss)])

        # Measure elapsed time.
        batch_time.update(time.time() - end)
        end = time.time()

        # Record logs in tensorboard, keyed by model iteration.
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

    # np.mean on the lists directly — no need to materialize arrays first.
    return np.mean(loss_value), np.mean(pos_value), np.mean(neg_value)