def validate_mffr(model, val_loader):
    LOGGER.info("start running MFFR validation...")
    val_loss = 0
    cosine = 0
    n_feat = 0
    st = time()
    for i, batch in enumerate(val_loader):
        targets = batch['feat_targets']
        pred_feat = model(batch, task='mffr', compute_loss=False)
        loss = F.mse_loss(pred_feat, targets, reduction='none')
        loss = torch.sqrt(loss.sum(dim=1))
        val_loss += loss.sum().item()
        cosine += F.cosine_similarity(pred_feat, targets, dim=-1).sum().item()
        n_feat += batch['c_v_masks'].sum().item()
    val_loss = sum(all_gather_list(val_loss))
    cosine = sum(all_gather_list(cosine))
    n_feat = sum(all_gather_list(n_feat))
    tot_time = time() - st
    val_loss /= n_feat
    val_log = {'loss': val_loss,
               'cosine': cosine / n_feat,
               'feat_per_s': n_feat / tot_time}
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"loss: {val_loss:.2f}")
    return val_log
def validate(model, val_loader, split):
    model.eval()
    val_loss = 0
    tot_score = 0
    n_ex = 0
    st = time()
    results = []
    for i, batch in enumerate(val_loader):
        qids = batch['qids']
        targets = batch['targets']
        del batch['targets']
        del batch['qids']
        scores = model(batch, compute_loss=False)
        loss = F.cross_entropy(scores, targets, reduction='sum')
        val_loss += loss.item()
        tot_score += (scores.max(dim=-1, keepdim=False)[1] == targets
                      ).sum().item()
        answers = ['True' if i == 1 else 'False'
                   for i in scores.max(dim=-1, keepdim=False
                                       )[1].cpu().tolist()]
        results.extend(zip(qids, answers))
        n_ex += len(qids)
    val_loss = sum(all_gather_list(val_loss))
    tot_score = sum(all_gather_list(tot_score))
    n_ex = sum(all_gather_list(n_ex))
    tot_time = time()-st
    val_loss /= n_ex
    val_acc = tot_score / n_ex
    val_log = {f'valid/{split}_loss': val_loss,
               f'valid/{split}_acc': val_acc,
               f'valid/{split}_ex_per_s': n_ex/tot_time}
    model.train()
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"score: {val_acc*100:.2f}")
    return val_log, results
def validate(model, val_loader, label2ans, split='val'):
    model.eval()
    val_loss = 0
    tot_score = 0
    n_ex = 0
    st = time()
    results = {}
    for i, batch in enumerate(val_loader):
        scores = model(batch, compute_loss=False)
        targets = batch['targets']
        loss = F.binary_cross_entropy_with_logits(
            scores, targets, reduction='sum')
        val_loss += loss.item()
        tot_score += compute_score_with_logits(scores, targets).sum().item()
        answers = [label2ans[i]
                   for i in scores.max(dim=-1, keepdim=False
                                       )[1].cpu().tolist()]
        qids = batch['qids']
        for qid, answer in zip(qids, answers):
            results[qid] = answer
        n_ex += len(qids)
    val_loss = sum(all_gather_list(val_loss))
    tot_score = sum(all_gather_list(tot_score))
    n_ex = sum(all_gather_list(n_ex))
    tot_time = time()-st
    val_loss /= n_ex
    val_acc = tot_score / n_ex
    val_log = {f'valid/{split}_loss': val_loss,
               f'valid/{split}_acc': val_acc,
               f'valid/{split}_ex_per_s': n_ex/tot_time}
    model.train()
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"score: {val_acc*100:.2f}")
    return val_log, results
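# A minimal sketch of the `compute_score_with_logits` helper called above,
# assuming VQA-style soft-label targets (one row of answer scores per
# example); this is an illustrative stand-in, not necessarily the repo's
# exact implementation.
import torch


def compute_score_with_logits(logits, labels):
    # take the argmax answer and look up its soft score in the targets
    preds = logits.max(dim=-1)[1]
    one_hots = torch.zeros_like(labels)
    one_hots.scatter_(1, preds.view(-1, 1), 1)
    return one_hots * labels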
def validate_mlm(model, val_loader):
    LOGGER.info("start running MLM validation...")
    val_loss = 0
    n_correct = 0
    n_word = 0
    st = time()
    for i, batch in enumerate(val_loader):
        scores = model(batch, task='mlm', compute_loss=False)
        labels = batch['txt_labels']
        labels = labels[labels != -1]
        loss = F.cross_entropy(scores, labels, reduction='sum')
        val_loss += loss.item()
        n_correct += (scores.max(dim=-1)[1] == labels).sum().item()
        n_word += labels.numel()
    val_loss = sum(all_gather_list(val_loss))
    n_correct = sum(all_gather_list(n_correct))
    n_word = sum(all_gather_list(n_word))
    tot_time = time()-st
    val_loss /= n_word
    acc = n_correct / n_word
    val_log = {'loss': val_loss,
               'acc': acc,
               'tok_per_s': n_word/tot_time}
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"acc: {acc*100:.2f}")
    return val_log
def validate_mlm_old(model, val_loader):
    LOGGER.info("start running MLM validation...")
    val_loss = 0
    n_correct = 0
    n_word = 0
    st = time()
    for i, batch in enumerate(val_loader):
        scores = model.forward(batch, task='mlm', compute_loss=False)
        loss_fct = torch.nn.CrossEntropyLoss(ignore_index=-1,
                                             reduction='sum')
        scores = scores.contiguous().view(-1, model.config.vocab_size)
        labels = batch['txt_labels'].contiguous().view(-1)
        loss = loss_fct(scores, labels)
        val_loss += loss.item()
        n_correct += accuracy_count(scores, labels)
        n_word += batch['txt_labels'].numel()
    val_loss = sum(all_gather_list(val_loss))
    n_correct = sum(all_gather_list(n_correct))
    n_word = sum(all_gather_list(n_word))
    tot_time = time()-st
    val_loss /= n_word
    acc = n_correct / n_word
    val_log = {'loss': val_loss,
               'acc': acc,
               'tok_per_s': n_word/tot_time}
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"acc: {acc*100:.2f}")
    return val_log
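# A minimal sketch of the `accuracy_count` helper used above, assuming it
# counts argmax hits while skipping the ignore_index (-1) positions; an
# illustrative assumption, not the repo's exact code.
def accuracy_count(scores, labels, ignore_index=-1):
    keep = labels != ignore_index
    preds = scores.max(dim=-1)[1]
    return (preds[keep] == labels[keep]).sum().item()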
def validate_vmlm_soft(model, val_loader):
    LOGGER.info("start running VMLM-SOFT validation...")
    val_loss = 0
    n_feat = 0
    st = time()
    tot_score = 0
    # label2token_matrix = torch.float(LABEL2TOKEN_MATRIX)
    for i, batch in enumerate(val_loader):
        prediction_soft_label = model(
            batch, task='vmlm-soft', compute_loss=False)
        prediction_soft_label = F.log_softmax(
            prediction_soft_label, dim=-1)
        label_targets = batch['label_targets']
        # convert label_targets to a new dimension
        # label_targets = torch.matmul(
        #     label_targets, torch.cuda.FloatTensor(LABEL2TOKEN_MATRIX))
        # label_targets = label_targets[:, VALID_XLMR_TOKEN_IDS]
        # label_targets = label_targets / torch.sum(
        #     label_targets, dim=1, keepdim=True)
        tot_score += compute_accuracy_for_soft_targets(
            prediction_soft_label, label_targets)
        loss = F.kl_div(
            prediction_soft_label, label_targets, reduction='sum')
        val_loss += loss.item()
        n_feat += batch['tgt_masks'].sum().item()
    val_loss = sum(all_gather_list(val_loss))
    tot_score = sum(all_gather_list(tot_score))
    n_feat = sum(all_gather_list(n_feat))
    tot_time = time()-st
    val_loss /= n_feat
    val_acc = tot_score / n_feat
    val_log = {'loss': val_loss,
               'acc': val_acc,
               'feat_per_s': n_feat/tot_time}
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"score: {val_acc*100:.2f}")
    return val_log
def validate_mmxlm_soft(model, val_loader):
    LOGGER.info("start running MMXLM-SOFT validation...")
    val_loss = 0
    n_feat = 0
    st = time()
    tot_score = 0
    for i, batch in enumerate(val_loader):
        prediction_soft_label = model(
            batch, task="mmxlm-soft", compute_loss=False)
        # if "kl" in task:
        prediction_soft_label = F.log_softmax(
            prediction_soft_label, dim=-1)
        label_targets = batch['label_targets']
        loss = F.kl_div(
            prediction_soft_label, label_targets, reduction='sum')
        tot_score += compute_accuracy_for_soft_targets(
            prediction_soft_label, label_targets)
        val_loss += loss.item()
        n_feat += batch['tgt_masks'].sum().item()
    val_loss = sum(all_gather_list(val_loss))
    tot_score = sum(all_gather_list(tot_score))
    n_feat = sum(all_gather_list(n_feat))
    tot_time = time()-st
    val_loss /= n_feat
    val_acc = tot_score / n_feat
    val_log = {'loss': val_loss,
               'acc': val_acc,
               'feat_per_s': n_feat/tot_time}
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"score: {val_acc*100:.2f}")
    return val_log
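# A minimal sketch of `compute_accuracy_for_soft_targets` as used by the
# soft-label validations above: both predictions and soft targets are
# argmax-ed and the hits are counted (an assumption; the repo's helper may
# differ in detail).
def compute_accuracy_for_soft_targets(out, labels):
    preds = out.max(dim=-1)[1]
    hard_labels = labels.max(dim=-1)[1]
    return (preds == hard_labels).sum().item()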
def validate_videoQA(model, val_loader, split, task="tvqa",
                     save_logits=False):
    LOGGER.info(f"start running validation on {task} {split} split...")
    model.eval()
    val_loss = 0
    n_ex = 0
    tot_score = 0
    results = {}
    logits = {}
    val_log = {}
    st = time()
    has_gt_target = True
    for i, batch in enumerate(val_loader):
        targets = batch['targets']
        if has_gt_target and targets.min() < 0:
            has_gt_target = False
            LOGGER.info(
                "No GT annotations provided, only generate predictions")
        if 'qids' in batch:
            qids = batch['qids']
            del batch['qids']
        scores = model(batch, task, compute_loss=False)
        answers = [i for i in
                   scores.max(dim=-1, keepdim=False)[1].cpu().tolist()]
        for qid, answer in zip(qids, answers):
            results[str(qid)] = answer
        if save_logits:
            # keep the tensor around for the loss below; only the copy
            # stored in `logits` is converted to a list
            scores_list = scores.cpu().tolist()
            for qid, logit in zip(qids, scores_list):
                logits[str(qid)] = logit
        if has_gt_target:
            loss = F.cross_entropy(scores, targets.squeeze(-1),
                                   reduction='sum')
            val_loss += loss.item()
            tot_score += compute_accuracies(scores, targets)
        n_ex += len(qids)
    if has_gt_target:
        val_loss = sum(all_gather_list(val_loss))
        tot_score = sum(all_gather_list(tot_score))
        n_ex = sum(all_gather_list(n_ex))
        tot_time = time() - st
        val_loss /= n_ex
        val_acc = tot_score / n_ex
        val_log = {'valid/loss': val_loss,
                   'valid/acc': val_acc,
                   'valid/ex_per_s': n_ex / tot_time}
        LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                    f"loss:{val_loss:.2f}, score: {val_acc*100:.2f}")
    model.train()
    return val_log, results, logits
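# A minimal sketch of the `compute_accuracies` helper shared by the QA-style
# validations above: argmax multi-way logits when needed, then count matches
# against the targets (an illustrative assumption, not the repo's exact
# helper).
def compute_accuracies(out, labels):
    if out.dim() == 2 and out.size(1) > 1:  # raw multi-choice logits
        out = out.max(dim=-1)[1]
    return (out.view(-1) == labels.view(-1)).sum().item()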
def validate_vcmr(model, val_loader, opts):
    LOGGER.info(
        "start running validation (easy version with loss computed)...")
    val_loss = 0
    val_loss_st_ed = 0
    val_loss_neg_ctx = 0
    val_loss_neg_q = 0
    n_ex = 0
    n_ex_pos = 0
    st = time()
    for i, batch in enumerate(val_loader):
        if 'qids' in batch:
            # qids = batch['qids']
            del batch['qids']
        n_ex += len(batch['q_vidx'])
        loss_st_ed, loss_neg_ctx, loss_neg_q =\
            model(batch, opts.task, compute_loss=True)
        val_loss_st_ed += loss_st_ed.item()
        if opts.lw_neg_ctx != 0 or opts.lw_neg_q != 0:
            n_pos = len(loss_neg_ctx)
            val_loss_neg_ctx += loss_neg_ctx.sum().item()
            val_loss_neg_q += loss_neg_q.sum().item()
            n_ex_pos += n_pos
    val_loss_st_ed = sum(all_gather_list(val_loss_st_ed))
    val_loss_neg_ctx = sum(all_gather_list(val_loss_neg_ctx))
    val_loss_neg_q = sum(all_gather_list(val_loss_neg_q))
    n_ex = sum(all_gather_list(n_ex))
    n_ex_pos = sum(all_gather_list(n_ex_pos))
    tot_time = time() - st
    if opts.lw_st_ed:
        val_loss_st_ed /= n_ex
        val_loss_st_ed /= opts.lw_st_ed
    if n_ex_pos > 0 and opts.lw_neg_q > 0 and\
            opts.lw_neg_ctx > 0:
        val_loss_neg_ctx /= n_ex_pos
        val_loss_neg_q /= n_ex_pos
        val_loss_neg_ctx /= opts.lw_neg_ctx
        val_loss_neg_q /= opts.lw_neg_q
    val_loss = opts.lw_st_ed * val_loss_st_ed +\
        opts.lw_neg_ctx * val_loss_neg_ctx +\
        opts.lw_neg_q * val_loss_neg_q
    val_log = {'valid/loss_overall': val_loss,
               'valid/loss_st_ed': val_loss_st_ed,
               'valid/loss_neg_ctx': val_loss_neg_ctx,
               'valid/loss_neg_q': val_loss_neg_q,
               'valid/ex_per_s': n_ex / tot_time}
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"loss: {val_loss:.2f}")
    return val_log
def main(opts):
    hvd.init()
    # rank 0 downloads the tokenizer first; the dummy all_gather acts as a
    # barrier so other ranks only hit the (cached) files afterwards
    if hvd.rank() == 0:
        toker = RobertaTokenizer.from_pretrained('roberta-base')
        all_gather_list(None)
    else:
        all_gather_list(None)
        toker = RobertaTokenizer.from_pretrained('roberta-base')

    model_opts = Struct(json.load(open(f"{opts.model_dir}/log/hps.json")))
    model_config = f"{opts.model_dir}/log/model_config.json"
    video_db = load_video_sub_dataset(model_opts.vfeat_db,
                                      model_opts.sub_txt_db,
                                      model_opts.vfeat_interval,
                                      model_opts)
    dset = TvcEvalDataset(video_db, opts.target_clip)
    loader = build_dataloader(dset, opts.batch_size,
                              TvcEvalDataset.collate, False, opts)

    checkpoint = torch.load(f"{opts.model_dir}/ckpt/"
                            f"model_step_{opts.ckpt_step}.pt")
    img_pos_embed_weight_key = ("v_encoder.f_encoder.img_embeddings"
                                ".position_embeddings.weight")
    if img_pos_embed_weight_key in checkpoint:
        max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    else:
        max_frm_seq_len = MAX_FRM_SEQ_LEN
    model = HeroForTvc.from_pretrained(model_config,
                                       state_dict=checkpoint,
                                       vfeat_dim=VFEAT_DIM,
                                       max_frm_seq_len=max_frm_seq_len,
                                       lsr=model_opts.lsr)
    model.cuda()
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    bos = toker.convert_tokens_to_ids(['<s>'])[0]
    eos = toker.convert_tokens_to_ids(['</s>'])[0]
    model.eval()
    generator = TvcGenerator(model, opts.max_gen_step, bos, eos, opts.fp16)
    results = decode(loader, generator, toker)
    save_jsonl(results, opts.output)

    # evaluate score if possible
    if (hvd.rank() == 0
            and 'descs' in json.loads(next(iter(open(opts.target_clip))))):
        evaluator = TVCEval(opts.target_clip)
        score = evaluator(results)
        print(score)
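# A minimal sketch of the `Struct` wrapper used above: it exposes the
# hps.json dict via attribute-style access (an assumption about its
# behavior, not the repo's exact class).
class Struct(object):
    def __init__(self, dict_):
        self.__dict__.update(dict_)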
def compute_hard_neg(model, loader, dataset, hard_negative_num,
                     hard_neg_dir):
    txt2hardimgs, img2hardtxts = get_hard_negs(model, loader,
                                               hard_negative_num)
    with open(f'{hard_neg_dir}/'
              f'txt2hardimgs_rank{hvd.rank()}.json', 'w') as f:
        json.dump(txt2hardimgs, f)
    if hvd.rank() == 0:
        with open(f'{hard_neg_dir}/img2hardtxts.json', 'w') as f:
            json.dump(img2hardtxts, f)
    all_gather_list(None)  # dummy sync to wait for writing
    if isinstance(dataset, ConcatDataset):
        for dset in dataset.datasets:
            dset.reload_hard_negs(hard_neg_dir)
    else:
        dataset.reload_hard_negs(hard_neg_dir)
def all_gather_stats_list(stat_list, max_size=4096):
    """
    Gather a `Statistics` list across all processes/nodes

    Args:
        stat_list(list([`Statistics`])): list of statistics objects to
            gather across all processes/nodes
        max_size(int): max buffer size to use

    Returns:
        our_stats(list([`Statistics`])): list of updated stats
    """
    from torch.distributed import get_rank
    from utils.distributed import all_gather_list

    # Get a list of world_size lists with len(stat_list) Statistics objects
    all_stats = all_gather_list(stat_list, max_size=max_size)

    our_rank = get_rank()
    our_stats = all_stats[our_rank]
    for other_rank, stats in enumerate(all_stats):
        if other_rank == our_rank:
            continue
        for i, stat in enumerate(stats):
            our_stats[i].update(stat, update_n_src_words=True)
    return our_stats
def validate(loader, generator, tokenizer, evaluator):
    st = time()
    generator.model.eval()
    results = []
    for batch in loader:
        vids = batch['vid_names']
        cids = batch['clip_ids']
        all_ts = batch['all_ts']
        outputs = generator.greedy_decode(batch)
        for vid, cid, ts, out_ids in zip(vids, cids, all_ts, outputs):
            output = tokenizer.convert_tokens_to_string(
                tokenizer.convert_ids_to_tokens(out_ids))
            results.append({'vid_name': vid, 'clip_id': cid, 'ts': ts,
                            'descs': [{'desc': output}]})
    results = [r for rs in all_gather_list(results) for r in rs]
    LOGGER.info(f'decoding finished in {int(time() - st)} seconds')
    if hvd.rank() == 0:
        val_log = evaluator(results)
        LOGGER.info(f'Validation finished in {int(time() - st)} seconds')
        LOGGER.info(f'CIDEr: {val_log["CIDEr"]}')
    else:
        val_log = {}
    generator.model.train()
    return val_log, results
def test(model, val_loader, label2ans):
    from utils.logger import LOGGER
    LOGGER.info("start running test...")
    model.eval()
    n_ex = 0
    st = time()
    results = []
    for i, batch in enumerate(val_loader):
        scores = model(batch, compute_loss=False)
        answers = torch.nn.functional.softmax(scores, dim=-1)
        for qid, answer in zip(batch['qids'], answers):
            results.append({'id': qid,
                            'proba': answer[0].cpu().item(),
                            'label': int(answer[0].cpu().item() > 0.5)})
        n_ex += len(batch['qids'])
    n_ex = sum(all_gather_list(n_ex))
    tot_time = time() - st
    val_log = {'valid/ex_per_s': n_ex / tot_time}
    model.train()
    LOGGER.info(f"validation finished in {int(tot_time)} seconds")
    return val_log, results
def evaluate(model, eval_loader, label2ans, save_logits=False):
    LOGGER.info("start running evaluation...")
    model.eval()
    n_ex = 0
    st = time()
    results = []
    logits = {}
    for i, batch in enumerate(eval_loader):
        qids = batch["qids"]
        scores = model(batch, compute_loss=False)
        answers = [label2ans[j] for j in
                   scores.max(dim=-1, keepdim=False)[1].cpu().tolist()]
        for qid, answer in zip(qids, answers):
            results.append({"answer": answer, "question_id": int(qid)})
        if save_logits:
            scores = scores.cpu()
            for j, qid in enumerate(qids):  # do not shadow batch index i
                logits[qid] = scores[j].half().numpy()
        if i % 100 == 0 and hvd.rank() == 0:
            n_results = len(results)
            n_results *= hvd.size()  # an approximation to avoid hangs
            LOGGER.info(f"{n_results}/{len(eval_loader.dataset)} "
                        "answers predicted")
        n_ex += len(qids)
    n_ex = sum(all_gather_list(n_ex))
    tot_time = time() - st
    val_log = {"valid/ex_per_s": n_ex / tot_time}
    model.train()
    LOGGER.info(f"evaluation finished in {int(tot_time)} seconds "
                f"at {int(n_ex/tot_time)} examples per second")
    return val_log, results, logits
def validate_violin(model, val_loader, split, save_logits=False):
    LOGGER.info(f"start running validation on VIOLIN {split} split...")
    model.eval()
    val_loss = 0
    n_ex = 0
    tot_score = 0
    results = {}
    logits = {}
    st = time()
    for i, batch in enumerate(val_loader):
        targets = batch['targets']
        if 'qids' in batch:
            qids = batch['qids']
            del batch['qids']
        scores = model(batch, "violin", compute_loss=False)
        predictions = (torch.sigmoid(scores) > 0.5).long()
        answers = predictions.squeeze().cpu().tolist()
        for qid, answer in zip(qids, answers):
            results[str(qid)] = answer
        if save_logits:
            # keep the tensor for the BCE loss below
            scores_list = scores.cpu().tolist()
            for qid, logit in zip(qids, scores_list):
                logits[str(qid)] = logit
        loss = F.binary_cross_entropy(torch.sigmoid(scores),
                                      targets.to(dtype=scores.dtype),
                                      reduction='sum')
        val_loss += loss.item()
        tot_score += compute_accuracies(predictions, targets)
        n_ex += len(qids)
    val_loss = sum(all_gather_list(val_loss))
    tot_score = sum(all_gather_list(tot_score))
    n_ex = sum(all_gather_list(n_ex))
    tot_time = time() - st
    val_loss /= n_ex
    val_acc = tot_score / n_ex
    val_log = {f'valid/{split}_loss': val_loss,
               f'valid/{split}_acc': val_acc,
               f'valid/{split}_ex_per_s': n_ex / tot_time}
    model.train()
    LOGGER.info(f"validation of {split} split finished in {int(tot_time)}s, "
                f"loss:{val_loss:.2f}, score: {val_acc*100:.2f}")
    return val_log, results, logits
def validate_mrfr(model, val_loader):
    LOGGER.info("start running MRFR validation...")
    val_loss = 0
    n_feat = 0
    st = time()
    for i, batch in enumerate(val_loader):
        loss = model(batch, task='mrfr', compute_loss=True)
        val_loss += loss.sum().item() / IMG_DIM
        n_feat += batch['img_mask_tgt'].sum().item()
    val_loss = sum(all_gather_list(val_loss))
    n_feat = sum(all_gather_list(n_feat))
    tot_time = time() - st
    val_loss /= n_feat
    val_log = {'loss': val_loss,
               'feat_per_s': n_feat / tot_time}
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"loss: {val_loss:.2f}")
    return val_log
def validate_mrc(model, val_loader, task):
    LOGGER.info("start running MRC validation...")
    val_loss = 0
    n_feat = 0
    st = time()
    tot_score = 0
    for i, batch in enumerate(val_loader):
        prediction_soft_label = model(batch, task=task, compute_loss=False)
        # pull the targets out before branching so both paths can use them
        label_targets = batch["label_targets"]
        if "kl" in task:
            prediction_soft_label = F.log_softmax(prediction_soft_label,
                                                  dim=-1)
            loss = F.kl_div(prediction_soft_label, label_targets,
                            reduction="sum")
            tot_score += compute_accuracy_for_soft_targets(
                prediction_soft_label, label_targets)
        else:
            # background class should not be the target
            cls_label_targets = label_targets[:, 1:].max(dim=-1)[1] + 1
            loss = F.cross_entropy(prediction_soft_label, cls_label_targets,
                                   ignore_index=0, reduction="sum")
            tot_score += compute_accuracy_for_soft_targets(
                prediction_soft_label[:, 1:], label_targets[:, 1:])
        val_loss += loss.item()
        n_feat += batch["img_mask_tgt"].sum().item()
    val_loss = sum(all_gather_list(val_loss))
    tot_score = sum(all_gather_list(tot_score))
    n_feat = sum(all_gather_list(n_feat))
    tot_time = time() - st
    val_loss /= n_feat
    val_acc = tot_score / n_feat
    val_log = {"loss": val_loss,
               "acc": val_acc,
               "feat_per_s": n_feat / tot_time}
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"score: {val_acc*100:.2f}")
    return val_log
def validate_fom(model, val_loader):
    LOGGER.info("start running FOM validation...")
    val_loss = 0
    n_ex = 0
    n_valid_ex = 0
    tot_score = 0
    st = time()
    for i, batch in enumerate(val_loader):
        targets = batch['targets']
        batch_size, seq_len = targets.size()
        vids = batch['vids']
        del batch['targets']
        del batch['vids']
        scores = model(batch, task='fom', compute_loss=False)
        targets_valid = targets.view(scores.shape[0])
        loc = (targets_valid != -1).nonzero().squeeze()
        scores_valid = scores[loc, :]
        targets_valid = targets_valid[loc]
        loss = F.cross_entropy(scores_valid, targets_valid, reduction='sum')
        val_loss += loss.item()
        tot_score += (scores_valid.max(dim=-1, keepdim=False)[1]
                      == targets_valid).sum().item()
        n_valid_ex += len(targets_valid)
        n_ex += len(vids)
    val_loss = sum(all_gather_list(val_loss))
    tot_score = sum(all_gather_list(tot_score))
    n_ex = sum(all_gather_list(n_ex))
    n_valid_ex = sum(all_gather_list(n_valid_ex))
    tot_time = time() - st
    val_loss /= n_valid_ex
    val_acc = tot_score / n_valid_ex
    val_log = {'valid/loss': val_loss,
               'valid/acc': val_acc,
               'valid/ex_per_s': n_ex / tot_time}
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"score: {val_acc*100:.2f}")
    return val_log
def evaluate(model, eval_loader, eval_len, label2ans, save_logits=False,
             task='vqa'):
    LOGGER.info("start running evaluation {}...".format(task))
    model.eval()
    n_ex = 0
    tot_score = 0
    st = time()
    results = []
    logits = {}
    pbar = tqdm(total=eval_len)
    for i, batch in enumerate(eval_loader):
        qids = batch['qids']
        scores = model(batch, compute_loss=False, task=task)
        targets = batch['targets']
        tot_score += compute_score_with_logits(scores, targets).sum().item()
        answers = [label2ans[j] for j in
                   scores.max(dim=-1, keepdim=False)[1].cpu().tolist()]
        for qid, answer in zip(qids, answers):
            results.append({'answer': answer, 'question_id': qid})
        if save_logits:
            scores = scores.cpu()
            for j, qid in enumerate(qids):  # do not shadow batch index i
                logits[qid] = scores[j].half().numpy()
        n_ex += len(qids)
        pbar.update(len(qids))
    tot_score = sum(all_gather_list(tot_score))
    n_ex = sum(all_gather_list(n_ex))
    acc = tot_score / n_ex
    tot_time = time() - st
    val_log = {'valid/ex_per_s': n_ex / tot_time, 'valid/acc': acc}
    model.train()
    LOGGER.info(f"evaluation finished in {int(tot_time)} seconds "
                f"at {int(n_ex/tot_time)} examples per second")
    return val_log, results, logits
def validate(model, val_loader):
    if hvd.rank() == 0:
        pbar = tqdm(total=len(val_loader))
    else:
        pbar = NoOp()
    LOGGER.info("start running Image Retrieval validation ...")
    model.eval()
    n_ex = 0
    st = time()
    recall_at_1, recall_at_5, recall_at_10 = 0, 0, 0
    for batch in val_loader:
        scores = model(batch, compute_loss=False)
        _, indices = scores.squeeze(1).topk(10, dim=0)
        rank = (indices == 0).nonzero()
        if rank.numel():
            rank = rank.item()
            if rank < 1:
                recall_at_1 += 1
            if rank < 5:
                recall_at_5 += 1
            if rank < 10:
                recall_at_10 += 1
        n_ex += 1
        pbar.update(1)
    n_ex = sum(all_gather_list(n_ex))
    recall_at_1 = sum(all_gather_list(recall_at_1)) / n_ex
    recall_at_5 = sum(all_gather_list(recall_at_5)) / n_ex
    recall_at_10 = sum(all_gather_list(recall_at_10)) / n_ex
    tot_time = time() - st
    val_log = {'valid/ex_per_s': n_ex / tot_time,
               'valid/recall_1': recall_at_1,
               'valid/recall_5': recall_at_5,
               'valid/recall_10': recall_at_10}
    model.train()
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"recall_1: {recall_at_1*100:.2f}, "
                f"recall_5: {recall_at_5*100:.2f}, "
                f"recall_10: {recall_at_10*100:.2f}")
    pbar.close()
    return val_log
def evaluate(model, eval_loader): LOGGER.info("start running evaluation...") model.eval() tot_score = 0 n_ex = 0 st = time() predictions = [] for i, batch in enumerate(eval_loader): (tgt_box_list, obj_boxes_list, sent_ids) = ( batch['tgt_box'], batch['obj_boxes'], batch['sent_ids']) # scores (n, max_num_bb) scores = model(batch, compute_loss=False) ixs = torch.argmax(scores, 1).cpu().detach().numpy() # (n, ) # pred_boxes for ix, obj_boxes, tgt_box, sent_id in \ zip(ixs, obj_boxes_list, tgt_box_list, sent_ids): pred_box = obj_boxes[ix] predictions.append({'sent_id': int(sent_id), 'pred_box': pred_box.tolist(), 'tgt_box': tgt_box.tolist()}) if eval_loader.loader.dataset.computeIoU(pred_box, tgt_box) > .5: tot_score += 1 n_ex += 1 if i % 100 == 0 and hvd.rank() == 0: n_results = len(predictions) n_results *= hvd.size() # an approximation to avoid hangs LOGGER.info(f'{n_results}/{len(eval_loader.dataset)} ' 'answers predicted') n_ex = sum(all_gather_list(n_ex)) tot_time = time()-st tot_score = sum(all_gather_list(tot_score)) val_acc = tot_score / n_ex val_log = {'valid/acc': val_acc, 'valid/ex_per_s': n_ex/tot_time} model.train() LOGGER.info(f"validation ({n_ex} sents) finished in" f" {int(tot_time)} seconds" f", accuracy: {val_acc*100:.2f}%") # summarizae results = {'acc': val_acc, 'predictions': predictions} return val_log, results
def validate(model, val_loader, label2ans): LOGGER.info("start running validation...") model.eval() val_loss = 0 tot_score = 0 n_ex = 0 st = time() results = {} for i, batch in enumerate(val_loader): scores = model(batch, compute_loss=False, task='vqa') targets = batch['targets'] loss = F.binary_cross_entropy_with_logits(scores, targets, reduction='sum') val_loss += loss.item() tot_score += compute_score_with_logits(scores, targets).sum().item() answers = [ label2ans[i] for i in scores.max(dim=-1, keepdim=False)[1].cpu().tolist() ] for qid, answer in zip(batch['qids'], answers): results[qid] = answer n_ex += len(batch['qids']) # TODO: remove grossness when actual training don't commit this #if i > 4: # break val_loss = sum(all_gather_list(val_loss)) tot_score = sum(all_gather_list(tot_score)) n_ex = sum(all_gather_list(n_ex)) tot_time = time() - st val_loss /= n_ex val_acc = tot_score / n_ex val_log = { 'valid/loss': val_loss, 'valid/acc': val_acc, 'valid/ex_per_s': n_ex / tot_time } model.train() LOGGER.info(f"validation finished in {int(tot_time)} seconds, " f"score: {val_acc*100:.2f}") return val_log, results
def validate(model, val_dataloader):
    LOGGER.info("start running evaluation.")
    model.eval()
    tot_score = 0
    n_ex = 0
    st = time()
    predictions = {}
    for i, batch in enumerate(val_dataloader):
        # inputs
        (tgt_box_list, obj_boxes_list, sent_ids) = (
            batch['tgt_box'], batch['obj_boxes'], batch['sent_ids'])
        # scores (n, max_num_bb)
        scores = model(batch, compute_loss=False)
        ixs = torch.argmax(scores, 1).cpu().detach().numpy()  # (n, )
        # pred_boxes
        for ix, obj_boxes, tgt_box, sent_id in \
                zip(ixs, obj_boxes_list, tgt_box_list, sent_ids):
            pred_box = obj_boxes[ix]
            predictions[int(sent_id)] = {'pred_box': pred_box.tolist(),
                                         'tgt_box': tgt_box.tolist()}
            if val_dataloader.loader.dataset.computeIoU(
                    pred_box, tgt_box) > .5:
                tot_score += 1
            n_ex += 1
    tot_time = time() - st
    tot_score = sum(all_gather_list(tot_score))
    n_ex = sum(all_gather_list(n_ex))
    val_acc = tot_score / n_ex
    val_log = {'valid/acc': val_acc, 'valid/ex_per_s': n_ex / tot_time}
    model.train()
    LOGGER.info(
        f"validation ({n_ex} sents) finished in {int(tot_time)} seconds"
        f", accuracy: {val_acc*100:.2f}%")
    return val_log, predictions
def validate(model, val_loader, label2ans): LOGGER.info("start running validation...") model.eval() val_loss = 0 tot_score = 0 n_ex = 0 st = time() results = {} for i, batch in enumerate(val_loader): scores = model(batch, compute_loss=False) targets = batch["targets"] loss = F.binary_cross_entropy_with_logits(scores, targets, reduction="sum") val_loss += loss.item() tot_score += compute_score_with_logits(scores, targets).sum().item() answers = [ label2ans[i] for i in scores.max(dim=-1, keepdim=False)[1].cpu().tolist() ] for qid, answer in zip(batch["qids"], answers): results[qid] = answer n_ex += len(batch["qids"]) val_loss = sum(all_gather_list(val_loss)) tot_score = sum(all_gather_list(tot_score)) n_ex = sum(all_gather_list(n_ex)) tot_time = time() - st val_loss /= n_ex val_acc = tot_score / n_ex val_log = { "valid/loss": val_loss, "valid/acc": val_acc, "valid/ex_per_s": n_ex / tot_time, } model.train() LOGGER.info(f"validation finished in {int(tot_time)} seconds, " f"score: {val_acc*100:.2f}") return val_log, results
def decode(loader, generator, tokenizer):
    st = time()
    results = []
    for batch in tqdm(loader, desc='decoding...'):
        vids = batch['vid_names']
        cids = batch['clip_ids']
        all_ts = batch['all_ts']
        outputs = generator.greedy_decode(batch)
        for vid, cid, ts, out_ids in zip(vids, cids, all_ts, outputs):
            output = tokenizer.convert_tokens_to_string(
                tokenizer.convert_ids_to_tokens(out_ids))
            results.append({'vid_name': vid, 'clip_id': cid, 'ts': ts,
                            'descs': [{'desc': output}]})
    results = [r for rs in all_gather_list(results) for r in rs]
    print(f'decoding finished in {int(time() - st)} seconds')
    return results
def validate_itm(model, val_loader):
    LOGGER.info("start running ITM validation...")
    val_loss = 0
    tot_ot_loss = 0
    tot_ot_pos = 0
    tot_ot_neg = 0
    tot_score = 0
    n_ex = 0
    st = time()
    for i, batch in enumerate(val_loader):
        scores, ot_loss = model(batch, task='itm', compute_loss=False)
        if ot_loss is not None:
            if isinstance(ot_loss, tuple):
                ot_pos, ot_neg = ot_loss
                ot_pos = ot_pos.sum().item()
                ot_neg = ot_neg.sum().item()
                tot_ot_pos += ot_pos
                tot_ot_neg += ot_neg
                tot_ot_loss += ot_pos - ot_neg
            else:
                tot_ot_loss += ot_loss.sum().item()
        targets = batch['targets']
        loss = F.cross_entropy(scores, targets, reduction='sum')
        val_loss += loss.item()
        tot_score += (scores.max(dim=-1)[1] == targets).sum().item()
        n_ex += len(targets)
    val_loss = sum(all_gather_list(val_loss))
    tot_score = sum(all_gather_list(tot_score))
    n_ex = sum(all_gather_list(n_ex))
    tot_time = time() - st
    val_loss /= n_ex
    val_acc = tot_score / n_ex
    val_log = {'valid/loss': val_loss,
               'valid/acc': val_acc,
               'valid/ex_per_s': n_ex / tot_time}
    if ot_loss is not None:
        tot_ot_loss = sum(all_gather_list(tot_ot_loss))
        tot_ot_pos = sum(all_gather_list(tot_ot_pos))
        tot_ot_neg = sum(all_gather_list(tot_ot_neg))
        val_log['valid/ot_loss'] = tot_ot_loss / n_ex
        val_log['valid/ot_pos'] = tot_ot_pos / n_ex
        val_log['valid/ot_neg'] = tot_ot_neg / n_ex
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"score: {val_acc*100:.2f}")
    return val_log
def evaluate(model, eval_loader):
    model.eval()
    st = time()
    LOGGER.info("start running Image/Text Retrieval evaluation ...")
    score_matrix = inference(model, eval_loader)
    dset = eval_loader.dataset
    all_score = hvd.allgather(score_matrix)
    all_txt_ids = [i for ids in all_gather_list(dset.ids) for i in ids]
    all_img_ids = dset.all_img_ids
    assert all_score.size() == (len(all_txt_ids), len(all_img_ids))
    if hvd.rank() != 0:
        return {}, tuple()
    # NOTE: only use rank0 to compute final scores
    eval_log = itm_eval(all_score, all_txt_ids, all_img_ids,
                        dset.txt2img, dset.img2txts)
    results = (all_score, all_txt_ids, all_img_ids)
    tot_time = time() - st
    LOGGER.info(f"evaluation finished in {int(tot_time)} seconds")
    return eval_log, results
def validate_mfm_nce(model, val_loader):
    LOGGER.info("start running MFM-NCE validation...")
    val_loss = 0
    val_l2 = 0
    n_correct = 0
    cosine = 0
    n_feat = 0
    n_neg = 0
    st = time()
    for i, batch in enumerate(val_loader):
        feats, neg_feats = model(batch, task='mfm-nce', compute_loss=False)
        pos_feats = batch['feat_targets']
        logits = model.v_encoder.mfm_nce(feats, pos_feats, neg_feats,
                                         compute_loss=False)
        targets = torch.arange(0, logits.size(0),
                               dtype=torch.long, device=logits.device)
        val_loss += F.cross_entropy(logits, targets, reduction='sum').item()
        val_l2 += F.mse_loss(feats, pos_feats, reduction='none'
                             ).sum(dim=1).sqrt().sum().item()
        n_correct += (logits.max(dim=-1)[1] == targets).sum().item()
        cosine += F.cosine_similarity(feats, pos_feats, dim=-1).sum().item()
        nf = pos_feats.size(0)
        n_feat += nf
        n_neg += neg_feats.size(0) * nf
    val_loss = sum(all_gather_list(val_loss))
    val_l2 = sum(all_gather_list(val_l2))
    n_correct = sum(all_gather_list(n_correct))
    cosine = sum(all_gather_list(cosine))
    n_feat = sum(all_gather_list(n_feat))
    n_neg = sum(all_gather_list(n_neg))
    tot_time = time() - st
    val_loss /= n_feat
    val_acc = n_correct / n_feat
    val_log = {'loss': val_loss,
               'acc': val_acc,
               'l2': val_l2 / n_feat,
               'cosine': cosine / n_feat,
               'feat_per_s': n_feat / tot_time}
    LOGGER.info(f"validation finished in {int(tot_time)} seconds, "
                f"loss: {val_loss:.2f}, acc: {val_acc*100:.2f} "
                f"(average {n_neg/n_feat:.0f} negatives)")
    return val_log
def train(self, train_iter_fct, valid_iter_fct, train_steps, valid_steps):
    """
    The main training loop: iterate over training data (i.e.
    `train_iter_fct`) and run validation (i.e. iterate over
    `valid_iter_fct`).

    Args:
        train_iter_fct(function): a function that returns the train
            iterator, e.g. something like
            train_iter_fct = lambda: generator(*args, **kwargs)
        valid_iter_fct(function): same as train_iter_fct, for valid data
        train_steps(int):
        valid_steps(int):

    Return:
        None
    """
    logger.info('Start training...')

    step = self.optim._step + 1
    true_batchs = []
    accum = 0
    normalization = 0
    train_iter = train_iter_fct()

    total_stats = Statistics()
    report_stats = Statistics()
    self._start_report_manager(start_time=total_stats.start_time)

    while step <= train_steps:
        reduce_counter = 0
        for i, batch in enumerate(train_iter):
            if self.n_gpu == 0 or (i % self.n_gpu == self.gpu_rank):
                if self.gpu_verbose_level > 1:
                    logger.info("GpuRank %d: index: %d accum: %d"
                                % (self.gpu_rank, i, accum))

                true_batchs.append(batch)

                if self.norm_method == "tokens":
                    num_tokens = batch.tgt[1:].ne(
                        self.train_loss.padding_idx).sum()
                    normalization += num_tokens.item()
                else:
                    normalization += batch.batch_size
                accum += 1
                if accum == self.grad_accum_count:
                    reduce_counter += 1
                    if self.gpu_verbose_level > 0:
                        logger.info("GpuRank %d: reduce_counter: %d"
                                    " n_minibatch %d"
                                    % (self.gpu_rank, reduce_counter,
                                       len(true_batchs)))
                    if self.n_gpu > 1:
                        normalization = sum(all_gather_list(normalization))

                    self._gradient_accumulation(
                        true_batchs, normalization, total_stats,
                        report_stats)

                    report_stats = self._maybe_report_training(
                        step, train_steps,
                        self.optim.learning_rate,
                        report_stats)

                    true_batchs = []
                    accum = 0
                    normalization = 0
                    if (step % valid_steps == 0):
                        if self.gpu_verbose_level > 0:
                            logger.info('GpuRank %d: validate step %d'
                                        % (self.gpu_rank, step))
                        valid_iter = valid_iter_fct()
                        valid_stats = self.validate(valid_iter)
                        if self.gpu_verbose_level > 0:
                            logger.info('GpuRank %d: gather valid stat'
                                        ' step %d'
                                        % (self.gpu_rank, step))
                        valid_stats = self._maybe_gather_stats(valid_stats)
                        if self.gpu_verbose_level > 0:
                            logger.info('GpuRank %d: report stat step %d'
                                        % (self.gpu_rank, step))
                        self._report_step(self.optim.learning_rate,
                                          step, valid_stats=valid_stats)

                    if self.gpu_rank == 0:
                        self._maybe_save(step)
                    step += 1
                    if step > train_steps:
                        break
        if self.gpu_verbose_level > 0:
            logger.info('GpuRank %d: we completed an epoch at step %d'
                        % (self.gpu_rank, step))
        train_iter = train_iter_fct()

    return total_stats