        if ib % args.optim_every == 0:
            optimizer.step()
            optimizer.zero_grad()

        T7 = time.time()
        Timer['optim'] = T7 - T6

        # log_obj['summary_nwords'] = int(np.mean([summ.count(" ")+1 for summ in sampled_summaries]))
        avg_total = total_sampled_scores.mean().item()

        total_score_history.append(avg_total)
        log_obj['summary_nwords'] = int(np.mean(sampled_end_idxs))
        log_obj['loss'] = Loss.item()
        log_obj['total_score'] = avg_total
        log_obj['count'] = batch_size
        logplot.cache(log_obj, prefix="T_")

        Tfinal = time.time()
        Timer['total'] = Tfinal - T1
        # print(Timer)

        if (time.time() - time_save > args.save_every):
            print("==========================================")
            print(bodies[0])
            print("-----------")
            print(sampled_summaries[0])
            print("-----------")
            print("Total score:", total_sampled_scores[0].item())
            for scorer in scorers:
                print(scorer['name'] + " score:",
                      scores_track[scorer['name'] + "_scores"][0].item())

        loss.backward()
        is_next_acc = is_next.eq(torch.argmax(is_next_logits,
                                              dim=1)).float().mean().item()

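        # lm_label_ids is -1 at positions that are not masked-LM targets, so
        # num_predicts counts only the masked tokens and mlm_acc is the fraction
        # of those whose argmax prediction matches the label.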
        num_predicts = lm_label_ids.ne(-1).sum().item()
        mlm_acc = (lm_label_ids.view(-1).eq(
            torch.argmax(mlm_logits, dim=2).view(-1)).float().sum() /
                   num_predicts).item()

        if ib % args.optim_every == 0:
            scheduler.step()  # Update learning rate schedule
            optimizer.step()
            optimizer.zero_grad()
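            # empty_cache() returns unused blocks held by PyTorch's caching
            # allocator to the GPU driver; tensors still referenced are unaffected.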
            torch.cuda.empty_cache()

        summ.cache(
            {
                "loss": loss.item(),
                "mlm_acc": mlm_acc,
                "is_next_acc": is_next_acc
            },
            prefix="T_")
        if time.time() - time_save > 60.0:
            summ.save(printing=True)
            time_save = time.time()
            torch.save(
                model.state_dict(), "/home/phillab/models/news_bert_bs" +
                str(args.optim_every * args.train_batch_size) + ".bin")
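
# Illustrative sketch (not part of the original scripts): the gradient-accumulation
# pattern used by the loops above, reduced to its core. The toy model, data, and
# accum_steps (playing the role of args.optim_every) are placeholders.
import torch

model = torch.nn.Linear(16, 4)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
accum_steps = 4

optimizer.zero_grad()
for ib in range(32):
    x = torch.randn(8, 16)
    y = torch.randint(0, 4, (8,))
    loss = torch.nn.functional.cross_entropy(model(x), y)
    # Scale so the accumulated gradient equals the average over accum_steps batches.
    (loss / accum_steps).backward()
    if (ib + 1) % accum_steps == 0:
        optimizer.step()       # apply the accumulated gradient
        optimizer.zero_grad()  # clear it before the next group of batches
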
if args.fp16:
    try:
        from apex import amp
    except ImportError:
        raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
    kw_cov.model, optimizer = amp.initialize(kw_cov.model, optimizer, opt_level="O1") # For now O1. See details at https://nvidia.github.io/apex/amp.html

time_save = time.time()
optim_every = 4

for ib, batch in enumerate(dataloader):
    contents, summaries = batch
    loss, acc = kw_cov.train_batch(contents, summaries)
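    # Under fp16, amp scales the loss before backward so small gradients do not
    # underflow in half precision; the scaling is undone before the optimizer
    # applies its update.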
    if args.fp16:
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
    else:
        loss.backward()

    if ib % optim_every == 0:
        scheduler.step()  # Update learning rate schedule
        optimizer.step()
        optimizer.zero_grad()

    logplot.cache({"loss": loss.item(), "accuracy": acc, "count": len(batch)}, prefix="T_")
    if time.time() - time_save > 60.0:
        logplot.save(printing=True)
        time_save = time.time()
        kw_cov.save_model("/home/phillab/models/bert_coverage_"+args.experiment+".bin")
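
# Illustrative sketch (not from the original scripts): restoring a state_dict
# checkpoint like the ones written above. The toy model and temporary path are
# placeholders for the example.
import os
import tempfile
import torch

toy = torch.nn.Linear(8, 2)
ckpt_path = os.path.join(tempfile.gettempdir(), "toy_checkpoint.bin")
torch.save(toy.state_dict(), ckpt_path)                 # same pattern as the saves above

restored = torch.nn.Linear(8, 2)                        # must match the saved architecture
restored.load_state_dict(torch.load(ckpt_path, map_location="cpu"))
restored.eval()                                         # switch to inference mode
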
Example #4
        model.train()
        sources, targets = map_batch(batch, args.task)

        loss = model.train_batch(sources, targets, no_preinput=no_preinput)
        if args.fp16:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        if ib % args.optim_every == 0:
            optimizer.step()
            scheduler.step()  # Update learning rate schedule
            optimizer.zero_grad()

        summ.cache({"loss": loss.item(), "count": len(batch)}, prefix="T_")
        if time.time() - time_save > 60.0:

            print("Starting the eval")
            model.eval()

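            # torch.no_grad() turns off gradient tracking for the evaluation
            # passes below, which saves memory; model.eval() above already put
            # modules such as dropout into inference mode.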
            with torch.no_grad():
                for batch in tqdm.tqdm(dl_dev):
                    sources, targets = map_batch(batch, args.task)
                    loss = model.train_batch(sources,
                                             targets,
                                             no_preinput=no_preinput)
                    summ.cache({
                        "loss": loss.item(),
                        "count": len(batch)
                    },