Timer['total'] = Tfinal - T1
# print(Timer)

# Periodically print one example (body, sampled summary, scores) and save the log.
if time.time() - time_save > args.save_every:
    print("==========================================")
    print(bodies[0])
    print("-----------")
    print(sampled_summaries[0])
    print("-----------")
    print("Total score:", total_sampled_scores[0].item())
    for scorer in scorers:
        print(scorer['name'] + " score:", scores_track[scorer['name'] + "_scores"][0].item())
    print("-----------")
    logplot.save(printing=True)
    # print(Timer)
    time_save = time.time()
    print("==========================================")

# Checkpoint logic: once enough scores have accumulated, compare the recent
# average score against the best checkpoint and decide whether to revert.
if ckpt_every > 0 and len(total_score_history) > ckpt_lookback:
    current_score = np.mean(total_score_history[-ckpt_lookback:])

    if time.time() - time_ckpt > ckpt_every:
        # min(1.2 * best, 0.8 * best) is "20% worse than best" whether
        # best_ckpt_score is negative or positive (see the sketch below).
        revert_ckpt = best_ckpt_score is not None and current_score < min(
            1.2 * best_ckpt_score, 0.8 * best_ckpt_score)
        print("================================== CKPT TIME, " + str(datetime.now()) +
              " =================================")
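# A minimal, self-contained sketch (not part of the original script; the
# helper name and the example values are illustrative) of the revert-threshold
# arithmetic used above.
def is_worse_than_best(current, best):
    # best = 10.0 -> threshold = min(12.0, 8.0) = 8.0    (20% below best)
    # best = -5.0 -> threshold = min(-6.0, -4.0) = -6.0  (20% more negative)
    return current < min(1.2 * best, 0.8 * best)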
                                        is_next)
loss.backward()

# Next-sentence-prediction accuracy over the batch.
is_next_acc = is_next.eq(torch.argmax(is_next_logits, dim=1)).float().mean().item()

# Masked-LM accuracy, counted over masked positions only: unmasked positions
# carry a label of -1, which can never match an argmax, so only masked
# positions contribute matches.
num_predicts = lm_label_ids.ne(-1).sum().item()
mlm_acc = (lm_label_ids.view(-1).eq(
    torch.argmax(mlm_logits, dim=2).view(-1)).float().sum() / num_predicts).item()

# Gradient accumulation: step the optimizer (and the LR schedule) only once
# every `args.optim_every` batches.
if ib % args.optim_every == 0:
    optimizer.step()
    scheduler.step()  # Update learning rate schedule
    optimizer.zero_grad()

torch.cuda.empty_cache()
summ.cache({"loss": loss.item(),
            "mlm_acc": mlm_acc,
            "is_next_acc": is_next_acc}, prefix="T_")

# At most once a minute: save the training summary and a model checkpoint.
if time.time() - time_save > 60.0:
    summ.save(printing=True)
    time_save = time.time()
    torch.save(model.state_dict(),
               "/home/phillab/models/news_bert_bs" +
               str(args.optim_every * args.train_batch_size) + ".bin")
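# A minimal, runnable sketch (hypothetical model and values, not from the
# script above) of the gradient-accumulation pattern: gradients from
# `optim_every` consecutive batches are summed before one optimizer step,
# so each update sees optim_every * train_batch_size examples, the
# effective batch size that the checkpoint filename above encodes.
import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
optim_every = 8  # hypothetical stand-in for args.optim_every

for ib, batch in enumerate(torch.randn(32, 4, 10)):  # 32 batches of size 4
    loss = model(batch).pow(2).mean()
    loss.backward()  # .backward() adds into .grad, so gradients accumulate
    if ib % optim_every == 0:
        optimizer.step()
        optimizer.zero_grad()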