# Pre-training setup: score the freshly loaded model on the validation set and
# seed the "best so far" trackers before the first real epoch.
tminerr = inf_default
minloss, minerr = eva(vd, vl, mymodel, lossf, cuda_device, multi_gpu, use_amp)
# Report the starting learning rate(s) and the initial dev loss/error.
logger.info("".join(("Init lr: ", ",".join(tostr(getlr(optimizer))), ", Dev Loss/Error: %.3f %.2f" % (minloss, minerr))))
if fine_tune_m is None:
	# Fresh run: archive the untrained initialization for later reference.
	save_model(mymodel, wkdir + "init.h5", multi_gpu, logger)
	logger.info("Initial model saved")
else:
	# Fine-tuning from an existing model: if a saved trainer state file exists,
	# resume and finish the interrupted epoch (reported as "Epoch: 0").
	cnt_states = cnfg.train_statesf
	if (cnt_states is not None) and p_check(cnt_states):
		logger.info("Continue last epoch")
		# Resume training with the restored data-iteration state; train() hands
		# back the updated progress counters so bookkeeping stays consistent.
		# NOTE(review): other chunks of this file pass `nvalid` (not `vl`) in
		# this argument slot — confirm `vl` is intended here.
		tminerr, done_tokens, cur_checkid, remain_steps, _ = train(td, load_states(cnt_states), vd, vl, optimizer, lrsch, mymodel, lossf, cuda_device, logger, done_tokens, multi_gpu, tokens_optm, batch_report, save_every, chkpf, chkpof, statesf, num_checkpoint, cur_checkid, report_eva, remain_steps, False, False, scaler)
		# Re-evaluate after the resumed epoch and persist the result; the loss
		# and error rate are embedded in the checkpoint file name.
		vloss, vprec = eva(vd, vl, mymodel, lossf, cuda_device, multi_gpu, use_amp)
		logger.info("Epoch: 0, train loss: %.3f, valid loss/error: %.3f %.2f" % (tminerr, vloss, vprec))
		save_model(mymodel, wkdir + "train_0_%.3f_%.3f_%.2f.h5" % (tminerr, vloss, vprec), multi_gpu, logger)
		if save_optm_state:
			# Also snapshot the optimizer state so training can be resumed exactly.
			h5save(optimizer.state_dict(), wkdir + "train_0_%.3f_%.3f_%.2f.optm.h5" % (tminerr, vloss, vprec))
# Pre-training setup: score the freshly loaded model on the validation set and
# seed the "best so far" trackers before the first real epoch.
tminerr = inf_default
minloss, minerr = eva(vd, nvalid, mymodel, lossf, cuda_device, multi_gpu, use_amp)
# Report the starting learning rate(s) and the initial dev loss/error.
logger.info("".join(("Init lr: ", ",".join(tostr(getlr(optimizer))), ", Dev Loss/Error: %.3f %.2f" % (minloss, minerr))))
if fine_tune_m is None:
	# Fresh run: archive the untrained initialization for later reference.
	save_model(mymodel, wkdir + "init.h5", multi_gpu, logger)
	logger.info("Initial model saved")
else:
	# Fine-tuning from an existing model: if a saved trainer state file exists,
	# resume and finish the interrupted epoch (reported as "Epoch: 0").
	cnt_states = cnfg.train_statesf
	if (cnt_states is not None) and p_check(cnt_states):
		logger.info("Continue last epoch")
		# Resume training with the restored data-iteration state; train() hands
		# back the updated progress counters so bookkeeping stays consistent.
		tminerr, done_tokens, cur_checkid, remain_steps, _ = train(td, load_states(cnt_states), vd, nvalid, optimizer, lrsch, mymodel, lossf, cuda_device, logger, done_tokens, multi_gpu, tokens_optm, batch_report, save_every, chkpf, chkpof, statesf, num_checkpoint, cur_checkid, report_eva, remain_steps, False, False, scaler)
		# Re-evaluate after the resumed epoch and persist the result; the loss
		# and error rate are embedded in the checkpoint file name.
		vloss, vprec = eva(vd, nvalid, mymodel, lossf, cuda_device, multi_gpu, use_amp)
		logger.info("Epoch: 0, train loss: %.3f, valid loss/error: %.3f %.2f" % (tminerr, vloss, vprec))
		save_model(mymodel, wkdir + "train_0_%.3f_%.3f_%.2f.h5" % (tminerr, vloss, vprec), multi_gpu, logger)
		if save_optm_state:
			# Also snapshot the optimizer state so training can be resumed exactly.
			h5save(optimizer.state_dict(), wkdir + "train_0_%.3f_%.3f_%.2f.optm.h5" % (tminerr, vloss, vprec))
# Checkpoint rotation configuration: how many rolling checkpoints to keep and
# the index of the next checkpoint slot to write.
num_checkpoint = cnfg.num_checkpoint
cur_checkid = 0
# Pre-training setup: score the freshly loaded model on the validation set and
# seed the "best so far" trackers before the first real epoch.
tminerr = inf_default
minloss, minerr = eva(vd, nvalid, mymodel, lossf, cuda_device, multi_gpu, use_amp)
# Report the starting learning rate(s) and the initial dev loss/error.
logger.info("".join(("Init lr: ", ",".join(tostr(getlr(optimizer))), ", Dev Loss/Error: %.3f %.2f" % (minloss, minerr))))
if fine_tune_m is None:
	# Fresh run: archive the untrained initialization for later reference.
	save_model(mymodel, wkdir + "init.h5", multi_gpu, logger)
	logger.info("Initial model saved")
else:
	# Fine-tuning from an existing model: if a saved trainer state file exists,
	# resume and finish the interrupted epoch (reported as "Epoch: 0").
	cnt_states = cnfg.train_statesf
	if (cnt_states is not None) and p_check(cnt_states):
		logger.info("Continue last epoch")
		# Resume training with the restored data-iteration state; train() hands
		# back the updated progress counters so bookkeeping stays consistent.
		tminerr, done_tokens, cur_checkid, remain_steps, _ = train(td, load_states(cnt_states), vd, nvalid, optimizer, lrsch, mymodel, lossf, cuda_device, logger, done_tokens, multi_gpu, tokens_optm, batch_report, save_every, chkpf, chkpof, statesf, num_checkpoint, cur_checkid, report_eva, remain_steps, False, False, scaler)
		# Re-evaluate after the resumed epoch and persist the result; the loss
		# and error rate are embedded in the checkpoint file name.
		vloss, vprec = eva(vd, nvalid, mymodel, lossf, cuda_device, multi_gpu, use_amp)
		logger.info("Epoch: 0, train loss: %.3f, valid loss/error: %.3f %.2f" % (tminerr, vloss, vprec))
		save_model(mymodel, wkdir + "train_0_%.3f_%.3f_%.2f.h5" % (tminerr, vloss, vprec), multi_gpu, logger)
		if save_optm_state:
			# Also snapshot the optimizer state so training can be resumed exactly.
			h5save(optimizer.state_dict(), wkdir + "train_0_%.3f_%.3f_%.2f.optm.h5" % (tminerr, vloss, vprec))
		logger.info("New best model saved")

# Dynamic sentence sampling setup: cnfg.dss_ws / cnfg.dss_rm are presumably
# fractions of the training set (TODO confirm) converted here into absolute
# sentence counts; the bookkeeping dicts track per-sample weights across epochs.
if cnfg.dss_ws is not None and cnfg.dss_ws > 0.0 and cnfg.dss_ws < 1.0:
	dss_ws = int(cnfg.dss_ws * ntrain)
	_Dws = {}
	_prev_Dws = {}
	_crit_inc = {}
	if cnfg.dss_rm is not None and cnfg.dss_rm > 0.0 and cnfg.dss_rm < 1.0:
		dss_rm = int(cnfg.dss_rm * ntrain * (1.0 - cnfg.dss_ws))
	else:
		# (else-branch body continues beyond this chunk of the file)