Example #1
tminerr = inf_default

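# score the freshly initialized model on the validation data to record the starting dev loss/error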
minloss, minerr = eva(vd, vl, mymodel, lossf, cuda_device, multi_gpu, use_amp)
logger.info("".join(("Init lr: ", ",".join(tostr(getlr(optimizer))),
                     ", Dev Loss/Error: %.3f %.2f" % (minloss, minerr))))

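# a fresh run saves the initial weights; a fine-tuning run instead tries to resume from saved training states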
if fine_tune_m is None:
    save_model(mymodel, wkdir + "init.h5", multi_gpu, logger)
    logger.info("Initial model saved")
else:
    cnt_states = cnfg.train_statesf
    if (cnt_states is not None) and p_check(cnt_states):
        logger.info("Continue last epoch")
        tminerr, done_tokens, cur_checkid, remain_steps, _ = train(
            td, load_states(cnt_states), vd, vl, optimizer, lrsch, mymodel,
            lossf, cuda_device, logger, done_tokens, multi_gpu, tokens_optm,
            batch_report, save_every, chkpf, chkpof, statesf, num_checkpoint,
            cur_checkid, report_eva, remain_steps, False, False, scaler)
        vloss, vprec = eva(vd, vl, mymodel, lossf, cuda_device, multi_gpu,
                           use_amp)
        logger.info("Epoch: 0, train loss: %.3f, valid loss/error: %.3f %.2f" %
                    (tminerr, vloss, vprec))
        save_model(
            mymodel,
            wkdir + "train_0_%.3f_%.3f_%.2f.h5" % (tminerr, vloss, vprec),
            multi_gpu, logger)
        if save_optm_state:
            h5save(
                optimizer.state_dict(), wkdir +
                "train_0_%.3f_%.3f_%.2f.optm.h5" % (tminerr, vloss, vprec))
Example #2
tminerr = inf_default

minloss, minerr = eva(vd, nvalid, mymodel, lossf, cuda_device, multi_gpu,
                      use_amp)
logger.info("".join(("Init lr: ", ",".join(tostr(getlr(optimizer))),
                     ", Dev Loss/Error: %.3f %.2f" % (minloss, minerr))))

if fine_tune_m is None:
    save_model(mymodel, wkdir + "init.h5", multi_gpu, logger)
    logger.info("Initial model saved")
else:
    cnt_states = cnfg.train_statesf
    if (cnt_states is not None) and p_check(cnt_states):
        logger.info("Continue last epoch")
        tminerr, done_tokens, cur_checkid, remain_steps, _ = train(
            td, load_states(cnt_states), vd, nvalid, optimizer, lrsch, mymodel,
            lossf, cuda_device, logger, done_tokens, multi_gpu, tokens_optm,
            batch_report, save_every, chkpf, chkpof, statesf, num_checkpoint,
            cur_checkid, report_eva, remain_steps, False, False, scaler)
        vloss, vprec = eva(vd, nvalid, mymodel, lossf, cuda_device, multi_gpu,
                           use_amp)
        logger.info("Epoch: 0, train loss: %.3f, valid loss/error: %.3f %.2f" %
                    (tminerr, vloss, vprec))
        save_model(
            mymodel,
            wkdir + "train_0_%.3f_%.3f_%.2f.h5" % (tminerr, vloss, vprec),
            multi_gpu, logger)
        if save_optm_state:
            h5save(
                optimizer.state_dict(), wkdir +
                "train_0_%.3f_%.3f_%.2f.optm.h5" % (tminerr, vloss, vprec))
Example #3
num_checkpoint = cnfg.num_checkpoint
cur_checkid = 0

tminerr = inf_default

minloss, minerr = eva(vd, nvalid, mymodel, lossf, cuda_device, multi_gpu, use_amp)
logger.info("".join(("Init lr: ", ",".join(tostr(getlr(optimizer))), ", Dev Loss/Error: %.3f %.2f" % (minloss, minerr))))

if fine_tune_m is None:
	save_model(mymodel, wkdir + "init.h5", multi_gpu, logger)
	logger.info("Initial model saved")
else:
	cnt_states = cnfg.train_statesf
	if (cnt_states is not None) and p_check(cnt_states):
		logger.info("Continue last epoch")
		tminerr, done_tokens, cur_checkid, remain_steps, _ = train(td, load_states(cnt_states), vd, nvalid, optimizer, lrsch, mymodel, lossf, cuda_device, logger, done_tokens, multi_gpu, tokens_optm, batch_report, save_every, chkpf, chkpof, statesf, num_checkpoint, cur_checkid, report_eva, remain_steps, False, False, scaler)
		vloss, vprec = eva(vd, nvalid, mymodel, lossf, cuda_device, multi_gpu, use_amp)
		logger.info("Epoch: 0, train loss: %.3f, valid loss/error: %.3f %.2f" % (tminerr, vloss, vprec))
		save_model(mymodel, wkdir + "train_0_%.3f_%.3f_%.2f.h5" % (tminerr, vloss, vprec), multi_gpu, logger)
		if save_optm_state:
			h5save(optimizer.state_dict(), wkdir + "train_0_%.3f_%.3f_%.2f.optm.h5" % (tminerr, vloss, vprec))
		logger.info("New best model saved")

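The block below converts the fractional settings cnfg.dss_ws and cnfg.dss_rm into absolute counts scaled by the training-set size ntrain (both must lie strictly between 0 and 1 to take effect). A quick worked example of the arithmetic, with hypothetical values:

# hypothetical values, purely to illustrate the arithmetic in the block below
ntrain = 100000
frac_ws = 0.5    # stands in for cnfg.dss_ws
frac_rm = 0.25   # stands in for cnfg.dss_rm

dss_ws = int(frac_ws * ntrain)                     # 50000
dss_rm = int(frac_rm * ntrain * (1.0 - frac_ws))   # int(0.25 * 100000 * 0.5) = 12500
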
if cnfg.dss_ws is not None and cnfg.dss_ws > 0.0 and cnfg.dss_ws < 1.0:
	dss_ws = int(cnfg.dss_ws * ntrain)
	_Dws = {}
	_prev_Dws = {}
	_crit_inc = {}
	if cnfg.dss_rm is not None and cnfg.dss_rm > 0.0 and cnfg.dss_rm < 1.0:
		dss_rm = int(cnfg.dss_rm * ntrain * (1.0 - cnfg.dss_ws))
	else: