# Warm-up phase: train the autoencoder parameters at a fixed bparam until the
# running-mean loss plateaus. OptimizerCreator, compute_grad_fn, problem,
# data_loader, num_batches, hparams, exp_decay, l2_norm, running_mean and the
# parameter pytrees (ae_params, bparam) are assumed to be defined earlier.
import math

import mlflow

opt = OptimizerCreator(
    hparams["meta"]["optimizer"], learning_rate=hparams["natural_lr"]
).get_optimizer()
ma_loss = []
for epoch in range(hparams["warmup_period"]):
    for b_j in range(num_batches):
        batch = next(data_loader)
        ae_grads = compute_grad_fn(ae_params, bparam, batch)
        ae_params = opt.update_params(ae_params, ae_grads[0], step_index=epoch)
        # bparam = opt.update_params(bparam, b_grads, step_index=epoch)
        loss = problem.objective(ae_params, bparam, batch)
        ma_loss.append(loss)
        print(f"loss:{loss} norm:{l2_norm(ae_grads)}")

    # Anneal the learning rate once per epoch and log training metrics.
    opt.lr = exp_decay(epoch, hparams["natural_lr"])
    mlflow.log_metrics(
        {
            "train_loss": float(loss),
            "ma_loss": float(ma_loss[-1]),
            "learning_rate": float(opt.lr),
            "bparam": float(bparam[0]),
            "norm grads": float(l2_norm(ae_grads)),
        },
        epoch,
    )

    # Early stopping: compare the last two values of a 50-step running mean.
    if len(ma_loss) > 100:
        loss_check = running_mean(ma_loss, 50)
        if math.isclose(
            loss_check[-1], loss_check[-2], abs_tol=hparams["loss_tol"]
        ):
            print(f"stopping at {epoch}")
            break
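# --- Hypothetical helper definitions (not shown in the source) -------------
# Both loops call exp_decay, running_mean and l2_norm without defining them.
# A minimal sketch of compatible implementations follows, assuming the
# gradients form a JAX pytree and the loss history is a list of scalars; the
# project's own definitions may differ.
import numpy as np
import jax.numpy as jnp
from jax import tree_util


def l2_norm(tree):
    """Global L2 norm over a pytree of arrays (e.g. a gradient pytree)."""
    leaves = tree_util.tree_leaves(tree)
    return jnp.sqrt(sum(jnp.vdot(leaf, leaf) for leaf in leaves))


def exp_decay(epoch, base_lr, decay_rate=0.999):
    """Exponentially decayed learning rate for the given epoch (assumed rate)."""
    return base_lr * (decay_rate ** epoch)


def running_mean(values, window):
    """Moving average of the loss history over the given window."""
    cumsum = np.cumsum(np.asarray(values, dtype=float))
    return (cumsum[window:] - cumsum[:-window]) / window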
# Descent phase: re-create the optimizer and continue training ae_params with
# the same plateau-based stopping rule; bparam is held fixed and not logged.
opt = OptimizerCreator(
    hparams["meta"]["optimizer"], learning_rate=hparams["natural_lr"]
).get_optimizer()
ma_loss = []
for epoch in range(500):
    for b_j in range(num_batches):
        batch = next(data_loader)
        grads = compute_grad_fn(ae_params, bparam, batch)
        ae_params = opt.update_params(ae_params, grads[0], step_index=epoch)
        loss = problem.objective(ae_params, bparam, batch)
        ma_loss.append(loss)
        print(f"loss:{loss} norm:{l2_norm(grads)}")

    # Per-epoch learning-rate decay (note: this decays from
    # hparams["descent_lr"] although the optimizer was created with
    # hparams["natural_lr"]).
    opt.lr = exp_decay(epoch, hparams["descent_lr"])
    mlflow.log_metrics(
        {
            "train_loss": float(loss),
            "ma_loss": float(ma_loss[-1]),
            "learning_rate": float(opt.lr),
            "norm grads": float(l2_norm(grads)),
        },
        epoch,
    )

    # Early stopping on a plateau of the 50-step running-mean loss.
    if len(ma_loss) > 100:
        loss_check = running_mean(ma_loss, 50)
        if math.isclose(
            loss_check[-1], loss_check[-2], abs_tol=hparams["loss_tol"]
        ):
            print(f"stopping at {epoch}")
            break
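# --- Hypothetical construction of compute_grad_fn (not from the source) ----
# Both loops treat compute_grad_fn(ae_params, bparam, batch) as returning a
# tuple whose first element is the gradient w.r.t. ae_params. One way such a
# function could be built with JAX, assuming problem.objective returns a
# scalar loss; the project may construct it differently.
import jax

compute_grad_fn = jax.jit(jax.grad(problem.objective, argnums=[0]))
# With argnums given as a sequence, jax.grad returns a tuple of gradients,
# which matches the grads[0] indexing used in the loops above.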