def train():
    """Train ``net`` on ``train_loader`` with periodic evaluation and saving.

    Relies on names that are not defined in this function and are expected
    to be available at module scope: ``save_dir``, ``exp_id``, ``args``,
    ``model``, ``net``, ``train_loader``, ``test``, ``get_logger`` and
    ``Accumulator``.

    Steps are counted from 1. Every ``args.test_freq`` steps a log line is
    emitted containing the step, the learning rate of the first optimizer
    parameter group, ``train_accm.get('loss')`` and the string returned by
    ``test``; both accumulators are then reset. Every ``args.save_freq``
    steps the state dict of ``net`` is written to ``model.tar`` inside
    ``save_dir`` (and additionally to ``model<step>.tar`` when
    ``args.save_all`` is truthy). ``model.tar`` is written once more after
    the loader is exhausted.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.isdir() test followed by os.makedirs().
    os.makedirs(save_dir, exist_ok=True)

    model.load_from_ckpt()

    # save hyperparams
    with open(os.path.join(save_dir, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f, sort_keys=True, indent=4)

    optimizer, scheduler = model.build_optimizer()
    log_path = os.path.join(
        save_dir, 'train_' + time.strftime('%Y%m%d-%H%M') + '.log')
    logger = get_logger(exp_id, log_path)
    accm = Accumulator(*model.metrics)
    train_accm = Accumulator('loss')

    # Path of the rolling "latest" checkpoint, overwritten on every save.
    ckpt_path = os.path.join(save_dir, 'model.tar')

    for t, batch in enumerate(train_loader, 1):
        # Re-enter training mode on every step; test() below may have
        # changed the mode (its body is not visible here).
        net.train()
        optimizer.zero_grad()
        loss = model.loss_fn(batch)
        loss.backward()
        nn.utils.clip_grad_norm_(net.parameters(), args.clip)
        optimizer.step()
        scheduler.step()
        train_accm.update(loss.item())

        if t % args.test_freq == 0:
            line = 'step {}, lr {:.3e}, train loss {:.4f}, '.format(
                t, optimizer.param_groups[0]['lr'], train_accm.get('loss'))
            line += test(accm=accm, verbose=False)
            logger.info(line)
            accm.reset()
            train_accm.reset()

        if t % args.save_freq == 0:
            if args.save_all:
                # Keep a per-step snapshot in addition to the latest one.
                torch.save(
                    net.state_dict(),
                    os.path.join(save_dir, 'model{}.tar'.format(t)))
            torch.save(net.state_dict(), ckpt_path)

    # Final save so the last steps are kept even when the total step count
    # is not a multiple of args.save_freq.
    torch.save(net.state_dict(), ckpt_path)
def train():
    """Run the training loop through the ``model`` wrapper object.

    Relies on names that are not defined in this function and are expected
    to be available at module scope: ``save_dir``, ``exp_id``, ``args``,
    ``model``, ``get_logger`` and ``Accumulator``.

    The hyperparameters in ``args`` are dumped to ``args.json`` inside
    ``save_dir``; the model is then asked to build its train loader, test
    loader and optimizer. Steps are counted from 1. Every
    ``args.test_freq`` steps a log line is emitted containing the step,
    ``model.get_lr_string()``, the training accumulator summary and the
    test accumulator summary produced after ``model.test``; both
    accumulators are then reset. Every ``args.save_freq`` steps, and once
    more after the loader is exhausted, ``model.save`` is called with the
    path of ``model.tar`` inside ``save_dir``.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.isdir() test followed by os.makedirs().
    os.makedirs(save_dir, exist_ok=True)

    # save hyperparams
    with open(os.path.join(save_dir, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f, sort_keys=True, indent=4)

    model.build_train_loader()
    model.build_test_loader()
    model.build_optimizer()

    train_accm = Accumulator(*model.train_metrics)
    test_accm = Accumulator(*model.test_metrics)
    log_path = os.path.join(
        save_dir, 'train_' + time.strftime('%Y%m%d-%H%M') + '.log')
    logger = get_logger(exp_id, log_path)

    # Path of the rolling "latest" checkpoint, overwritten on every save.
    ckpt_path = os.path.join(save_dir, 'model.tar')

    for t, batch in enumerate(model.train_loader, 1):
        model.train_batch(batch, train_accm)

        if t % args.test_freq == 0:
            line = 'step {}, '.format(t)
            line += model.get_lr_string() + ', '
            # The training summary is read before model.test() runs, which
            # preserves the original evaluation order.
            line += train_accm.info(header='train', show_et=False)
            model.test(test_accm)
            line += test_accm.info(header='test')
            logger.info(line)
            train_accm.reset()
            test_accm.reset()

        if t % args.save_freq == 0:
            model.save(ckpt_path)

    # Final save so the last steps are kept even when the total step count
    # is not a multiple of args.save_freq.
    model.save(ckpt_path)