def on_rollout(self, r, at_steps):
    """Log a finished rollout and feed it to best-episode tracking.

    Args:
        r: rollout dict with keys 'state_arrays', 'rewards', 'durations',
           'actions' (schema assumed from usage here — confirm with producer).
        at_steps: global step count at which the rollout completed.

    Returns:
        Whatever ``self.check_best`` returns for this episode.

    Raises:
        ValueError: if ``r['state_arrays']`` holds neither 1 nor 2 arrays.
    """
    arrays = r['state_arrays']
    if len(arrays) == 1:
        step_obs = arrays[0]
    elif len(arrays) == 2:
        # Two parallel observation streams: fuse them element-wise.
        step_obs = [numpy.concatenate([a, b]) for a, b in zip(arrays[0], arrays[1])]
    else:
        raise ValueError("Invalid state_arrays format")

    decoded = [self.uncook_observation(o) for o in step_obs]
    obs_text = format_observations(decoded)

    total_reward = numpy.sum(r['rewards']) * lot_scheduling.REWARDSCALE
    total_duration = numpy.sum(r['durations'])
    # Reward per hour; guard against zero-length rollouts.
    if total_duration > 0:
        hourly_rate = total_reward / (total_duration / 3600.0)
    else:
        hourly_rate = 0.0

    actions_text = format_actions(r['actions'])
    n_steps = len(r['rewards'])

    # Fold accumulated training losses into the log line once, then reset
    # the accumulators; an empty losses_text marks "not yet formatted".
    if self.losses_text == '':
        self.losses_text = format_losses(self.losses, self.batch_count)
        self.losses = defaultdict(float)
        self.batch_count = 0

    print("{} episode {} at {} steps: reward={:.3f} rate={:.3f} duration={:.3f} steps={} {} {} {}"
          .format(iso8601stamp(), self.rollout_count, at_steps, total_reward, hourly_rate,
                  total_duration, n_steps, actions_text, obs_text, self.losses_text))

    # check_best sees the episode index as it was BEFORE this rollout bumped it.
    episode = self.rollout_count
    self.rollout_count += 1
    return self.check_best(episode, at_steps, total_reward, total_duration)
# NOTE(review): the first statements below are the tail of a loss-accumulation
# function whose `def` lies above this chunk; indentation is reconstructed on
# the assumption of a per-batch inner loop inside that function — confirm.
        total_loss_p += loss_p.item()
        # Drop batch tensors promptly so memory stays flat across iterations.
        del inputs, actions, targets
    # Average the per-batch sums into mean losses for the epoch.
    total_loss_i /= nbatches
    total_loss_s /= nbatches
    total_loss_p /= nbatches
    return total_loss_i, total_loss_s, total_loss_p


print('[training]')
for i in range(200):
    t0 = time.time()
    train_losses = train(opt.epoch_size, opt.npred)
    # Validate on half an epoch's worth of batches.
    valid_losses = test(int(opt.epoch_size / 2))
    n_iter += opt.epoch_size
    # Checkpoint from CPU so the saved file is device-agnostic.
    model.cpu()
    torch.save(
        {
            'model': model,
            'optimizer': optimizer.state_dict(),
            'n_iter': n_iter
        },
        opt.model_file + '.model')
    # Every 10th epoch also keep a step-stamped snapshot (relies on n_iter
    # being an exact multiple of epoch_size, which the increment above ensures).
    if (n_iter / opt.epoch_size) % 10 == 0:
        torch.save(model, opt.model_file + f'.step{n_iter}.model')
    model.cuda()
    log_string = f'step {n_iter} | '
    log_string += utils.format_losses(*train_losses, split='train')
    log_string += utils.format_losses(*valid_losses, split='valid')
    print(log_string)
    utils.log(opt.model_file + '.log', log_string)
# Entry point: run a single evaluation pass, or the full curriculum training
# loop, depending on the test_only flag.
if opt.test_only == 1:
    print('[testing]')
    valid_losses = test(10, 200)
else:
    print('[training]')
    utils.log(opt.model_file + '.log', f'[job name: {opt.model_file}]')
    # Curriculum horizon: npred == -1 means "start at 16 and grow"; any other
    # value fixes the prediction length for the whole run.
    npred = opt.npred if opt.npred != -1 else 16
    for i in range(500):
        # Batch size is re-derived every epoch because npred can grow below.
        bsize = get_batch_size(npred)
        dataloader.opt.batch_size = bsize
        train_losses = train(opt.epoch_size, npred)
        valid_losses = test(int(opt.epoch_size / 2), npred)
        n_iter += opt.epoch_size
        # Checkpoint from CPU so the saved file loads on GPU-less machines.
        model.intype('cpu')
        torch.save({'model': model,
                    'optimizer': optimizer.state_dict(),
                    'opt': opt,
                    'npred': npred,
                    'n_iter': n_iter},
                   opt.model_file + '.model')
        model.intype('gpu')
        log_string = f'step {n_iter} | npred {npred} | bsize {bsize} | esize {opt.epoch_size} | '
        log_string += utils.format_losses(train_losses[0], train_losses[1], split='train')
        log_string += utils.format_losses(valid_losses[0], valid_losses[1], split='valid')
        print(log_string)
        utils.log(opt.model_file + '.log', log_string)
        # Lengthen the horizon by 8 every curriculum_length epochs, capped at
        # 400, but only when npred was left at its -1 (auto-curriculum) default.
        if i > 0 and (i % opt.curriculum_length == 0) and (opt.npred == -1) and npred < 400:
            npred += 8