def on_rollout(self, r, at_steps):
    # Assemble one observation per step; a rollout carries either one state
    # array or two (in which case they are concatenated element-wise per step).
    state_arrays = r['state_arrays']
    if len(state_arrays) == 1:
        obs_of_step = state_arrays[0]
    elif len(state_arrays) == 2:
        obs_of_step = [numpy.concatenate([p, q])
                       for p, q in zip(state_arrays[0], state_arrays[1])]
    else:
        raise ValueError("Invalid state_arrays format")
    obs_text = format_observations([self.uncook_observation(obs)
                                    for obs in obs_of_step])
    # Episode statistics: scaled total reward, total duration, and reward
    # rate per hour (assuming durations are in seconds).
    reward = numpy.sum(r['rewards']) * lot_scheduling.REWARDSCALE
    duration = numpy.sum(r['durations'])
    r_rate = reward / (duration / 3600.0) if duration > 0 else 0.0
    actions_text = format_actions(r['actions'])
    ep_steps = len(r['rewards'])
    # Refresh the cached loss summary once it has been cleared, then reset
    # the accumulators for the next reporting window.
    if self.losses_text == '':
        self.losses_text = format_losses(self.losses, self.batch_count)
        self.losses = defaultdict(float)
        self.batch_count = 0

    print("{} episode {} at {} steps: reward={:.3f} rate={:.3f} duration={:.3f} steps={} {} {} {}"
          .format(iso8601stamp(),
                  self.rollout_count, at_steps, reward, r_rate, duration,
                  ep_steps, actions_text, obs_text, self.losses_text))
    ep = self.rollout_count
    self.rollout_count += 1
    return self.check_best(ep, at_steps, reward, duration)
Example #2
        total_loss_p += loss_p.item()
        # Free the batch tensors before the next iteration.
        del inputs, actions, targets

    # Average the three running losses over the number of batches.
    total_loss_i /= nbatches
    total_loss_s /= nbatches
    total_loss_p /= nbatches
    return total_loss_i, total_loss_s, total_loss_p


print('[training]')
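# Main training loop: one training epoch followed by a half-sized validation
# pass per iteration, with a checkpoint written after every epoch.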
for i in range(200):
    t0 = time.time()
    train_losses = train(opt.epoch_size, opt.npred)
    valid_losses = test(int(opt.epoch_size / 2))
    n_iter += opt.epoch_size
    # Move the model to CPU before serializing so the checkpoint can be loaded
    # on machines without a GPU, then move it back for the next epoch.
    model.cpu()
    torch.save(
        {
            'model': model,
            'optimizer': optimizer.state_dict(),
            'n_iter': n_iter
        }, opt.model_file + '.model')
    # Keep an extra numbered snapshot every 10 epochs.
    if (n_iter // opt.epoch_size) % 10 == 0:
        torch.save(model, opt.model_file + f'.step{n_iter}.model')
    model.cuda()
    log_string = f'step {n_iter} | '
    log_string += utils.format_losses(*train_losses, split='train')
    log_string += utils.format_losses(*valid_losses, split='valid')
    print(log_string)
    utils.log(opt.model_file + '.log', log_string)
Example #3
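# Either run a short evaluation pass only, or train with a curriculum that
# gradually lengthens the prediction horizon npred.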
if opt.test_only == 1:
    print('[testing]')
    valid_losses = test(10, 200)
else:
    print('[training]')
    utils.log(opt.model_file + '.log', f'[job name: {opt.model_file}]')
    npred = opt.npred if opt.npred != -1 else 16

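    # Re-derive the batch size from the current prediction horizon before each epoch.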
    for i in range(500):
        bsize = get_batch_size(npred)
        dataloader.opt.batch_size = bsize
        train_losses = train(opt.epoch_size, npred)
        valid_losses = test(int(opt.epoch_size / 2), npred)
        n_iter += opt.epoch_size
        # Move the model to CPU for a portable checkpoint, then back to GPU.
        model.intype('cpu')
        torch.save({'model': model,
                    'optimizer': optimizer.state_dict(),
                    'opt': opt,
                    'npred': npred,
                    'n_iter': n_iter},
                   opt.model_file + '.model')
        model.intype('gpu')
        log_string = f'step {n_iter} | npred {npred} | bsize {bsize} | esize {opt.epoch_size} | '
        log_string += utils.format_losses(train_losses[0], train_losses[1], split='train')
        log_string += utils.format_losses(valid_losses[0], valid_losses[1], split='valid')
        print(log_string)
        utils.log(opt.model_file + '.log', log_string)
        # Curriculum: when npred was not fixed on the command line, lengthen the
        # prediction horizon by 8 every curriculum_length epochs while it is below 400.
        if i > 0 and (i % opt.curriculum_length == 0) and (opt.npred == -1) and npred < 400:
            npred += 8