Example #1
0
            os.path.join(
                model_dir,
                'pg-{}-TSP{}-epoch-{}.pt'.format(str(id), args.n_points,
                                                 epoch)))
        torch.save(
            policy,
            os.path.join(
                model_dir, 'full-model-pg-{}-TSP{}-epoch-{}.pt'.format(
                    str(id), args.n_points, epoch)))
        best_running_reward = val_rwd_log.exp_avg
        val_best_dist = val_best_dist_log.val
        best_gap = gap

    if epoch % args.log_interval == 0:

        train_rwd_log.log(log)
        train_init_dist_log.log(log)
        train_best_dist_log.log(log)

        train_policy_loss_log.log(log)
        train_entropy_loss_log.log(log)
        train_value_loss_log.log(log)
        train_loss_log.log(log)

        val_rwd_log.log(log)
        val_init_dist_log.log(log)
        val_best_dist_log.log(log)

        print(
            '\033[1;32;40m Train - epoch:{} |rwd: {:.2f}'.format(
                epoch, train_rwd_log.val),
Example #2
0
    # Train and save one predictor per entry of step_list; each entry is
    # handed to the dataset as its skip_step.
    step_list = [1, 8, 16, 24, 50, 100]
    print('training start now: ')
    for n_step in step_list:
        env = EnvironmentState()
        state_set = StateDataset(
            env,
            skip_step=n_step,
            size=7000000,
            random_torque=True,
            remove_torque=False,
        )
        loader = DataLoader(state_set, batch_size=1024)

        # A fresh predictor for every n_step, with its stepper's
        # recurrent_step reset to 0 before any optimisation.
        predictor = Predictor(state_set.output_size, 2)
        predictor.stepper.recurrent_step = 0

        avm = AverageMeter()
        # NOTE(review): Timer presumably measures the enclosed loop - confirm.
        with Timer():
            for i, batch in enumerate(loader):
                after = batch['s1']
                before = batch['s0']
                # Regression target: difference of columns 12:14 between
                # the 's1' and 's0' entries of the batch.
                d = after[:, 12:14] - before[:, 12:14]  # with torques
                r, loss = predictor.optimize(before, d)
                avm.log(loss)

                # Only report on every 10th batch.
                if i % 10 != 0:
                    continue
                print(
                    f'epoch {i}: trn loss {avm.value:.4f} {avm.std:.4f}, rmse {avm.value**0.5:.4f}, '
                    f'pred to stat error ratio: {torch.mean(torch.abs(r / d)):.4f} '
                    f'max_d {torch.max(d):.2f} {torch.min(d):.2f}, max_r {torch.max(r):.2f} {torch.min(r):.2f}'
                )
        # One checkpoint file per n_step.
        predictor.save(f'predictor{n_step}step.pt')

    print('training finished.')