Code example #1
def main(args):
    log_path = args.log_path
    epoch = args.epoch

    mdp = PendulumMDP()

    # load the specified model
    with open(log_path + '/settings', 'r') as f:
        settings = json.load(f)
    armotized = settings['armotized']
    model = PCC(armotized=armotized, x_dim=4608, z_dim=3, u_dim=1, env='pendulum')
    model.load_state_dict(torch.load(log_path + '/model_' + str(epoch), map_location='cpu'))
    model.eval()

    show_latent_map(model, mdp)
Code example #2
def main(args):
    log_path = args.log_path
    epoch = args.epoch

    mdp = PlanarObstaclesMDP()

    # load the specified model
    with open(log_path + '/settings', 'r') as f:
        settings = json.load(f)
    armotized = settings['armotized']
    model = PCC(armotized, 1600, 2, 2, 'planar')
    model.load_state_dict(
        torch.load(log_path + '/model_' + str(epoch), map_location='cpu'))
    model.eval()

    show_latent_map(model, mdp)
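Both snippets above expect an argparse-style namespace exposing log_path and epoch. A minimal driver is sketched below; the flag names simply mirror the attributes read in main() and are an assumption, since the project's actual argument parser may differ.

import argparse

if __name__ == '__main__':
    # Hypothetical entry point: flag names mirror the attributes accessed in main().
    parser = argparse.ArgumentParser(description='show the latent map of a trained PCC model')
    parser.add_argument('--log_path', required=True, help='folder with the settings file and model checkpoints')
    parser.add_argument('--epoch', type=int, required=True, help='checkpoint epoch to load')
    main(parser.parse_args())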
Code example #3
File: ilqr.py Project: doublecc161/PCC-pytorch
def main(args):
    task_name = args.task
    assert task_name in ['planar', 'balance', 'swing', 'cartpole', 'threepole', 'pendulum_gym', 'mountain_car']
    env_name = 'pendulum' if task_name in ['balance', 'swing'] else task_name

    setting_path = args.setting_path
    setting = os.path.basename(os.path.normpath(setting_path))
    noise = args.noise
    epoch = args.epoch
    x_dim, z_dim, u_dim = env_data_dim[env_name]
    if env_name in ['planar', 'pendulum']:
        x_dim = np.prod(x_dim)

    ilqr_result_path = 'iLQR_result/' + '_'.join([task_name, str(setting), str(noise), str(epoch)])
    if not os.path.exists(ilqr_result_path):
        os.makedirs(ilqr_result_path)
    with open(ilqr_result_path + '/settings', 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    # each trained model will perform 10 random tasks
    all_task_configs = []
    for task_counter in range(10):
        # config for this task
        with open(config_path[task_name]) as f:
            config = json.load(f)

        # sample random start and goal state
        s_start_min, s_start_max = config['start_min'], config['start_max']
        config['s_start'] = np.random.uniform(low=s_start_min, high=s_start_max)
        s_goal = config['goal'][np.random.choice(len(config['goal']))]
        config['s_goal'] = np.array(s_goal)

        all_task_configs.append(config)

    # the folder where all trained models are saved
    log_folders = [os.path.join(setting_path, dI) for dI in os.listdir(setting_path) if os.path.isdir(os.path.join(setting_path, dI))]
    log_folders.sort()

    # statistics on all trained models
    avg_model_percent = 0.0
    best_model_percent = 0.0
    for log in log_folders:
        with open(log + '/settings', 'r') as f:
            settings = json.load(f)
            armotized = settings['armotized']

        log_base = os.path.basename(os.path.normpath(log))
        model_path = ilqr_result_path + '/' + log_base
        if not os.path.exists(model_path):
            os.makedirs(model_path)
        print('iLQR for ' + log_base)

        # load the trained model
        model = PCC(armotized, x_dim, z_dim, u_dim, env_name)
        model.load_state_dict(torch.load(log + '/model_' + str(epoch), map_location='cpu'))
        model.eval()
        dynamics = model.dynamics
        encoder = model.encoder

        # run the task with 10 different start and goal states for a particular model
        avg_percent = 0.0
        for task_counter, config in enumerate(all_task_configs):

            print('Performing task %d: ' %(task_counter) + str(config['task']))

            # environment specification
            horizon = config['horizon_prob']
            plan_len = config['plan_len']

            # ilqr specification
            R_z = config['q_weight'] * np.eye(z_dim)
            R_u = config['r_weight'] * np.eye(u_dim)
            num_uniform = config['uniform_trajs']
            num_extreme = config['extreme_trajs']
            ilqr_iters = config['ilqr_iters']
            inv_regulator_init = config['pinv_init']
            inv_regulator_multi = config['pinv_mult']
            inv_regulator_max = config['pinv_max']
            alpha_init = config['alpha_init']
            alpha_mult = config['alpha_mult']
            alpha_min = config['alpha_min']

            s_start = config['s_start']
            s_goal = config['s_goal']

            # mdp
            if env_name == 'planar':
                mdp = PlanarObstaclesMDP(goal=s_goal, goal_thres=config['distance_thresh'],
                                         noise=noise)
            elif env_name == 'pendulum':
                mdp = PendulumMDP(frequency=config['frequency'], noise=noise, torque=config['torque'])
            elif env_name == 'cartpole':
                mdp = CartPoleMDP(frequency=config['frequency'], noise=noise)
            elif env_name == 'threepole':
                mdp = ThreePoleMDP(frequency=config['frequency'], noise=noise, torque=config['torque'])
            # get z_start and z_goal
            x_start = get_x_data(mdp, s_start, config)
            x_goal = get_x_data(mdp, s_goal, config)
            with torch.no_grad():
                z_start = encoder(x_start).mean
                z_goal = encoder(x_goal).mean
            z_start = z_start.squeeze().numpy()
            z_goal = z_goal.squeeze().numpy()

            # initialize actions trajectories
            all_actions_trajs = random_actions_trajs(mdp, num_uniform, num_extreme, plan_len)

            # perform receding horizon iLQR
            s_start_horizon = np.copy(s_start)  # s_start and z_start are updated at each horizon step
            z_start_horizon = np.copy(z_start)
            obs_traj = [mdp.render(s_start).squeeze()]
            goal_counter = 0.0
            for plan_iter in range(1, horizon + 1):
                latent_cost_list = [None] * len(all_actions_trajs)
                # iterate over all trajectories
                for traj_id in range(len(all_actions_trajs)):
                    # initialize the inverse regulator
                    inv_regulator = inv_regulator_init
                    for iter in range(1, ilqr_iters + 1):
                        u_seq = all_actions_trajs[traj_id]
                        z_seq = compute_latent_traj(z_start_horizon, u_seq, dynamics)
                        # compute the linearization matrices
                        A_seq, B_seq = seq_jacobian(dynamics, z_seq, u_seq)
                        # run backward
                        k_small, K_big = backward(R_z, R_u, z_seq, u_seq,
                                                  z_goal, A_seq, B_seq, inv_regulator)
                        current_cost = latent_cost(R_z, R_u, z_seq, z_goal, u_seq)
                        # forward using line search
                        alpha = alpha_init
                        accept = False  # if any alpha is accepted
                        while alpha > alpha_min:
                            z_seq_cand, u_seq_cand = forward(z_seq, all_actions_trajs[traj_id], k_small, K_big, dynamics, alpha)
                            cost_cand = latent_cost(R_z, R_u, z_seq_cand, z_goal, u_seq_cand)
                            if cost_cand < current_cost:  # accept the trajectory candidate
                                accept = True
                                all_actions_trajs[traj_id] = u_seq_cand
                                latent_cost_list[traj_id] = cost_cand
                                break
                            else:
                                alpha *= alpha_mult
                        if accept:
                            inv_regulator = inv_regulator_init
                        else:
                            inv_regulator *= inv_regulator_multi
                        if inv_regulator > inv_regulator_max:
                            break

                for i in range(len(latent_cost_list)):
                    if latent_cost_list[i] is None:
                        latent_cost_list[i] = np.inf
                traj_opt_id = np.argmin(latent_cost_list)
                action_chosen = all_actions_trajs[traj_opt_id][0]
                s_start_horizon, z_start_horizon = update_horizon_start(mdp, s_start_horizon,
                                                                        action_chosen, encoder, config)

                obs_traj.append(mdp.render(s_start_horizon).squeeze())
                goal_counter += mdp.reward_function(s_start_horizon)

                all_actions_trajs = refresh_actions_trajs(all_actions_trajs, traj_opt_id, mdp,
                                                          np.min([plan_len, horizon - plan_iter]),
                                                          num_uniform, num_extreme)

            # compute the percentage close to goal
            success_rate = goal_counter / horizon
            print('Success rate: %.2f' % success_rate)
            percent = success_rate
            avg_percent += success_rate
            with open(model_path + '/result.txt', 'a+') as f:
                f.write(config['task'] + ': ' + str(percent) + '\n')

            # save trajectory as gif file
            gif_path = model_path + '/task_{:01d}.gif'.format(task_counter + 1)
            save_traj(obs_traj, mdp.render(s_goal).squeeze(), gif_path, config['task'])

        avg_percent = avg_percent / 10
        print('Average success rate: ' + str(avg_percent))
        print('====================================')
        avg_model_percent += avg_percent
        if avg_percent > best_model_percent:
            best_model = log_base
            best_model_percent = avg_percent
        with open(model_path + '/result.txt', 'a+') as f:
            f.write('Average percentage: ' + str(avg_percent))

    avg_model_percent = avg_model_percent / len(log_folders)
    with open(ilqr_result_path + '/result.txt', 'w') as f:
        f.write('Average percentage of all models: ' + str(avg_model_percent) + '\n')
        f.write('Best model: ' + best_model + ', best percentage: ' + str(best_model_percent))
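In the script above, R_z and R_u are diagonal quadratic weights built from q_weight and r_weight, so the value that latent_cost compares during the line search is presumably a quadratic goal-tracking cost over the latent trajectory plus a control-effort penalty. The helper below is only a sketch of that assumption (written for numpy arrays), not necessarily the repo's latent_cost.

def quadratic_latent_cost(R_z, R_u, z_seq, z_goal, u_seq):
    # Hypothetical sketch: quadratic tracking cost plus quadratic control effort.
    cost = 0.0
    for z in z_seq:
        diff = z - z_goal
        cost += diff @ R_z @ diff  # distance to the latent goal, weighted by q_weight
    for u in u_seq:
        cost += u @ R_u @ u        # control effort, weighted by r_weight
    return cost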
Code example #4
File: train_pcc.py Project: doublecc161/PCC-pytorch
def main(args):
    env_name = args.env
    assert env_name in ['planar', 'pendulum', 'cartpole', 'threepole']
    armotized = args.armotized
    log_dir = args.log_dir
    seed = args.seed
    data_size = args.data_size
    noise_level = args.noise
    batch_size = args.batch_size
    lam_p = args.lam_p
    lam_c = args.lam_c
    lam_cur = args.lam_cur
    lam = (lam_p, lam_c, lam_cur)
    vae_coeff = args.vae_coeff
    determ_coeff = args.determ_coeff
    lr = args.lr
    weight_decay = args.decay
    epoches = args.num_iter
    iter_save = args.iter_save
    save_map = args.save_map

    seed_torch(seed)

    def _init_fn(worker_id):
        # seed numpy in each DataLoader worker so data sampling is reproducible
        np.random.seed(int(seed))

    dataset = datasets[env_name]
    data = dataset(sample_size=data_size, noise=noise_level)
    data_loader = DataLoader(data, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=4, worker_init_fn=_init_fn)

    x_dim, z_dim, u_dim = dims[env_name]
    model = PCC(armotized=armotized, x_dim=x_dim, z_dim=z_dim, u_dim=u_dim, env=env_name).to(device)

    if env_name == 'planar' and save_map:
        mdp = PlanarObstaclesMDP(noise=noise_level)

    optimizer = optim.Adam(model.parameters(), betas=(0.9, 0.999), eps=1e-8, lr=lr, weight_decay=weight_decay)

    log_path = 'logs/' + env_name + '/' + log_dir
    if not path.exists(log_path):
        os.makedirs(log_path)
    writer = SummaryWriter(log_path)

    result_path = 'result/' + env_name + '/' + log_dir
    if not path.exists(result_path):
        os.makedirs(result_path)
    with open(result_path + '/settings', 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    if env_name == 'planar' and save_map:
        latent_maps = [draw_latent_map(model, mdp)]
    for i in range(epoches):
        avg_pred_loss, avg_consis_loss, avg_cur_loss, avg_loss = train(model, env_name, data_loader, lam,
                                                                       vae_coeff, determ_coeff, optimizer, armotized, i)

        # ...log the running loss
        writer.add_scalar('prediction loss', avg_pred_loss, i)
        writer.add_scalar('consistency loss', avg_consis_loss, i)
        writer.add_scalar('curvature loss', avg_cur_loss, i)
        writer.add_scalar('training loss', avg_loss, i)
        if env_name == 'planar' and save_map:
            if (i+1) % 10 == 0:
                map_i = draw_latent_map(model, mdp)
                latent_maps.append(map_i)
        # save model
        if (i + 1) % iter_save == 0:
            print('Saving the model.............')

            torch.save(model.state_dict(), result_path + '/model_' + str(i + 1))
            with open(result_path + '/loss_' + str(i + 1), 'w') as f:
                f.write('\n'.join([
                                'Prediction loss: ' + str(avg_pred_loss),
                                'Consistency loss: ' + str(avg_consis_loss),
                                'Curvature loss: ' + str(avg_cur_loss),
                                'Training loss: ' + str(avg_loss)
                                ]))
    if env_name == 'planar' and save_map:
        latent_maps[0].save(result_path + '/latent_map.gif', format='GIF', append_images=latent_maps[1:], save_all=True, duration=100, loop=0)
    writer.close()
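The tuple lam = (lam_p, lam_c, lam_cur) carries the weights for the prediction, consistency, and curvature terms whose running averages are logged above. As a rough illustration of how such weights are typically combined (the helper below is hypothetical, not the repo's train() function; vae_coeff and determ_coeff would weight additional regularizers in the full objective):

def weighted_pcc_loss(pred_loss, consis_loss, cur_loss, lam):
    # Hypothetical sketch: weighted sum of the three PCC loss terms.
    lam_p, lam_c, lam_cur = lam
    return lam_p * pred_loss + lam_c * consis_loss + lam_cur * cur_loss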
Code example #5
def main(args):
    env_name = args.env
    assert env_name in ['planar', 'pendulum', 'cartpole']
    possible_tasks = env_task[env_name]
    # each trained model will perform 10 random tasks
    random_task_id = np.random.choice(len(possible_tasks), size=10)
    x_dim, z_dim, u_dim = env_data_dim[env_name]
    if env_name in ['planar', 'pendulum']:
        x_dim = np.prod(x_dim)

    # the folder where all trained models are saved
    folder = 'result/' + env_name
    log_folders = [
        os.path.join(folder, dI) for dI in os.listdir(folder)
        if os.path.isdir(os.path.join(folder, dI))
    ]
    log_folders.sort()

    # statistics on all trained models
    avg_model_percent = 0.0
    best_model_percent = 0.0
    for log in log_folders:
        with open(log + '/settings', 'r') as f:
            settings = json.load(f)
            armotized = settings['armotized']

        log_base = os.path.basename(os.path.normpath(log))
        model_path = 'iLQR_result/' + env_name + '/' + log_base
        if not os.path.exists(model_path):
            os.makedirs(model_path)
        print('iLQR for ' + log_base)

        # load the trained model
        model = PCC(armotized, x_dim, z_dim, u_dim, env_name)
        model.load_state_dict(
            torch.load(log + '/model_5000', map_location='cpu'))
        model.eval()
        dynamics = model.dynamics
        encoder = model.encoder

        # run the task with 10 different start and goal states for a particular model
        avg_percent = 0.0
        for task_counter in range(10):
            # pick a random task
            random_task = possible_tasks[random_task_id[task_counter]]
            with open(config_path[random_task]) as f:
                config = json.load(f)
            print('Performing task: ' + str(random_task))

            # environment specification
            horizon = config['horizon_prob']
            plan_len = config['plan_len']

            # ilqr specification
            R_z = config['q_weight'] * np.eye(z_dim)
            R_u = config['r_weight'] * np.eye(u_dim)
            num_uniform = config['uniform_trajs']
            num_extreme = config['extreme_trajs']
            ilqr_iters = config['ilqr_iters']
            inv_regulator_init = config['pinv_init']
            inv_regulator_multi = config['pinv_mult']
            inv_regulator_max = config['pinv_max']
            alpha_init = config['alpha_init']
            alpha_mult = config['alpha_mult']
            alpha_min = config['alpha_min']

            # sample random start and goal state
            s_start_min, s_start_max = config['start_min'], config['start_max']
            s_start = np.random.uniform(low=s_start_min, high=s_start_max)
            s_goal = config['goal'][np.random.choice(len(config['goal']))]
            s_goal = np.array(s_goal)

            # mdp
            if env_name == 'planar':
                mdp = PlanarObstaclesMDP(goal=s_goal,
                                         goal_thres=config['distance_thresh'],
                                         noise=config['noise'])
            elif env_name == 'pendulum':
                mdp = PendulumMDP(frequency=config['frequency'],
                                  noise=config['noise'],
                                  torque=config['torque'])
            elif env_name == 'cartpole':
                mdp = CartPoleMDP(frequency=config['frequency'],
                                  noise=config['noise'])
            # get z_start and z_goal
            x_start = get_x_data(mdp, s_start, config)
            x_goal = get_x_data(mdp, s_goal, config)
            with torch.no_grad():
                z_start = encoder(x_start).mean
                z_goal = encoder(x_goal).mean
            z_start = z_start.squeeze().numpy()
            z_goal = z_goal.squeeze().numpy()

            # initialize actions trajectories
            all_actions_trajs = random_actions_trajs(mdp, num_uniform,
                                                     num_extreme, plan_len)
            actions_final = []

            # perform receding horizon iLQR
            s_start_horizon = np.copy(s_start)  # s_start and z_start are updated at each horizon step
            z_start_horizon = np.copy(z_start)
            for plan_iter in range(1, horizon + 1):
                print('Planning for horizon ' + str(plan_iter))
                latent_cost_list = [None] * len(all_actions_trajs)
                # iterate over all trajectories
                for traj_id in range(len(all_actions_trajs)):
                    print('Running iLQR for trajectory ' + str(traj_id + 1))
                    # initialize the inverse regulator
                    inv_regulator = inv_regulator_init
                    for iter in range(1, ilqr_iters + 1):
                        u_seq = all_actions_trajs[traj_id]
                        z_seq = compute_latent_traj(z_start_horizon, u_seq,
                                                    dynamics)
                        # compute the linearization matrices
                        A_seq, B_seq = seq_jacobian(dynamics, z_seq, u_seq)
                        # run backward
                        k_small, K_big = backward(R_z, R_u, z_seq, u_seq,
                                                  z_goal, A_seq, B_seq,
                                                  inv_regulator)
                        current_cost = latent_cost(R_z, R_u, z_seq, z_goal,
                                                   u_seq)
                        # forward using line search
                        alpha = alpha_init
                        accept = False  # if any alpha is accepted
                        while alpha > alpha_min:
                            z_seq_cand, u_seq_cand = forward(
                                z_seq, all_actions_trajs[traj_id], k_small,
                                K_big, dynamics, alpha)
                            cost_cand = latent_cost(R_z, R_u, z_seq_cand,
                                                    z_goal, u_seq_cand)
                            if cost_cand < current_cost:  # accept the trajectory candidate
                                accept = True
                                all_actions_trajs[traj_id] = u_seq_cand
                                latent_cost_list[traj_id] = cost_cand
                                print(
                                    'Found a better action sequence. New latent cost: '
                                    + str(cost_cand.item()))
                                break
                            else:
                                alpha *= alpha_mult
                        if accept:
                            inv_regulator = inv_regulator_init
                        else:
                            inv_regulator *= inv_regulator_multi
                        if inv_regulator > inv_regulator_max:
                            print('Can not improve. Stop iLQR.')
                            break

                    print(
                        '==================================================================='
                    )

                for i in range(len(latent_cost_list)):
                    if latent_cost_list[i] is None:
                        latent_cost_list[i] = np.inf
                traj_opt_id = np.argmin(latent_cost_list)
                action_chosen = all_actions_trajs[traj_opt_id][0]
                # action_chosen = np.clip(action_chosen, mdp.action_range[0], mdp.action_range[1])
                actions_final.append(action_chosen)
                s_start_horizon, z_start_horizon = update_horizon_start(
                    mdp, s_start_horizon, action_chosen, encoder, config)
                all_actions_trajs = refresh_actions_trajs(
                    all_actions_trajs, traj_opt_id, mdp,
                    np.min([plan_len, horizon - plan_iter]), num_uniform,
                    num_extreme)

            obs_traj, goal_counter = traj_opt_actions(s_start, actions_final,
                                                      mdp)
            # compute the percentage close to goal
            percent = goal_counter / horizon
            avg_percent += percent
            with open(model_path + '/result.txt', 'a+') as f:
                f.write(random_task + ': ' + str(percent) + '\n')

            # save trajectory as gif file
            gif_path = model_path + '/task_{:01d}.gif'.format(task_counter + 1)
            save_traj(obs_traj,
                      mdp.render(s_goal).squeeze(), gif_path, random_task)

        avg_percent = avg_percent / 10
        avg_model_percent += avg_percent
        if avg_percent > best_model_percent:
            best_model = log_base
            best_model_percent = avg_percent
        with open(model_path + '/result.txt', 'a+') as f:
            f.write('Average percentage: ' + str(avg_percent))

    avg_model_percent = avg_model_percent / len(log_folders)
    with open('iLQR_result/' + env_name + '/result.txt', 'w') as f:
        f.write('Average percentage of all models: ' + str(avg_model_percent) +
                '\n')
        f.write('Best model: ' + best_model + ', best percentage: ' +
                str(best_model_percent))