Example #1
def random_sample():
    # mean trajectory length over 100 trials for a newly initialized policy
    #                random_sample_policy
    # pos1                           6.28
    # pos2                           6.47
    # pos3                           6.30
    # pos_test                       6.94
    # resetHandEasy                  5.78

    parameters = OriginalParams(Parmap(time_step=1e-2, nb_iter=500))
    env = AllegroHand(parameters)
    mdim_vals = {}
    name_pathl = {}
    # try:
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    num_tra = 100
    with tf.compat.v1.variable_scope("random_sample_policy"):
        policy = MLPGaussianPolicy(sess, env.action_space.shape[0], env.observation_space.shape[0],
                                   mean_mult=parameters.mean_mult,
                                   init_sigma=parameters.init_sigma,
                                   min_sigma=parameters.min_sigma)
    sess.run(tf.compat.v1.global_variables_initializer())
    all_positions = position_list.copy()
    all_positions.append(pos_test)
    for position_func in all_positions:
        nb_traj_length = run_lower_with_environment(position_func, env, lambda obs: policy.get_action(obs), num_trajectories=num_tra)
        # nb_traj_length = run_lower_policy(position_func, params, policy, num_trajectories=num_tra, show=True)
        name_pathl.update({position_func.__name__: nb_traj_length})
    env.close()
    U.get_session().close()
    # except Exception as Id:
    #     print(name, 'not found in meta')
    mdim_vals.update({"random_sample_policy": name_pathl})
    return pd.DataFrame(mdim_vals)
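# Hypothetical usage sketch (not from the original repo): the returned DataFrame
# has one row per start-position function and one column per evaluated policy.
if __name__ == "__main__":
    df = random_sample()
    print(df)
    print("overall mean trajectory length:", df["random_sample_policy"].mean())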
Example #2
def loadParamMap(folder, uuid_s=36):
    run_folder = os.path.join(TB_DIR, folder)
    params = Parmap.from_config_file(run_folder + "/config")
    num_traj_score = read_num_trajectories_score(run_folder)
    traj_progress = read_num_trajectories_progress(run_folder)
    params.score = num_traj_score
    params.score_p = traj_progress
    params.log_name = folder
    params.pol_saving_name = folder[-uuid_s:]
    params.ParamObject = params
    return params
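# Hypothetical helper (not from the original repo): collect the Parmap of every
# run folder under TB_DIR into one pandas DataFrame for side-by-side comparison.
# Assumes each run folder contains a readable "config" file.
def load_all_param_maps():
    rows = []
    for folder in os.listdir(TB_DIR):
        try:
            rows.append(vars(loadParamMap(folder)))
        except (OSError, KeyError):
            continue  # skip folders without a usable config
    return pd.DataFrame(rows)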
Example #3
def find_upper_policy_run_suffixes():
    folder_names = os.listdir(TB_DIR)
    par_list = []
    for folder in folder_names:
        try:
            params = Parmap.from_config_file(TB_DIR + folder + "/config")
            if len(params.get_fingers()) == 4:
                par_list.append(params)
        except KeyError as keyr:
            print(folder)
            print(keyr)
    # NOTE: the original ended with a placeholder print('implement me');
    # returning the collected Parmap objects is the assumed intent here.
    return par_list
Example #4
def save_position_as_images(res=1600):
    env = AllegroHand(Parmap(), gui=True, show_fingers=True)
    for pos in all_pos_list:
        # env.pc.reset_debug_visualizer_camera(CamProps.camera_distance,
        #                                      CamProps.camera_yaw,
        #                                      CamProps.camera_pitch,
        #                                      CamProps.camera_target_position)
        env = pos(env)
        env = adhere_target_to_initial(env)
        env.pc.time_step = 1e-3
        env.pc.step_simulation()
        camera_image = env.pc.call(pybullet.getCameraImage, res, res)
        # getCameraImage returns (width, height, rgbPixels, depth, segmentationMask)
        rgb_image = camera_image[2]
        img = Image.fromarray(rgb_image[:, :, 0:3])  # discard alpha channel
        img.save(pos.__name__ + '.pdf')
        print("break")
        time.sleep(1)
    env.close()
Example #5
def multi_wrapper(args):
    pars = OriginalParams(Parmap(time_step=1e-2, nb_iter=500, discount=.99))
    rwd_function = args[1]
    pars.rwd_func = rwd_function

    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = rwd_function.__name__ + str(t)
    pars.run_name = run_name_suffix + pars.run_name

    env = AllegroCollisionFromTactileHand(pars, gui=False, show_fingers=False)
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    print("reset")
    time.sleep(0.1)
    U.get_session().close()
    env.close()
    time.sleep(0.1)
    return args[0]
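# Hypothetical driver (not from the original repo): this multi_wrapper expects
# (run_index, reward_function) tuples, so a sweep over reward functions can be
# dispatched with one process per run. move_connection_to_object is only an
# example reward taken from setting_utils.rewards; substitute others as needed.
if __name__ == "__main__":
    from multiprocessing import Pool
    from setting_utils.rewards import move_connection_to_object

    jobs = list(enumerate([move_connection_to_object]))
    with Pool(processes=len(jobs)) as pool:
        finished = pool.map(multi_wrapper, jobs)
    print("finished run indices:", finished)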
Example #6
def multi_wrapper(args):
    pars = OriginalParams(
        Parmap(time_step=1e-2, nb_iter=600, movement_noise=args))
    pars.rwd_func = weighted_or_dist

    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = "SimulatedNoise_{}_".format(str(t))
    pars.run_name = run_name_suffix + pars.run_name

    env = AllegronNoiseFromUpper(pars, gui=False, show_fingers=False)
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    print("reset")
    time.sleep(0.1)
    U.get_session().close()
    env.close()
    time.sleep(0.1)
    return args
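# Hypothetical driver (not from the original repo): here args is a single
# movement_noise value, so a noise sweep simply maps multi_wrapper over a list
# of assumed noise levels, one process per level.
if __name__ == "__main__":
    from multiprocessing import Pool

    noise_levels = [0.0, 0.01, 0.05, 0.1]  # assumed sweep values
    with Pool(processes=len(noise_levels)) as pool:
        pool.map(multi_wrapper, noise_levels)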
Example #7
def get_non_trained_env():
    pars = UpperParmap(Parmap())
    env = AllegroHand(pars)
    return env, pars
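# Minimal usage sketch (assumption: AllegroHand exposes the gym-style spaces
# used elsewhere in these examples).
if __name__ == "__main__":
    env, pars = get_non_trained_env()
    print(env.observation_space.shape, env.action_space.shape)
    env.close()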
Example #8
import os
from setting_utils.param_handler import Parmap, TB_DIR, OriginalParams
import time
import baselines.common.tf_util as U

from setting_utils.rewards import move_connection_to_object
import numpy as np

os.environ['OPENAI_LOG_FORMAT'] = 'stdout,log,csv,tensorboard'
os.environ['OPENAI_LOGDIR'] = TB_DIR
import tensorflow as tf
from ausy_base.learn_policy import learn_on_env as learn_ppo

if __name__ == "__main__":
    t = time.asctime().replace(" ", "_").replace(":", "_")
    print('Start run at %s' % t)
    pars = OriginalParams(Parmap(nb_iter=300))
    pars.rwd_func = move_connection_to_object

    env = AllegroCollisionFromTactileHand(pars, gui=False, show_fingers=False)
    # logging
    run_name_suffix = "TestingNewSaveDir_{}_".format(str(t))
    pars.run_name = run_name_suffix + pars.run_name
    print(dict(**vars(pars)))
    # init session
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    # learn
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    # clean session close
    time.sleep(0.1)
    U.get_session().close()
Example #9
def build_action_space(param_map: Parmap):
    """low, high"""
    return np.ones(
        len(param_map.get_fingers()) * len(OTHER_LOW)) * -1, np.ones(
            len(param_map.get_fingers()) * len(OTHER_HIGH))
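# Hypothetical usage (not from the original repo): the (low, high) pair can be
# wrapped into a gym Box action space; Parmap and np are assumed to be in scope
# as above.
from gym import spaces

def build_box_action_space(param_map: Parmap):
    low, high = build_action_space(param_map)
    return spaces.Box(low=low, high=high, dtype=np.float32)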
Example #10
def learn_on_env(*,
                 session,
                 env: gym.Env,
                 parameters: Parmap,
                 seed=0,
                 dir_out='exp/',
                 tf_saver=None):
    assert parameters is not None, "You have to pass the run's parameters as a paramObject!"
    logger.configure(TB_DIR + parameters.run_name)
    parameters.log_run()
    # seeding, comment out if you do not want deterministic behavior across runs
    # np.random.seed(seed)
    # tf.set_random_seed(seed)
    # mlp for policy

    with tf.compat.v1.variable_scope(parameters.run_name):
        policy = MLPGaussianPolicy(session,
                                   env.action_space.shape[0],
                                   env.observation_space.shape[0],
                                   mean_mult=parameters.mean_mult,
                                   init_sigma=parameters.init_sigma,
                                   min_sigma=parameters.min_sigma)
    p_update = PolicyUpdateObjective(session,
                                     policy,
                                     e_clip=parameters.e_clip,
                                     a_lrate=parameters.a_lrate)

    # mlp for v_function

    with tf.compat.v1.variable_scope("v_func"):
        vmlp = policies.getMLPdef(1, env.observation_space.shape[0])
    p_eval = PolicyEvalV(session, policy, vmlp, v_lrate=parameters.v_lrate)

    # p3o
    delta_logsig = np.float32(
        (np.log(parameters.init_sigma) - np.log(parameters.min_sigma)) /
        parameters.nb_iter)
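    # delta_logsig splits the log-space gap between init_sigma and min_sigma into
    # nb_iter equal steps; dividing the policy's sigma floor by exp(delta_logsig)
    # once per iteration (below) anneals exploration exponentially from init_sigma
    # toward min_sigma, with tf.maximum keeping it from dropping below min_sigma.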
    reduce_exploration_op = tf.compat.v1.assign(
        policy.min_sigma,
        tf.maximum(parameters.min_sigma,
                   policy.min_sigma.value() / tf.exp(delta_logsig)))
    reduce_exploration = lambda: session.run(reduce_exploration_op)
    p3o = P3O(session, policy, p_eval, p_update)
    session.run(tf.compat.v1.global_variables_initializer())
    if not tf_saver:
        tf_saver = tf.compat.v1.train.Saver(
            var_list=tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.GLOBAL_VARIABLES,
                scope=parameters.run_name + "/pi"),
            max_to_keep=1)
    for it in range(parameters.nb_iter):
        logger.record_tabular("iteration", it + 1)

        # Generates transition data by interacting with the env
        new_paths = dat.rollouts(env,
                                 policy=policy.get_action,
                                 min_trans=parameters.min_trans_per_iter,
                                 render=False)
        new_paths["logprob"] = policy.get_log_proba(new_paths["obs"],
                                                    new_paths["act"])
        logger.record_tabular("num_trajectories", new_paths["nb_paths"])
        rwd_vals_on_paths_end = new_paths["rwd"][new_paths["done"]]
        logger.record_tabular(
            "reached_rel",
            np.sum(rwd_vals_on_paths_end != parameters.extrinsic_reward) /
            new_paths['nb_paths'])  #see test2 reward function
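        # reached_rel: fraction of rollouts whose terminal reward differs from the
        # plain extrinsic reward, i.e. (presumably) the share of rollouts that
        # reached the target; see the reward function referenced in the comment above.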
        logger.record_tabular("avg / run",
                              np.sum(new_paths["rwd"]) / new_paths["nb_paths"])
        logger.record_tabular("avg rwd", np.mean(new_paths["rwd"]))
        logger.record_tabular("avg rwd non end",
                              np.mean(new_paths["rwd"][~new_paths["done"]]))
        # logger.record_tabular("mean_tactile_per_state",
        #                       new_paths["obs"][:, -TACTILE_OBS.size:].mean(axis=1).sum() / new_paths["obs"].shape[0])
        # Keep the last max_v_data_size for off-policy policy eval
        if it == 0:
            v_paths = new_paths
        else:
            dat.merge_data(v_paths, new_paths)
            dat.delete_data(v_paths, parameters.max_v_data_size)
        # update policy
        gen_adv = p3o.iteration(v_paths,
                                new_paths,
                                parameters.discount,
                                parameters.lam_trace,
                                parameters.epochs_per_iter,
                                parameters.batch_size,
                                reduce_exploration,
                                verbose=True)
        # log update data
        if dir_out is not None:
            entrop_after = policy.get_entropy(None, None)
            log_act_probas_new = policy.get_log_proba(new_paths["obs"],
                                                      new_paths["act"])
            diff = np.exp(log_act_probas_new - new_paths["logprob"]) - 1

            #logger.record_tabular("log_act_probas_new", log_act_probas_new)
            logger.record_tabular("diff_mean", diff.mean())
            logger.record_tabular("gen_adv_sum",
                                  gen_adv.sum())  # not gen_advantage?

            logger.dump_tabular()
            if it % 30 == 0:
                print("saving " + parameters.run_name)
                # regularly save policy
                tf_saver.save(session,
                              os.path.join(TB_DIR, parameters.run_name) + '/',
                              meta_graph_suffix="meta",
                              write_meta_graph=True,
                              global_step=it)
Example #11
from pybullet_envs.gym_locomotion_envs import HumanoidBulletEnv

from setting_utils.param_handler import Parmap

from ausy_base.learn_policy import learn_on_env as learn_ppo
import time
import tensorflow as tf

if __name__ == '__main__':
    pars = Parmap()
    #env = Walker2DBulletEnv(render=True)
    env = HumanoidBulletEnv(render=False)
    # for logging
    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = "PPO_test_{}_".format(str(t))
    pars.run_name = run_name_suffix + pars.run_name

    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    pars.nb_iter = 300000
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
Example #12

if __name__ == '__main__':
    pars = Parmap(time_step=1e-2)
    pars.rwd_func = lambda x: (0, False)
    env = AllegroCollisionFromTactileHand(pars, gui=True, show_fingers=True)

    all_pos = all_pos_list.copy()
    for pos in all_pos:
        env = pos(env)
        zero_act = get_joint_state(env.hand)
        env.pc.step_simulation()
        for i in range(3):
            # put debugger here, and use console to prototype new hand-object configurations
            env.show_collisions()
            tac_state_dict = tactile_state_dict(env.hand)
            print(env.reward())
            for finger in tac_state_dict:
                print(finger, np.sum(tac_state_dict[finger]))
Example #13
    def __init__(self, allegros_params: Parmap, gui=False, show_fingers=False):
        self.objects_with_collision = allegros_params.get_fingers()
        super(AllegroCollisionFromTactileHand,
              self).__init__(allegros_params, gui, show_fingers)