Example #1
def random_sample():
    # mean trajectory length over 100 trials for a newly initialized policy
    #                random_sample_policy
    # pos1                           6.28
    # pos2                           6.47
    # pos3                           6.30
    # pos_test                       6.94
    # resetHandEasy                  5.78

    parameters = OriginalParams(Parmap(time_step=1e-2, nb_iter=500))
    env = AllegroHand(parameters)
    mdim_vals = {}
    name_pathl = {}
    # try:
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    num_tra = 100
    with tf.compat.v1.variable_scope("random_sample_policy"):
        policy = MLPGaussianPolicy(sess, env.action_space.shape[0], env.observation_space.shape[0],
                                   mean_mult=parameters.mean_mult,
                                   init_sigma=parameters.init_sigma,
                                   min_sigma=parameters.min_sigma)
    sess.run(tf.compat.v1.global_variables_initializer())
    all_positions = position_list.copy()
    all_positions.append(pos_test)
    for position_func in all_positions:
        nb_traj_length = run_lower_with_environment(position_func, env, lambda obs: policy.get_action(obs), num_trajectories=num_tra)
        # nb_traj_length = run_lower_policy(position_func, params, policy, num_trajectories=num_tra, show=True)
        name_pathl.update({position_func.__name__: nb_traj_length})
    env.close()
    U.get_session().close()
    # except Exception as Id:
    #     print(name, 'not found in meta')
    mdim_vals.update({"random_sample_policy": name_pathl})
    return pd.DataFrame(mdim_vals)
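# Usage sketch (not from the original source): random_sample() returns a pandas
# DataFrame with a single "random_sample_policy" column indexed by position name,
# so the untrained-policy baseline can be summarized with, e.g.:
#   df = random_sample()
#   print(df["random_sample_policy"].mean())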
def save_position_as_images(res=1600):
    env = AllegroHand(Parmap(), gui=True, show_fingers=True)
    for pos in all_pos_list:
        # env.pc.reset_debug_visualizer_camera(CamProps.camera_distance,
        #                                      CamProps.camera_yaw,
        #                                      CamProps.camera_pitch,
        #                                      CamProps.camera_target_position)
        env = pos(env)
        env = adhere_target_to_initial(env)
        env.pc.time_step = 1e-3
        env.pc.step_simulation()
        # getCameraImage returns (width, height, rgbPixels, depthBuffer, segmentationMask)
        camera_image = env.pc.call(pybullet.getCameraImage, res, res)
        rgb_image = camera_image[2]
        img = Image.fromarray(rgb_image[:, :, 0:3])  # discard alpha channel
        img.save(pos.__name__ + '.pdf')
        print("break")
        time.sleep(1)
    env.close()
def multi_wrapper(args):
    pars = OriginalParams(Parmap(time_step=1e-2, nb_iter=500, discount=.99))
    rwd_function = args[1]
    pars.rwd_func = rwd_function

    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = rwd_function.__name__ + str(t)
    pars.run_name = run_name_suffix + pars.run_name

    env = AllegroCollisionFromTactileHand(pars, gui=False, show_fingers=False)
    tf.reset_default_graph()
    sess = tf.Session()
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    print("reset")
    time.sleep(0.1)
    U.get_session().close()
    env.close()
    time.sleep(0.1)
    return args[0]
def multi_wrapper(args):
    pars = OriginalParams(
        Parmap(time_step=1e-2, nb_iter=600, movement_noise=args))
    pars.rwd_func = weighted_or_dist

    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = "SimulatedNoise_{}_".format(str(t))
    pars.run_name = run_name_suffix + pars.run_name

    env = AllegronNoiseFromUpper(pars, gui=False, show_fingers=False)
    tf.reset_default_graph()
    sess = tf.Session()
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    print("reset")
    time.sleep(0.1)
    U.get_session().close()
    env.close()
    time.sleep(0.1)
    return args
def get_non_trained_env():
    pars = UpperParmap(Parmap())
    env = AllegroHand(pars)
    return env, pars
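# Hypothetical driver, not part of the original source: both multi_wrapper
# variants above reset the TF graph and close their own session, the usual
# pattern when fanning runs out over a multiprocessing pool. The noise levels
# below are illustrative placeholders.
if __name__ == "__main__":
    from multiprocessing import Pool

    noise_levels = [0.0, 0.01, 0.05]  # illustrative movement_noise values
    with Pool(processes=len(noise_levels)) as pool:
        # maps onto the noise-sweep variant of multi_wrapper defined above
        finished = pool.map(multi_wrapper, noise_levels)
    print("finished runs for noise levels:", finished)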
Example #6
import os
from setting_utils.param_handler import Parmap, TB_DIR, OriginalParams
import time
import baselines.common.tf_util as U

from setting_utils.rewards import move_connection_to_object
import numpy as np

os.environ['OPENAI_LOG_FORMAT'] = 'stdout,log,csv,tensorboard'
os.environ['OPENAI_LOGDIR'] = TB_DIR
import tensorflow as tf
from ausy_base.learn_policy import learn_on_env as learn_ppo

if __name__ == "__main__":
    t = time.asctime().replace(" ", "_").replace(":", "_")
    print('Start run at %s' % t)
    pars = OriginalParams(Parmap(nb_iter=300))
    pars.rwd_func = move_connection_to_object

    env = AllegroCollisionFromTactileHand(pars, gui=False, show_fingers=False)
    # logging
    run_name_suffix = "TestingNewSaveDir_{}_".format(str(t))
    pars.run_name = run_name_suffix + pars.run_name
    print(dict(**vars(pars)))
    # init session
    tf.reset_default_graph()
    sess = tf.Session()
    # learn
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    # clean session close
    time.sleep(0.1)
    U.get_session().close()
from pybullet_envs.gym_locomotion_envs import HumanoidBulletEnv

from setting_utils.param_handler import Parmap

from ausy_base.learn_policy import learn_on_env as learn_ppo
import time
import tensorflow as tf

if __name__ == '__main__':
    pars = Parmap()
    #env = Walker2DBulletEnv(render=True)
    env = HumanoidBulletEnv(render=False)
    # for logging
    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = "PPO_test_{}_".format(str(t))
    pars.run_name = run_name_suffix + pars.run_name

    tf.reset_default_graph()
    sess = tf.Session()
    pars.nb_iter = 300000
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
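    # clean up the session and environment, mirroring the other training scripts above
    time.sleep(0.1)
    sess.close()
    env.close()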

if __name__ == '__main__':
    pars = Parmap(time_step=1e-2)
    pars.rwd_func = lambda x: (0, False)  # placeholder reward: no learning signal
    env = AllegroCollisionFromTactileHand(pars, gui=True, show_fingers=True)

    all_pos = all_pos_list.copy()
    for pos in all_pos:
        env = pos(env)
        zero_act = get_joint_state(env.hand)  # current joint state of the hand (not used further in this snippet)
        env.pc.step_simulation()
        for i in range(3):
            # put debugger here, and use console to prototype new hand-object configurations
            env.show_collisions()
            tac_state_dict = tactile_state_dict(env.hand)
            print(env.reward())
            for finger in tac_state_dict:
                print(finger, np.sum(tac_state_dict[finger]))