def random_sample():
    # Mean trajectory length over 100 trials for a newly initialized policy:
    # random_sample_policy
    #   pos1          6.28
    #   pos2          6.47
    #   pos3          6.30
    #   pos_test      6.94
    #   resetHandEasy 5.78
    parameters = OriginalParams(Parmap(time_step=1e-2, nb_iter=500))
    env = AllegroHand(parameters)
    mdim_vals = {}
    name_pathl = {}
    tf.reset_default_graph()
    sess = tf.Session()
    num_tra = 100
    with tf.compat.v1.variable_scope("random_sample_policy"):
        policy = MLPGaussianPolicy(sess,
                                   env.action_space.shape[0],
                                   env.observation_space.shape[0],
                                   mean_mult=parameters.mean_mult,
                                   init_sigma=parameters.init_sigma,
                                   min_sigma=parameters.min_sigma)
    sess.run(tf.compat.v1.global_variables_initializer())
    all_positions = position_list.copy()
    all_positions.append(pos_test)
    for position_func in all_positions:
        # Roll out the untrained policy and record the mean trajectory length
        # per hand-object configuration.
        nb_traj_length = run_lower_with_environment(
            position_func, env, lambda obs: policy.get_action(obs),
            num_trajectories=num_tra)
        name_pathl.update({position_func.__name__: nb_traj_length})
    env.close()
    U.get_session().close()
    mdim_vals.update({"random_sample_policy": name_pathl})
    return pd.DataFrame(mdim_vals)
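# Hypothetical usage (not in the original sources): collect the random-policy
# baseline table and persist it; the CSV file name below is illustrative.
if __name__ == '__main__':
    baseline_df = random_sample()
    print(baseline_df)
    baseline_df.to_csv('random_sample_baseline.csv')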
def save_position_as_images(res=1600):
    env = AllegroHand(Parmap(), gui=True, show_fingers=True)
    for pos in all_pos_list:
        # Optional: frame the shot with the debug camera.
        # env.pc.reset_debug_visualizer_camera(CamProps.camera_distance,
        #                                      CamProps.camera_yaw,
        #                                      CamProps.camera_pitch,
        #                                      CamProps.camera_target_position)
        env = pos(env)
        env = adhere_target_to_initial(env)
        env.pc.time_step = 1e-3
        env.pc.step_simulation()
        camera_image = env.pc.call(pybullet.getCameraImage, res, res)
        rgb_image = camera_image[2]
        img = Image.fromarray(rgb_image[:, :, 0:3])  # discard alpha channel
        img.save(pos.__name__ + '.pdf')
        print("break")
        time.sleep(1)
    env.close()
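# Hypothetical invocation (not in the original): a lower resolution keeps the
# render fast while prototyping; the 1600 px default is the print-quality setting.
if __name__ == '__main__':
    save_position_as_images(res=400)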
def multi_wrapper(args):
    # args: (job index, reward function) -- one PPO run per reward shape.
    pars = OriginalParams(Parmap(time_step=1e-2, nb_iter=500, discount=.99))
    rwd_function = args[1]
    pars.rwd_func = rwd_function
    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = rwd_function.__name__ + str(t)
    pars.run_name = run_name_suffix + pars.run_name
    env = AllegroCollisionFromTactileHand(pars, gui=False, show_fingers=False)
    tf.reset_default_graph()
    sess = tf.Session()
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    print("reset")
    time.sleep(0.1)
    U.get_session().close()
    env.close()
    time.sleep(0.1)
    return args[0]
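# Hypothetical driver (not in the original): this wrapper takes
# (index, reward_function) pairs and returns the index, so it maps cleanly
# over a process pool. The reward list is illustrative;
# move_connection_to_object is the only reward named in these sources.
if __name__ == '__main__':
    from multiprocessing import Pool
    from setting_utils.rewards import move_connection_to_object

    jobs = list(enumerate([move_connection_to_object]))
    # One process per job; each worker builds its own TF graph and session.
    with Pool(processes=len(jobs)) as pool:
        finished = pool.map(multi_wrapper, jobs)
    print('finished jobs:', finished)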
def multi_wrapper(args):
    # args: a single movement-noise level -- one PPO run per noise setting.
    pars = OriginalParams(Parmap(time_step=1e-2, nb_iter=600,
                                 movement_noise=args))
    pars.rwd_func = weighted_or_dist
    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = "SimulatedNoise_{}_".format(str(t))
    pars.run_name = run_name_suffix + pars.run_name
    env = AllegronNoiseFromUpper(pars, gui=False, show_fingers=False)
    tf.reset_default_graph()
    sess = tf.Session()
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    print("reset")
    time.sleep(0.1)
    U.get_session().close()
    env.close()
    time.sleep(0.1)
    return args
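# Hypothetical driver (not in the original): sweep simulated movement noise in
# parallel. The noise levels below are illustrative placeholders, not values
# taken from the experiments.
if __name__ == '__main__':
    from multiprocessing import Pool

    noise_levels = [0.0, 0.005, 0.01, 0.05]
    # One process per level; each worker owns its own TF graph and session.
    with Pool(processes=len(noise_levels)) as pool:
        finished = pool.map(multi_wrapper, noise_levels)
    print('finished noise levels:', finished)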
def get_non_trained_env():
    pars = UpperParmap(Parmap())
    env = AllegroHand(pars)
    return env, pars
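# Minimal smoke test (hypothetical; assumes AllegroHand exposes the usual
# gym reset/step API, as the surrounding code suggests).
if __name__ == '__main__':
    env, pars = get_non_trained_env()
    obs = env.reset()
    for _ in range(10):
        obs, rwd, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.close()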
import os
import time

import numpy as np

import baselines.common.tf_util as U
from setting_utils.param_handler import Parmap, TB_DIR, OriginalParams
from setting_utils.rewards import move_connection_to_object

# Configure OpenAI baselines logging before pulling in the learner.
os.environ['OPENAI_LOG_FORMAT'] = 'stdout,log,csv,tensorboard'
os.environ['OPENAI_LOGDIR'] = TB_DIR

import tensorflow as tf
from ausy_base.learn_policy import learn_on_env as learn_ppo

if __name__ == "__main__":
    t = time.asctime().replace(" ", "_").replace(":", "_")
    print('Start run at %s' % t)
    pars = OriginalParams(Parmap(nb_iter=300))
    pars.rwd_func = move_connection_to_object
    env = AllegroCollisionFromTactileHand(pars, gui=False, show_fingers=False)
    # logging
    run_name_suffix = "TestingNewSaveDir_{}_".format(str(t))
    pars.run_name = run_name_suffix + pars.run_name
    print(dict(**vars(pars)))
    # init session
    tf.reset_default_graph()
    sess = tf.Session()
    # learn
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    # clean session close
    time.sleep(0.1)
    U.get_session().close()
import time

import tensorflow as tf
from pybullet_envs.gym_locomotion_envs import HumanoidBulletEnv

from ausy_base.learn_policy import learn_on_env as learn_ppo
from setting_utils.param_handler import Parmap

if __name__ == '__main__':
    pars = Parmap()
    # env = Walker2DBulletEnv(render=True)
    env = HumanoidBulletEnv(render=False)
    # for logging
    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = "PPO_test_{}_".format(str(t))
    pars.run_name = run_name_suffix + pars.run_name
    tf.reset_default_graph()
    sess = tf.Session()
    pars.nb_iter = 300000
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
if __name__ == '__main__':
    pars = Parmap(time_step=1e-2)
    pars.rwd_func = lambda x: (0, False)
    env = AllegroCollisionFromTactileHand(pars, gui=True, show_fingers=True)
    all_pos = all_pos_list.copy()
    for pos in all_pos:
        env = pos(env)
        zero_act = get_joint_state(env.hand)
        env.pc.step_simulation()
        for i in range(3):
            # Put a debugger breakpoint here and use the console to prototype
            # new hand-object configurations.
            env.show_collisions()
            tac_state_dict = tactile_state_dict(env.hand)
            print(env.reward())
            for finger in tac_state_dict:
                print(finger, np.sum(tac_state_dict[finger]))