# Fragment: assumes `data` (a loaded snapshot), `env`, ENV_PARAMS, and
# collect_demos_fixed are defined earlier in the script.
policy = data['evaluation/policy']
# policy = policy.to("cpu")
# image_env = ImageEnv(
#     env,
#     48,
#     init_camera=sawyer_init_camera_zoomed_in,
#     transpose=True,
#     normalize=True,
# )

env_name = "pendulum"
outfile = "/home/ashvin/data/s3doodad/demos/icml2020/mujoco/%s.npy" % env_name
horizon = ENV_PARAMS[env_name]['max_path_length']
collect_demos_fixed(env, policy, outfile, N=100, horizon=horizon)
# Earlier variant of the call above:
# collect_demos_fixed(env, policy, outfile, N=100, horizon=horizon,
#     threshold=.1, add_action_noise=False, key='puck_distance', render=True, noise_sigma=0.0)

# Replay saved pusher demos against their recorded goals:
# data = load_local_or_remote_file("demos/pusher_demos_1000.npy")
# for i in range(100):
#     goal = data[i]['observations'][49]['desired_goal']
#     o = env.reset()
#     path_length = 0
#     while path_length < 50:
#         env.set_goal({'state_desired_goal': goal})
#         o = o['state_observation']
#         new_obs = np.hstack((o, goal))
#         a, agent_info = policy.get_action(new_obs)
#         o, r, d, env_info = env.step(a)
#         path_length += 1
#     print(i, env_info['puck_distance'])
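# Quick sanity check of the saved file. Assumption: collect_demos_fixed
# writes the list of path dicts with np.save (as the .npy extension
# suggests), so loading the resulting object array back needs
# allow_pickle=True:
#
#     paths = np.load(outfile, allow_pickle=True)
#     print(len(paths), paths[0].keys())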
import gym
import numpy as np

from railrl.demos.collect_demo import collect_demos_fixed
# NOTE: the SpaceMouseExpert import path below is assumed; point it at
# wherever SpaceMouseExpert lives in this repo.
from railrl.demos.spacemouse.input_server import SpaceMouseExpert

if __name__ == '__main__':
    expert = SpaceMouseExpert(
        xyz_dims=1,
        xyz_remap=[1, 0, 2],
        xyz_scale=[1, -1, -1],
    )
    env = gym.make("MountainCarContinuous-v0")
    env.render()
    # env = SawyerMultiobjectEnv(
    #     num_objects=1,
    #     preload_obj_dict=[
    #         dict(color2=(0.1, 0.1, 0.9)),
    #     ],
    # )
    # env = ImageEnv(env,
    #     recompute_reward=False,
    #     # transpose=True,
    #     init_camera=sawyer_pusher_camera_upright_v2,
    # )
    collect_demos_fixed(env, expert, "test.npy", 100, horizon=10000, render=True)
import gym
import numpy as np

from railrl.demos.collect_demo import collect_demos_fixed
# NOTE: the KeyboardExpert import path below is assumed; point it at
# wherever KeyboardExpert lives in this repo.
from railrl.demos.keyboard_expert import KeyboardExpert
from sawyer_control.envs.sawyer_insertion_refined import SawyerHumanControlEnv

if __name__ == '__main__':
    expert = KeyboardExpert(
        xyz_dims=3,
        # xyz_remap=[0, 1, 2],
        xyz_scale=[-1, -1, 0.75],
    )
    env = SawyerHumanControlEnv(
        action_mode='joint_space_impd',
        position_action_scale=1,
        max_speed=0.015,
    )
    # env = gym.make("MountainCarContinuous-v0")
    # env = SawyerMultiobjectEnv(
    #     num_objects=1,
    #     preload_obj_dict=[
    #         dict(color2=(0.1, 0.1, 0.9)),
    #     ],
    # )
    # env = ImageEnv(env,
    #     recompute_reward=False,
    #     # transpose=True,
    #     init_camera=sawyer_pusher_camera_upright_v2,
    # )
    collect_demos_fixed(env, expert, "insertion10.npy", 10)
import gym

from railrl.demos.collect_demo import collect_demos_fixed
from railrl.misc.asset_loader import load_local_or_remote_file

# data = load_local_or_remote_file(
#     '/home/murtaza/research/railrl/data/doodads3/03-08-bc-ant-gym-v1/03-08-bc_ant_gym_v1_2020_03_08_19_22_00_id000--s39483/bc.pkl')
# # env = gym.make('Ant-v2')
# policy = MakeDeterministic(data.cpu())
# collect_demos_fixed(env, policy, "data/local/demos/ant_off_policy_10_demos_100.npy", N=100, horizon=1000, threshold=-1,
#                     render=False)

data = load_local_or_remote_file(
    '/home/murtaza/research/railrl/data/local/03-09-bc-ant-frac-trajs-sweep/03-09-bc_ant_frac_trajs_sweep_2020_03_09_17_58_01_id000--s71624/bc.pkl'
)
env = gym.make('Ant-v2')
policy = data.cpu()
collect_demos_fixed(env, policy, "data/local/demos/ant_off_policy_10_demos_100.npy", N=100, horizon=1000, threshold=-1,
                    render=False)

data = load_local_or_remote_file(
    '/home/murtaza/research/railrl/data/local/03-09-bc-ant-frac-trajs-sweep/03-09-bc_ant_frac_trajs_sweep_2020_03_09_17_58_02_id000--s47768/bc.pkl'
)
env = gym.make('Ant-v2')
policy = data.cpu()
collect_demos_fixed(env, policy, "data/local/demos/ant_off_policy_15_demos_100.npy", N=100, horizon=1000, threshold=-1,
                    render=False)
# from multiworld.core.image_env import ImageEnv
# from multiworld.envs.mujoco.cameras import sawyer_pusher_camera_upright_v2
# from multiworld.envs.mujoco.sawyer_xyz.sawyer_push_multiobj import SawyerMultiobjectEnv
# from multiworld.envs.pygame.point2d import Point2DWallEnv
import gym
import numpy as np

from railrl.demos.collect_demo import collect_demos_fixed
# NOTE: the SpaceMouseExpert import path below is assumed; point it at
# wherever SpaceMouseExpert lives in this repo.
from railrl.demos.spacemouse.input_server import SpaceMouseExpert

if __name__ == '__main__':
    expert = SpaceMouseExpert(
        xyz_dims=1,
        xyz_remap=[1, 0, 2],
        xyz_scale=[-1, -1, -1],
    )
    env = gym.make("MountainCarContinuous-v0")
    # env = SawyerMultiobjectEnv(
    #     num_objects=1,
    #     preload_obj_dict=[
    #         dict(color2=(0.1, 0.1, 0.9)),
    #     ],
    # )
    # env = ImageEnv(env,
    #     recompute_reward=False,
    #     # transpose=True,
    #     init_camera=sawyer_pusher_camera_upright_v2,
    # )
    collect_demos_fixed(env, expert, "test.npy", 10)
# Fragment: assumes `env`, `expert`, and ImageEnv are defined/imported
# earlier in the script.
env = ImageEnv(
    env,
    recompute_reward=False,
    transpose=True,
    image_length=450000,
    reward_type="image_distance",
    # init_camera=sawyer_pusher_camera_upright_v2,
)
# env.reset()

for i in range(25):
    collect_demos_fixed(
        env, expert,
        "/home/anair/ros_ws/src/railrl-private/demos/demo_v4_grey_%i.pkl" % i,
        1, horizon=1000, pause=0.05,
    )
    print("Collected demo: ", i)

# for i in range(10):
#     collect_demos_fixed(env, expert, "demos/demo_v3.pkl", 1, horizon=1000, pause=0.05)

# o = None
# while True:
#     a, valid, reset, accept = expert.get_action(o)
#     if valid:
#         o, r, done, info = env.step(a)
#     time.sleep(0.05)
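# The commented-out loop above can be fleshed out into a standalone teleop
# helper for debugging the input device without going through
# collect_demos_fixed. Minimal sketch; it assumes expert.get_action returns
# (action, valid, reset, accept) as in that loop and that env follows the
# standard gym step API.
import time

def run_expert_teleop(env, expert, pause=0.05):
    o = env.reset()
    while True:
        a, valid, reset, accept = expert.get_action(o)
        if valid:
            o, r, done, info = env.step(a)
            if done or reset:
                o = env.reset()
        time.sleep(pause)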
# data = load_local_or_remote_file('01-12-sac-mujoco-envs/01-12-sac_mujoco_envs_2020_01_12_22_34_03_id000--s52204/params.pkl')
# data = load_local_or_remote_file('02-20-sac-mujoco-envs-unnormalized-run-longer/02-20-sac_mujoco_envs_unnormalized_run_longer_2020_02_20_23_55_13_id000--s39214/params.pkl')
# # env = data['exploration/env']
# env = gym.make('HalfCheetah-v2')
# policy = data['exploration/policy']
# collect_demos_fixed(env, policy, "data/local/demos/hc_action_noise_1000.npy", N=1000, horizon=1000, threshold=9000, render=False)

data = load_local_or_remote_file(
    '02-17-sac-mujoco-envs-unnormalized/02-17-sac_mujoco_envs_unnormalized_2020_02_18_01_07_08_id005--s24204/params.pkl'
)
env = data['evaluation/env']
policy = data['exploration/policy']
collect_demos_fixed(env, policy, "data/local/demos/hopper_action_noise_1000.npy", N=1000, horizon=1000, threshold=3000,
                    render=False)

data = load_local_or_remote_file(
    '02-17-sac-mujoco-envs-unnormalized/02-17-sac_mujoco_envs_unnormalized_2020_02_18_01_07_32_id003--s29410/params.pkl'
)
env = data['evaluation/env']
policy = data['exploration/policy']
collect_demos_fixed(env, policy, "data/local/demos/ant_action_noise_1000.npy", N=1000, horizon=1000, threshold=5000,
                    render=False)
from railrl.demos.collect_demo import collect_demos_fixed
from railrl.misc.asset_loader import load_local_or_remote_file

import gym

if __name__ == '__main__':
    data = load_local_or_remote_file(
        '02-20-sac-mujoco-envs-unnormalized-run-longer/02-20-sac_mujoco_envs_unnormalized_run_longer_2020_02_20_23_55_13_id000--s39214/params.pkl'
    )
    env = data['exploration/env']
    policy = data['exploration/policy']
    collect_demos_fixed(env, policy, "data/local/demos/hc_action_noise_25.npy", N=25, horizon=1000, threshold=9000,
                        render=False)

    # data = load_local_or_remote_file(
    #     '/home/murtaza/research/railrl/data/local/03-04-bc-hc-v2/03-04-bc_hc_v2_2020_03_04_17_57_54_id000--s90897/bc.pkl')
    # env = gym.make('HalfCheetah-v2')
    # policy = data.cpu()
    # collect_demos_fixed(env, policy, "data/local/demos/hc_off_policy_100.npy", N=100, horizon=1000, threshold=8000,
    #                     render=False)

    # data = load_local_or_remote_file(
    #     '/home/murtaza/research/railrl/data/doodads3/03-05-bc-hc-gym-v5/03-05-bc_hc_gym_v5_2020_03_06_06_55_43_id000--s42378/bc.pkl')
    # env = gym.make('HalfCheetah-v2')
    # policy = data.cpu()
    # collect_demos_fixed(env, policy, "data/local/demos/hc_off_policy_10_demos_100.npy", N=100, horizon=1000, threshold=-1,
    #                     render=False)

    # data = load_local_or_remote_file(
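# All of the scripts above funnel into railrl.demos.collect_demo.
# collect_demos_fixed. For orientation, the behavior these call sites assume
# looks roughly like the sketch below: roll out N episodes, optionally keep
# only those whose return clears `threshold`, and dump the paths to
# `outfile`. This is an illustrative reimplementation under those
# assumptions, not the railrl source; extra kwargs seen elsewhere in these
# scripts (pause, key, add_action_noise, noise_sigma) are omitted.
import numpy as np

def collect_demos_sketch(env, policy, outfile, N, horizon, threshold=None,
                         render=False):
    paths = []
    while len(paths) < N:
        o = env.reset()
        obs, actions, rewards = [], [], []
        for _ in range(horizon):
            # Same policy interface as the commented-out replay loops above.
            a, agent_info = policy.get_action(o)
            next_o, r, done, info = env.step(a)
            if render:
                env.render()
            obs.append(o)
            actions.append(a)
            rewards.append(r)
            o = next_o
            if done:
                break
        # Keep the rollout only if it clears the return threshold (if any).
        if threshold is None or sum(rewards) >= threshold:
            paths.append(dict(observations=obs, actions=actions,
                              rewards=rewards))
    # np.save pickles the list of dicts into a .npy object array.
    np.save(outfile, paths)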