def get_data(N=10000, test_p=0.9, use_cached=True, render=False):
    """Load or generate a dataset of flattened 84x84x3 pusher images.

    The dataset is cached at ``/tmp/pusher2d_smallpuck_<N>.npy``. When the
    cache is missing (or ``use_cached`` is false) the images are collected by
    stepping ``FullPusher2DEnv`` with uniform random actions in [-2, 2) and the
    result is written back to the cache file.

    Args:
        N: number of images (rows) in the dataset.
        test_p: fraction of rows placed in the FIRST returned array. Despite
            the name, this is the size of the training split.
        use_cached: load the cache file when it exists instead of regenerating.
        render: show every generated frame in an OpenCV window.

    Returns:
        ``(train_dataset, test_dataset)``: the first ``int(N * test_p)`` rows
        and the remaining rows.
    """
    filename = "/tmp/pusher2d_smallpuck_" + str(N) + ".npy"
    if use_cached and osp.isfile(filename):
        dataset = np.load(filename).astype(np.float32)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        e = FullPusher2DEnv()
        e = ImageMujocoEnv(
            e, 84, camera_name="topview", transpose=True, normalize=True,
        )
        dataset = np.zeros((N, 3 * 84 * 84))
        for i in range(N):
            # Start a fresh episode every 100 steps.
            if i % 100 == 0:
                e.reset()
            u = np.random.rand(3) * 4 - 2
            img, _, _, _ = e.step(u)
            dataset[i, :] = img
            if render:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
        print(
            "done making training data", filename, time.time() - now,
            "mean", dataset.mean(),
        )
        # Persist the freshly generated data so later calls hit the cache.
        # NOTE(review): this statement was garbled in the source
        # ("..., dataset)"); np.save is the reconstruction - confirm.
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset
def get_data(N=10000, test_p=0.9, use_cached=True, imsize=84):
    """Load or generate a dataset of flattened Sawyer pushing images.

    The dataset is cached at ``/tmp/sawyer_<N>.npy``. When generating, each
    sample comes from a fresh episode: a goal is sampled, the env is reset,
    50 random actions are applied to the wrapped (state) env, and the image
    returned by one more random step of the image env is stored.

    Args:
        N: number of images (rows) in the dataset.
        test_p: fraction of rows placed in the FIRST returned array. Despite
            the name, this is the size of the training split.
        use_cached: load the cache file when it exists instead of regenerating.
        imsize: side length of the square rendered image.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` holds the
        image env only when the data was generated in this call.
    """
    filename = "/tmp/sawyer_" + str(N) + ".npy"
    info = {}
    if use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEnv()
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_init_camera,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.wrapped_env.set_goal(env.wrapped_env.sample_goal_for_rollout())
            env.reset()
            # Random walk on the wrapped env; only the final frame is kept.
            for _ in range(50):
                env.wrapped_env.step(env.wrapped_env.action_space.sample())
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
        print("done making training data", filename, time.time() - now)
        # Persist the freshly generated data so later calls hit the cache.
        # NOTE(review): this statement was garbled in the source
        # ("..., dataset)"); np.save is the reconstruction - confirm.
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        init_camera=sawyer_init_camera_zoomed_in,
        dataset_path=None,
        env_kwargs=None,
):
    """Load or generate VAE training images from ``SawyerPushXYVariableEnv``.

    Source priority: ``dataset_path`` (resolved through
    ``local_path_from_s3_or_local_path``), then the local cache file, then
    fresh generation. When generating, every sample sets the env to a sampled
    goal state (without resetting the hand), moves the hand to a sampled XY
    position and stores the image returned by one random step.

    Args:
        N: number of images to generate; overwritten by the number of rows
            when ``dataset_path`` is given.
        test_p: fraction of rows placed in the FIRST returned array. Despite
            the name, this is the size of the training split.
        use_cached: load the cache file when it exists instead of regenerating.
        imsize: side length of the square rendered image.
        show: preview every generated frame in an OpenCV window.
        init_camera: camera initializer handed to ``ImageMujocoEnv``; its
            ``__name__`` is part of the cache file name.
        dataset_path: optional path of a pre-generated ``.npy`` dataset.
        env_kwargs: extra keyword arguments for the env constructor.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` holds the
        image env only when the data was generated in this call.
    """
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/sawyer_push_variable{}_{}.npy".format(
        str(N), init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYVariableEnv(hide_goal=True, **env_kwargs)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            goal = env.sample_goal_for_rollout()
            hand_pos = env.sample_hand_xy()
            env.set_to_goal(goal, reset_hand=False)
            env.set_hand_xy(hand_pos)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                # Use imsize (not a literal 84) so previews work at any size.
                img = img.reshape(3, imsize, imsize).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        # Persist the freshly generated data so later calls hit the cache.
        # NOTE(review): this statement was garbled in the source
        # ("..., dataset)"); np.save is the reconstruction - confirm.
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        init_camera=sawyer_init_camera_zoomed_in,
        dataset_path=None,
        env_kwargs=None,
):
    """Load or generate VAE training images from ``SawyerResetFreePushEnv``.

    Oracle means that we use `set_to_goal` rather than doing random rollouts.

    Source priority: ``dataset_path`` (resolved through
    ``local_path_from_s3_or_local_path``), then the local cache file, then
    fresh generation. When generating, every sample sets the env to a sampled
    goal state and stores the image returned by ``env.reset()``.

    Args:
        N: number of images to generate; overwritten by the number of rows
            when ``dataset_path`` is given.
        test_p: fraction of rows placed in the FIRST returned array. Despite
            the name, this is the size of the training split.
        use_cached: load the cache file when it exists instead of regenerating.
        imsize: side length of the square rendered image.
        show: preview every generated frame in an OpenCV window.
        init_camera: camera initializer handed to ``ImageMujocoEnv``; its
            ``__name__`` is part of the cache file name.
        dataset_path: optional path of a pre-generated ``.npy`` dataset.
        env_kwargs: extra keyword arguments for the env constructor.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` holds the
        image env only when the data was generated in this call.
    """
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/sawyer_reset_free_push{}_{}.npy".format(
        str(N), init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerResetFreePushEnv(hide_goal=True, **env_kwargs)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            goal = env.sample_goal_for_rollout()
            env.set_to_goal(goal)
            img = env.reset()
            dataset[i, :] = img
            if show:
                # Use imsize (not a literal 84) so previews work at any size.
                img = img.reshape(3, imsize, imsize).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        # Persist the freshly generated data so later calls hit the cache.
        # NOTE(review): this statement was garbled in the source
        # ("..., dataset)"); np.save is the reconstruction - confirm.
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        init_camera=sawyer_init_camera_zoomed_in,
        dataset_path=None,
):
    """Load or generate VAE training images from ``SawyerPushXYEasyEnv``.

    Source priority: ``dataset_path`` (resolved through
    ``local_path_from_s3_or_local_path``), then the local cache file, then
    fresh generation. When generating, every sample resets the env, applies
    100 random actions to the wrapped (state) env and stores the image
    returned by one more random step of the image env.

    Args:
        N: number of images to generate; overwritten by the number of rows
            when ``dataset_path`` is given.
        test_p: fraction of rows placed in the FIRST returned array. Despite
            the name, this is the size of the training split.
        use_cached: load the cache file when it exists instead of regenerating.
        imsize: side length of the square rendered image.
        show: preview every generated frame in an OpenCV window.
        init_camera: camera initializer handed to ``ImageMujocoEnv``; its
            ``__name__`` is part of the cache file name.
        dataset_path: optional path of a pre-generated ``.npy`` dataset.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` holds the
        image env only when the data was generated in this call.
    """
    filename = "/tmp/sawyer_push_new_easy{}_{}.npy".format(
        str(N), init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEasyEnv(hide_goal=True)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.reset()
            # Random walk on the wrapped env; only the final frame is kept.
            for _ in range(100):
                action = env.wrapped_env.action_space.sample()
                env.wrapped_env.step(action)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            print(i)
            if show:
                # Use imsize (not a literal 84) so previews work at any size.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        # Persist the freshly generated data so later calls hit the cache.
        # NOTE(review): this statement was garbled in the source
        # ("..., dataset)"); np.save is the reconstruction - confirm.
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
):
    """Load or generate VAE training images using OU-noise exploration.

    Source priority: ``dataset_path`` (resolved through
    ``local_path_from_s3_or_local_path``), then the local cache file
    ``/tmp/sawyer_push_new_easy_wider2_<N>.npy``, then fresh generation.
    When generating, ``SawyerPushXYEasyEnv`` is stepped continuously (no env
    reset) with actions produced by an ``OUStrategy``; every 100 steps a new
    goal is set and the strategy is reset.

    Args:
        N: number of images to generate; overwritten by the number of rows
            when ``dataset_path`` is given.
        test_p: fraction of rows placed in the FIRST returned array. Despite
            the name, this is the size of the training split.
        use_cached: load the cache file when it exists instead of regenerating.
        imsize: side length of the square rendered image.
        show: preview every generated frame in an OpenCV window.
        dataset_path: optional path of a pre-generated ``.npy`` dataset.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` holds the
        image env only when the data was generated in this call.
    """
    filename = "/tmp/sawyer_push_new_easy_wider2_" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        # Split on the real row count; the N argument may not match the file.
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEasyEnv(hide_goal=True)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_init_camera_zoomed_in,
            normalize=True,
        )
        info['env'] = env
        policy = OUStrategy(env.action_space)
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            # NOTE(review): block nesting was reconstructed from a
            # whitespace-mangled source; policy.reset() is assumed to belong
            # to the every-100-steps branch - confirm.
            if i % 100 == 0:
                g = env.sample_goal_for_rollout()
                env.set_goal(g)
                policy.reset()
            u = policy.get_action_from_raw_action(env.action_space.sample())
            img = env.step(u)[0]
            dataset[i, :] = img
            if show:
                # Use imsize (not a literal 84) so previews work at any size.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        # Persist the freshly generated data so later calls hit the cache.
        # NOTE(review): this statement was garbled in the source
        # ("..., dataset)"); np.save is the reconstruction - confirm.
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def experiment(variant):
    """Train a discrete-action Q-function from image observations.

    Two independent copies of the gym env named by ``variant['env_id']`` are
    built (evaluation and training). Each is wrapped, in order, by
    ``NormalizedBoxEnv``, ``ImageMujocoEnv`` and ``DiscretizeEnv``. A ``CNN``
    Q-function with one output per discrete action is then trained with the
    algorithm class given in ``variant['algo_class']``.
    """
    imsize = variant['imsize']
    history = variant['history']

    # Index 0 is the evaluation env, index 1 the training env. Every wrapping
    # stage is applied to both before moving on to the next stage.
    env_pair = [gym.make(variant['env_id']).env for _ in range(2)]
    env_pair = [NormalizedBoxEnv(raw) for raw in env_pair]
    env_pair = [
        ImageMujocoEnv(
            normalized,
            imsize=imsize,
            keep_prev=history - 1,
            init_camera=variant['init_camera'],
        )
        for normalized in env_pair
    ]
    env_pair = [DiscretizeEnv(image, variant['bins']) for image in env_pair]
    env, training_env = env_pair

    qf = CNN(
        output_size=env.action_space.n,
        input_width=imsize,
        input_height=imsize,
        input_channels=history,
        **variant['cnn_params']
    )
    qf_criterion = variant['qf_criterion_class']()
    algo_class = variant['algo_class']
    algorithm = algo_class(
        env,
        training_env=training_env,
        qf=qf,
        qf_criterion=qf_criterion,
        **variant['algo_params']
    )
    algorithm.train()
def experiment(variant):
    """Train Soft Actor-Critic on ``Pusher2DEnv`` from image observations.

    The env is wrapped by ``ImageMujocoEnv`` and then ``NormalizedBoxEnv``.
    The Q-function, value function and policy are all CNN based and share the
    image geometry plus ``variant['cnn_params']``.
    """
    imsize = variant['imsize']
    history = variant['history']

    image_env = ImageMujocoEnv(
        Pusher2DEnv(),
        imsize=imsize,
        keep_prev=history - 1,
        init_camera=variant['init_camera'],
    )
    env = NormalizedBoxEnv(image_env)

    # Constructed but not handed to the algorithm below.
    es = OUStrategy(action_space=env.action_space)
    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size

    # Image geometry shared by every network.
    image_dims = dict(
        input_width=imsize,
        input_height=imsize,
        input_channels=history,
    )

    qf = MergedCNN(
        output_size=1,
        added_fc_input_size=action_dim,
        **image_dims,
        **variant['cnn_params']
    )
    vf = CNN(
        output_size=1,
        **image_dims,
        **variant['cnn_params']
    )
    policy = TanhCNNGaussianPolicy(
        output_size=action_dim,
        **image_dims,
        **variant['cnn_params']
    )

    algorithm = SoftActorCritic(
        env=env,
        policy=policy,
        qf=qf,
        vf=vf,
        **variant['algo_params']
    )
    algorithm.train()
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
):
    """Load or generate VAE training images from ``SawyerPushXYEnv``.

    Source priority: ``dataset_path`` (resolved through
    ``local_path_from_s3_or_local_path``), then the local cache file
    ``/tmp/sawyer_<N>.npy``, then fresh generation. When generating, every
    sample moves the goal to (100, 100), resets the env, applies 50 random
    actions to the wrapped (state) env and stores the image returned by one
    more random step of the image env.

    Args:
        N: number of images to generate; overwritten by the number of rows
            when ``dataset_path`` is given.
        test_p: fraction of rows placed in the FIRST returned array. Despite
            the name, this is the size of the training split.
        use_cached: load the cache file when it exists instead of regenerating.
        imsize: side length of the square rendered image.
        show: preview every generated frame in an OpenCV window.
        dataset_path: optional path of a pre-generated ``.npy`` dataset.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info['env']`` holds the
        image env only when the data was generated in this call.
    """
    filename = "/tmp/sawyer_" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        # Split on the real row count; the N argument may not match the file.
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEnv()
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_init_camera,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            # Move the goal out of the image
            env.wrapped_env.set_goal(np.array([100, 100]))
            env.reset()
            for _ in range(50):
                env.wrapped_env.step(
                    env.wrapped_env.action_space.sample()
                )
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                # Use imsize (not a literal 84) so previews work at any size.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        # Persist the freshly generated data so later calls hit the cache.
        # NOTE(review): this statement was garbled in the source
        # ("..., dataset)"); np.save is the reconstruction - confirm.
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def experiment(variant):
    """Train TD3 on the cylinder pusher env using VAE latent observations.

    A pre-trained VAE is loaded with ``torch.load`` from a hard-coded path
    selected by ``variant['rdim']``. The env is rendered to images, wrapped so
    that observations, rewards and goals come from the VAE, flattened, and
    optionally normalized before TD3 is trained on it.
    """
    rdim = variant["rdim"]
    vae_paths = {
        2: "/home/ashvin/data/s3doodad/ashvin/vae/pusher2d-conv-sweep2/run1/id0/params.pkl",
        4: "/home/ashvin/data/s3doodad/ashvin/vae/pusher2d-conv-sweep2/run1/id1/params.pkl",
        8: "/home/ashvin/data/s3doodad/ashvin/vae/pusher2d-conv-sweep2/run1/id2/params.pkl",
        16: "/home/ashvin/data/s3doodad/ashvin/vae/pusher2d-conv-sweep2/run1/id3/params.pkl"
    }
    vae_path = vae_paths[rdim]
    vae = torch.load(vae_path)
    print("loaded", vae_path)

    # NOTE(review): `env` is only bound on the multitask path; a falsy
    # variant['multitask'] fails below with an unbound-name error.
    if variant['multitask']:
        pusher = CylinderXYPusher2DEnv(**variant["env_kwargs"])
        image_env = ImageMujocoEnv(
            pusher, 84, camera_name="topview", transpose=True,
        )
        vae_env = VAEWrappedEnv(
            image_env,
            vae,
            use_vae_obs=True,
            use_vae_reward=True,
            use_vae_goals=True,
        )
        env = MultitaskToFlatEnv(vae_env)
    if variant['normalize']:
        env = NormalizedBoxEnv(env)

    # Exploration strategies are built lazily so only the chosen one is
    # constructed.
    exploration_type = variant['exploration_type']
    strategy_builders = {
        'ou': lambda: OUStrategy(action_space=env.action_space),
        'gaussian': lambda: GaussianStrategy(
            action_space=env.action_space,
            max_sigma=0.1,
            min_sigma=0.1,  # Constant sigma
        ),
        'epsilon': lambda: EpsilonGreedy(
            action_space=env.action_space,
            prob_random_action=0.1,
        ),
    }
    if exploration_type not in strategy_builders:
        raise Exception("Invalid type: " + exploration_type)
    es = strategy_builders[exploration_type]()

    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size

    def build_q_function():
        # Each critic gets its own network and its own hidden_sizes list.
        return FlattenMlp(
            input_size=obs_dim + action_dim,
            output_size=1,
            hidden_sizes=[400, 300],
        )

    qf1 = build_q_function()
    qf2 = build_q_function()
    policy = TanhMlpPolicy(
        input_size=obs_dim,
        output_size=action_dim,
        hidden_sizes=[400, 300],
    )
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    algorithm = TD3(
        env,
        training_env=env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        exploration_policy=exploration_policy,
        **variant['algo_kwargs']
    )

    use_gpu = variant["use_gpu"]
    print("use_gpu", use_gpu, bool(use_gpu))
    if use_gpu:
        gpu_id = variant["gpu_id"]
        ptu.set_gpu_mode(True)
        ptu.set_device(gpu_id)
    algorithm.train()
from railrl.envs.mujoco.sawyer_gripper_env import SawyerXYZEnv
from railrl.envs.wrappers import ImageMujocoEnv
import cv2
import numpy as np

# Interactive viewer: runs random-action episodes on the Sawyer XYZ env
# forever and shows each rendered frame in an OpenCV window.
print("making env")
sawyer = SawyerXYZEnv()
env = ImageMujocoEnv(sawyer, imsize=400)

print("starting rollout")
while True:
    obs = env.reset()
    for t in range(1000):
        # Episodes always run the full 1000 steps; `done` is ignored.
        obs, reward, done, info = env.step(env.action_space.sample())
        frame = env._image_observation()
        # Flip the rows and reverse the order of the first three channels
        # before display.
        flipped_planes = [frame[::-1, :, c:c + 1] for c in (2, 1, 0)]
        img = np.concatenate(flipped_planes, axis=2)
        cv2.imshow('obs', img)
        cv2.waitKey(1)
    print("new episode")
def experiment(variant):
    """Build a TD3 learner on a VAE-wrapped image env.

    The env class is taken from ``variant['env']`` and rendered to 84x84
    images. The VAE is handed to the VAE wrapper as a file path
    (``variant['vae_paths'][str(rdim)]``), not as a loaded model. With
    ``variant['use_env_goals']`` set, ``VAEWrappedImageGoalEnv`` is used,
    otherwise ``VAEWrappedEnv``.

    NOTE(review): as visible here the function constructs the TD3 algorithm
    but never calls ``train()`` and returns nothing; the source may be
    truncated - confirm against the original file.

    Raises:
        Exception: if ``variant['exploration_type']`` is not one of
            ``'ou'``, ``'gaussian'`` or ``'epsilon'``.
    """
    rdim = variant["rdim"]
    use_env_goals = variant["use_env_goals"]
    vae_path = variant["vae_paths"][str(rdim)]
    render = variant["render"]
    wrap_mujoco_env = variant.get("wrap_mujoco_env", False)

    from railrl.envs.wrappers import ImageMujocoEnv, NormalizedBoxEnv
    # NOTE(review): the module path of this import was missing in the source
    # ("from import sawyer_init_camera", a syntax error).
    # `railrl.images.camera` is a reconstruction - confirm the real module.
    from railrl.images.camera import sawyer_init_camera

    env = variant["env"](**variant['env_kwargs'])
    env = NormalizedBoxEnv(ImageMujocoEnv(
        env,
        imsize=84,
        keep_prev=0,
        init_camera=sawyer_init_camera,
    ))
    if wrap_mujoco_env:
        env = ImageMujocoEnv(
            env, 84, camera_name="topview", transpose=True, normalize=True,
        )

    if use_env_goals:
        track_qpos_goal = variant.get("track_qpos_goal", 0)
        env = VAEWrappedImageGoalEnv(
            env,
            vae_path,
            use_vae_obs=True,
            use_vae_reward=True,
            use_vae_goals=True,
            render_goals=render,
            render_rollouts=render,
            track_qpos_goal=track_qpos_goal,
        )
    else:
        env = VAEWrappedEnv(
            env,
            vae_path,
            use_vae_obs=True,
            use_vae_reward=True,
            use_vae_goals=True,
            render_goals=render,
            render_rollouts=render,
        )

    env = MultitaskToFlatEnv(env)
    if variant['normalize']:
        env = NormalizedBoxEnv(env)

    exploration_type = variant['exploration_type']
    if exploration_type == 'ou':
        es = OUStrategy(action_space=env.action_space)
    elif exploration_type == 'gaussian':
        es = GaussianStrategy(
            action_space=env.action_space,
            max_sigma=0.1,
            min_sigma=0.1,  # Constant sigma
        )
    elif exploration_type == 'epsilon':
        es = EpsilonGreedy(
            action_space=env.action_space,
            prob_random_action=0.1,
        )
    else:
        raise Exception("Invalid type: " + exploration_type)

    obs_dim = env.observation_space.low.size
    action_dim = env.action_space.low.size
    qf1 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[400, 300],
    )
    qf2 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        hidden_sizes=[400, 300],
    )
    policy = TanhMlpPolicy(
        input_size=obs_dim,
        output_size=action_dim,
        hidden_sizes=[400, 300],
    )
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    algorithm = TD3(
        env,
        training_env=env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        exploration_policy=exploration_policy,
        **variant['algo_kwargs']
    )
def simulate_policy(args):
    """Load a saved snapshot and dump a grid video of policy rollouts.

    Args:
        args: parsed command-line namespace. Attributes read here: ``file``
            (snapshot path), ``enable_render``, ``gpu``, ``pause`` and ``H``
            (rollout horizon).

    Raises:
        Exception: if the snapshot holds none of the known policy keys.

    The video is written next to the snapshot as ``video_<snapshot name>.mp4``
    and rollouts go to a ``rollouts_<snapshot name>`` sub-directory.
    """
    # SECURITY: joblib.load unpickles the file; only open trusted snapshots.
    data = joblib.load(args.file)
    if 'eval_policy' in data:
        policy = data['eval_policy']
    elif 'policy' in data:
        policy = data['policy']
    elif 'exploration_policy' in data:
        policy = data['exploration_policy']
    elif 'naf_policy' in data:
        policy = data['naf_policy']
    elif 'optimizable_qfunction' in data:
        qf = data['optimizable_qfunction']
        policy = qf.implicit_policy
    else:
        raise Exception("No policy found in loaded dict. Keys: {}".format(
            data.keys()))

    env = data['env']
    if isinstance(env, RemoteRolloutEnv):
        env = env._wrapped_env
    print("Policy loaded")

    env.mode("video_env")
    env.decode_goals = True
    # Separate image env built on the env two wrapper levels down; handed to
    # dump_video below.
    image_env = ImageMujocoEnv(
        env._wrapped_env._wrapped_env,
        84,
        init_camera=None,
        camera_name="topview",
        transpose=True,
        normalize=True,
    )

    if args.enable_render:
        # some environments need to be reconfigured for visualization
        env.enable_render()

    if args.gpu:
        set_gpu_mode(True)
        # NOTE(review): the body of this branch was missing in the source
        # (`if hasattr(env, "vae"): else:` is a syntax error). The .cuda()
        # calls mirror the CPU branch below - confirm against the original.
        policy.cuda()
        if hasattr(env, "vae"):
            env.vae.cuda()
    else:
        # make sure everything is on the CPU
        set_gpu_mode(False)
        policy.cpu()
        if hasattr(env, "vae"):
            env.vae.cpu()

    if args.pause:
        import ipdb
        ipdb.set_trace()
    if isinstance(policy, PyTorchModule):
        policy.train(False)

    ROWS = 3
    COLUMNS = 6
    dirname = osp.dirname(args.file)
    input_file_name = os.path.splitext(os.path.basename(args.file))[0]
    filename = osp.join(dirname, "video_{}.mp4".format(input_file_name))
    paths = dump_video(
        env,
        policy,
        filename,
        ROWS=ROWS,
        COLUMNS=COLUMNS,
        horizon=args.H,
        image_env=image_env,
        dirname=dirname,
        subdirname="rollouts_" + input_file_name,
    )

    if hasattr(env, "log_diagnostics"):
        env.log_diagnostics(paths)
    logger.dump_tabular()