def load_vae_meta_data(variant):
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path
    import os.path as osp
    import json

    rl_variant = variant['rl_variant']
    if 'vae_path' in rl_variant:
        local_path = local_path_from_s3_or_local_path(
            osp.join(rl_variant['vae_path'], 'variant.json'))
        with open(local_path) as f:
            data = json.load(f)
        variant['vae_exp_prefix'] = data['exp_prefix']
        variant['vae_exp_id'] = data['exp_id']
        variant['vae_seed'] = data['seed']
        if 'vae_variant' in data:
            variant['vae_variant'] = data['vae_variant']
        else:
            # backwards compatibility
            variant['vae_variant'] = data['train_vae_variant']
    if 'reproj_vae_path' in rl_variant:
        local_path = local_path_from_s3_or_local_path(
            osp.join(rl_variant['reproj_vae_path'], 'variant.json'))
        with open(local_path) as f:
            data = json.load(f)
        variant['reproj_vae_exp_prefix'] = data['exp_prefix']
        variant['reproj_vae_exp_id'] = data['exp_id']
        variant['reproj_vae_seed'] = data['seed']
def update_networks_func(algo, epoch):
    # Per-epoch hook; closes over `variant` (see the factory sketch below).
    # Skip epochs for which no checkpoint was saved. Note: this must `return`
    # rather than `exit()`, or the first non-checkpoint epoch would kill the
    # whole process.
    if epoch % algo.epoch_freq != 0 and epoch != algo.num_epochs - 1:
        return
    if epoch == algo.num_epochs - 1:
        filename = local_path_from_s3_or_local_path(
            osp.join(variant['ckpt'], 'params.pkl'))
    else:
        filename = local_path_from_s3_or_local_path(
            osp.join(variant['ckpt'], 'itr_%d.pkl' % epoch))
    print("updating networks from {}".format(filename))
    data = joblib.load(filename)
    assert data["epoch"] == epoch
    algo.qf1 = data['qf1']
    algo.qf2 = data['qf2']
    algo.policy = data['trained_policy']
    algo.target_policy = data["target_policy"]
    algo.exploration_policy = data["exploration_policy"]
    if 'n_env_steps_total' in data:
        algo._n_env_steps_total = data["n_env_steps_total"]
    if isinstance(algo.eval_policy, SubgoalPlanner):
        algo.eval_policy.qf = algo.qf1
        algo.eval_policy.mf_policy = algo.policy
    else:
        algo.eval_policy = data["eval_policy"]
    if ptu.gpu_enabled():
        algo.cuda()
    if hasattr(algo, "update_sampler_and_rollout_function"):
        algo.update_sampler_and_rollout_function()
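# A minimal sketch of the enclosing factory, reconstructed from the call
# sites below (`algorithm.post_epoch_funcs.append(get_update_networks_func(variant))`):
# the hook above closes over `variant` plus module-level names (`osp`,
# `joblib`, `ptu`, `SubgoalPlanner`, `local_path_from_s3_or_local_path`),
# so the factory only needs to capture `variant` and return the inner function:
#
# def get_update_networks_func(variant):
#     def update_networks_func(algo, epoch):
#         ...  # body as defined above
#     return update_networks_func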
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        policy_path=None,
        action_space_sampling=False,
        env_class=SawyerDoorEnv,
        env_kwargs=None,
        init_camera=sawyer_door_env_camera_v2,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_pull_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        env.wrapped_env.reset()
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        for i in range(N):
            if i % 20 == 0:
                env.reset_model()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            # env.set_to_goal_angle(env.get_goal()['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def process_variant(variant):
    rl_variant = variant['rl_variant']
    if args.debug:
        rl_variant['algo_kwargs']['base_kwargs']['num_rollouts_per_eval'] = 1
        rl_variant['vis_kwargs']['num_samples_for_video'] = 2
        rl_variant['vae_wrapped_env_kwargs'][
            'num_samples_for_latent_histogram'] = 100
    assert rl_variant['eval_algo'] in [
        'mf-tdm',
        'mb-tdm',
    ]
    if 'ckpt_and_vae_path' in rl_variant:
        rl_variant['ckpt'] = rl_variant['ckpt_and_vae_path'][0]
        rl_variant['vae_path'] = rl_variant['ckpt_and_vae_path'][1]
        del rl_variant['ckpt_and_vae_path']
    update_variant_from_ckpt(variant)
    update_variant_from_vae(variant)
    local_path = local_path_from_s3_or_local_path(
        osp.join(rl_variant['ckpt'], 'variant.json'))
    with open(local_path) as f:
        ckpt_variant = json.load(f)
    ckpt_rl_variant = ckpt_variant.get('rl_variant', ckpt_variant)

    if 'mb' in rl_variant['eval_algo']:
        if 'max_tau' not in rl_variant['algo_kwargs']['tdm_kwargs']:
            rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] = \
                rl_variant['algo_kwargs']['base_kwargs']['max_path_length'] - 1
            if 'extra_time' in rl_variant['SubgoalPlanner_kwargs']:
                rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] -= \
                    rl_variant['SubgoalPlanner_kwargs']['extra_time']
        if 'max_tau_per_subprob' not in rl_variant['SubgoalPlanner_kwargs']:
            rl_variant['SubgoalPlanner_kwargs']['max_tau_per_subprob'] = \
                ckpt_rl_variant['algo_kwargs']['tdm_kwargs']['max_tau']
    else:
        if 'max_tau' not in rl_variant['algo_kwargs']['tdm_kwargs']:
            rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] = \
                ckpt_rl_variant['algo_kwargs']['tdm_kwargs']['max_tau']

    eval_algo = rl_variant['eval_algo']
    if eval_algo == 'mb-tdm':
        rl_variant['eval_policy'] = 'SubgoalPlanner'
        rl_variant['do_state_exp'] = False
    elif eval_algo == 'mf-tdm':
        pass
    variant['eval_algo_base'] = rl_variant['eval_algo']
    variant['eval_algo_tag'] = 'mt=' + str(
        rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'])
    if 'mb' in rl_variant['eval_algo']:
        variant['eval_algo_tag'] = '-'.join([
            variant['eval_algo_tag'],
            'mtps=' + str(rl_variant['SubgoalPlanner_kwargs']['max_tau_per_subprob'])
        ])
    variant['eval_algo'] = '-'.join(
        [variant['eval_algo_base'], variant['eval_algo_tag']])
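# For example, with max_tau=24 and max_tau_per_subprob=25 (values
# illustrative), process_variant above yields
# variant['eval_algo'] == 'mb-tdm-mt=24-mtps=25'.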
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        init_camera=sawyer_init_camera_zoomed_in,
        dataset_path=None,
        env_kwargs=None,
):
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/sawyer_push_variable{}_{}.npy".format(
        str(N),
        init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYVariableEnv(hide_goal=True, **env_kwargs)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            goal = env.sample_goal_for_rollout()
            hand_pos = env.sample_hand_xy()
            env.set_to_goal(goal, reset_hand=False)
            env.set_hand_xy(hand_pos)
            # img = env.reset()
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                img = img.reshape(3, 84, 84).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
                # radius = input('waiting...')
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        init_camera=sawyer_init_camera_zoomed_in,
        dataset_path=None,
        env_kwargs=None,
):
    """
    Oracle means that we use `set_to_goal` rather than doing random rollouts.
    """
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/sawyer_reset_free_push{}_{}.npy".format(
        str(N),
        init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerResetFreePushEnv(hide_goal=True, **env_kwargs)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            goal = env.sample_goal_for_rollout()
            env.set_to_goal(goal)
            img = env.reset()
            dataset[i, :] = img
            if show:
                img = img.reshape(3, 84, 84).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def update_variant_from_ckpt(variant):
    rl_variant = variant['rl_variant']
    local_path = local_path_from_s3_or_local_path(
        osp.join(rl_variant['ckpt'], 'variant.json'))
    with open(local_path) as f:
        ckpt_variant = json.load(f)
    ckpt_rl_variant = ckpt_variant.get('rl_variant', None)
    if ckpt_rl_variant is None:
        # backwards compatibility
        ckpt_rl_variant = ckpt_variant.get('grill_variant', ckpt_variant)

    env_kwargs = ckpt_variant['env_kwargs']
    env_kwargs.update(variant['env_kwargs'])
    variant['env_kwargs'] = env_kwargs

    rl_variant['algorithm'] = ckpt_rl_variant['algorithm']
    variant['ckpt_exp_prefix'] = ckpt_variant['exp_prefix']
    variant['ckpt_exp_id'] = ckpt_variant['exp_id']
    variant['ckpt_seed'] = ckpt_variant['seed']
    if 'vae_path' in ckpt_rl_variant:
        rl_variant['vae_path'] = ckpt_rl_variant['vae_path']
    if 'vae_variant' in ckpt_variant:
        variant['vae_variant'] = ckpt_variant['vae_variant']
    elif 'train_vae_variant' in ckpt_variant:
        # backwards compatibility
        variant['vae_variant'] = ckpt_variant['train_vae_variant']
    if 'num_updates_per_env_step' in ckpt_rl_variant['algo_kwargs']['base_kwargs']:
        rl_variant['algo_kwargs']['base_kwargs']['num_updates_per_env_step'] = \
            ckpt_rl_variant['algo_kwargs']['base_kwargs']['num_updates_per_env_step']
    if 'max_path_length' not in rl_variant['algo_kwargs']['base_kwargs']:
        rl_variant['algo_kwargs']['base_kwargs']['max_path_length'] = \
            ckpt_rl_variant['algo_kwargs']['base_kwargs']['max_path_length']
    if rl_variant.get('test_ckpt', False) \
            and rl_variant['algo_kwargs']['base_kwargs']['num_epochs'] == 1:
        rl_variant['algo_kwargs']['base_kwargs']['num_epochs'] = \
            ckpt_rl_variant['algo_kwargs']['base_kwargs']['num_epochs']
    rl_variant['exploration_type'] = ckpt_rl_variant['exploration_type']
    rl_variant['exploration_noise'] = ckpt_rl_variant['exploration_noise']
    if 'reward_params' in ckpt_rl_variant:
        rl_variant['reward_params'] = ckpt_rl_variant['reward_params']
    if 'vae_wrapped_env_kwargs' in ckpt_rl_variant:
        for k in ckpt_rl_variant['vae_wrapped_env_kwargs']:
            if k in ['test_noisy_encoding', 'num_samples_for_latent_histogram'] \
                    and k in rl_variant['vae_wrapped_env_kwargs']:
                continue
            rl_variant['vae_wrapped_env_kwargs'][k] = \
                ckpt_rl_variant['vae_wrapped_env_kwargs'][k]
    rl_variant['algo_kwargs']['base_kwargs']['reward_scale'] = \
        ckpt_rl_variant['algo_kwargs']['base_kwargs'].get('reward_scale', 1.0)
    if 'env_class' not in variant and 'env_id' not in variant \
            and 'env_id' in ckpt_variant:
        variant['env_id'] = ckpt_variant['env_id'].replace('Train', 'Test')
def compute_sampled_latents(vae_env):
    # Latent dimensions whose std exceeds 0.15 are treated as "active".
    vae_env.num_active_dims = 0
    for std in vae_env.vae.dist_std:
        if std > 0.15:
            vae_env.num_active_dims += 1
    vae_env.active_dims = \
        vae_env.vae.dist_std.argsort()[-vae_env.num_active_dims:][::-1]
    vae_env.inactive_dims = \
        vae_env.vae.dist_std.argsort()[:-vae_env.num_active_dims][::-1]

    if vae_env.use_vae_dataset and vae_env.vae_dataset_path is not None:
        from multiworld.core.image_env import normalize_image
        from railrl.misc.asset_loader import local_path_from_s3_or_local_path
        filename = local_path_from_s3_or_local_path(vae_env.vae_dataset_path)
        # allow_pickle=True is required to load a pickled dict with numpy >= 1.16.3
        dataset = np.load(filename, allow_pickle=True).item()
        vae_env.num_samples_for_latent_histogram = min(
            dataset['next_obs'].shape[0],
            vae_env.num_samples_for_latent_histogram)
        sampled_idx = np.random.choice(
            dataset['next_obs'].shape[0],
            vae_env.num_samples_for_latent_histogram)
        if vae_env.vae_input_key_prefix == 'state':
            vae_dataset_samples = dataset['next_obs'][sampled_idx]
        else:
            vae_dataset_samples = normalize_image(dataset['next_obs'][sampled_idx])
        del dataset
    else:
        vae_dataset_samples = None

    n = vae_env.num_samples_for_latent_histogram
    if vae_dataset_samples is not None:
        imgs = vae_dataset_samples
    else:
        if vae_env.vae_input_key_prefix == 'state':
            imgs = vae_env.wrapped_env.wrapped_env.sample_goals(n)['state_desired_goal']
        else:
            imgs = vae_env.wrapped_env.sample_goals(n)['image_desired_goal']

    # Encode in batches to bound memory usage.
    batch_size = 2500
    latents, latents_noisy, latents_reproj = None, None, None
    for i in range(0, n, batch_size):
        batch_latents_mean, batch_latents_logvar = vae_env.encode_imgs(
            imgs[i:i + batch_size], clip_std=False)
        batch_latents_noisy = vae_env.reparameterize(
            batch_latents_mean, batch_latents_logvar, noisy=True)
        if vae_env.use_reprojection_network:
            batch_latents_reproj = ptu.get_numpy(
                vae_env.reproject_encoding(ptu.np_to_var(batch_latents_noisy)))
        if latents is None:
            latents = batch_latents_mean
            latents_noisy = batch_latents_noisy
            if vae_env.use_reprojection_network:
                latents_reproj = batch_latents_reproj
        else:
            latents = np.concatenate((latents, batch_latents_mean), axis=0)
            latents_noisy = np.concatenate(
                (latents_noisy, batch_latents_noisy), axis=0)
            if vae_env.use_reprojection_network:
                latents_reproj = np.concatenate(
                    (latents_reproj, batch_latents_reproj), axis=0)
    vae_env.sampled_latents = latents
    vae_env.sampled_latents_noisy = latents_noisy
    vae_env.sampled_latents_reproj = latents_reproj
def generate_vae_dataset(
        env_class,
        N=10000,
        test_p=0.9,
        use_cached=True,
        observation_key='observation',
        init_camera=None,
        dataset_path=None,
        env_kwargs=None,
        oracle_dataset=False,
        n_random_steps=100,
):
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/{}_{}_{}_oracle{}.npy".format(
        env_class.__name__,
        str(N),
        init_camera.__name__ if init_camera else '',
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = env_class(**env_kwargs)
        env.reset()
        info['env'] = env
        observation_dim = env.observation_space.spaces[observation_key].low.size
        dataset = np.zeros((N, observation_dim))
        for i in range(N):
            if oracle_dataset:
                goal = env.sample_goal()
                env.set_to_goal(goal)
            else:
                env.reset()
                for _ in range(n_random_steps):
                    env.step(env.action_space.sample())
            obs = env.step(env.action_space.sample())[0][observation_key]
            dataset[i, :] = obs
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    np.random.shuffle(dataset)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
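# Usage sketch for the state-based generator above. Hypothetical call: any
# multiworld-style env class exposing `sample_goal`/`set_to_goal` and a Dict
# observation space should work; SawyerPushXYEasyEnv (used elsewhere in this
# file) stands in here, and the observation key name is an assumption:
#
# train_data, test_data, info = generate_vae_dataset(
#     env_class=SawyerPushXYEasyEnv,
#     N=1000,
#     observation_key='state_observation',
#     oracle_dataset=True,
# )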
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        init_camera=sawyer_init_camera_zoomed_in,
        dataset_path=None,
):
    filename = "/tmp/sawyer_push_new_easy{}_{}.npy".format(
        str(N),
        init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEasyEnv(hide_goal=True)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.reset()
            for _ in range(100):
                action = env.wrapped_env.action_space.sample()
                # action[0] = 0
                # action[1] = 1
                env.wrapped_env.step(action)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            print(i)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
):
    filename = "/tmp/sawyer_push_new_easy_wider2_" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEasyEnv(hide_goal=True)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_init_camera_zoomed_in,
            # init_camera=sawyer_init_camera,
            normalize=True,
        )
        info['env'] = env
        policy = OUStrategy(env.action_space)
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            # env.reset()
            if i % 100 == 0:
                g = env.sample_goal_for_rollout()
                env.set_goal(g)
                policy.reset()
            u = policy.get_action_from_raw_action(env.action_space.sample())
            img = env.step(u)[0]
            dataset[i, :] = img
            if show:
                # env.render()
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def pretrain(self):
    if self.oracle_transition_data is not None:
        filename = local_path_from_s3_or_local_path(
            self.oracle_transition_data)
        # allow_pickle=True is required to load a pickled dict with numpy >= 1.16.3
        data = np.load(filename, allow_pickle=True).item()
        print("adding data to replay buffer...")
        states, actions, next_states = \
            data['states'], data['actions'], data['next_states']
        # Shuffle, then truncate to the replay buffer's capacity.
        idx = np.random.permutation(len(states))
        states, actions, next_states = \
            states[idx], actions[idx], next_states[idx]
        cap = self.replay_buffer.max_size
        states, actions, next_states = \
            states[:cap], actions[:cap], next_states[:cap]
        dummy_goal = self.env.sample_goal_for_rollout()
        for i, (s, a, next_s) in enumerate(zip(states, actions, next_states)):
            if i % 10000 == 0:
                print(i)
            obs = dict(
                observation=s,
                desired_goal=dummy_goal['desired_goal'],
                achieved_goal=s,
                state_observation=s,
                state_desired_goal=dummy_goal['state_desired_goal'],
                state_achieved_goal=s,
            )
            next_obs = dict(
                observation=next_s,
                desired_goal=dummy_goal['desired_goal'],
                achieved_goal=next_s,
                state_observation=next_s,
                state_desired_goal=dummy_goal['state_desired_goal'],
                state_achieved_goal=next_s,
            )
            self._handle_step(
                obs,
                a,
                np.array([0]),
                next_obs,
                np.array([0]),
                agent_info={},
                env_info={},
            )
        self._handle_rollout_ending()
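# The `oracle_transition_data` file loaded by `pretrain` above is expected to
# be a pickled dict of aligned arrays keyed 'states', 'actions', and
# 'next_states'. A sketch of producing such a file (sizes and dimensions are
# illustrative assumptions):
#
# import numpy as np
# n, obs_dim, action_dim = 100000, 6, 2
# data = {
#     'states': np.zeros((n, obs_dim)),
#     'actions': np.zeros((n, action_dim)),
#     'next_states': np.zeros((n, obs_dim)),
# }
# np.save('/tmp/oracle_transitions.npy', data)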
def dump_reconstructions(vae_env, epoch, n_recon=16):
    from railrl.core import logger
    import os.path as osp
    from torchvision.utils import save_image

    if vae_env.use_vae_dataset and vae_env.vae_dataset_path is not None:
        from multiworld.core.image_env import normalize_image
        from railrl.misc.asset_loader import local_path_from_s3_or_local_path
        filename = local_path_from_s3_or_local_path(vae_env.vae_dataset_path)
        # allow_pickle=True is required to load a pickled dict with numpy >= 1.16.3
        dataset = np.load(filename, allow_pickle=True).item()
        sampled_idx = np.random.choice(dataset['next_obs'].shape[0], n_recon)
        if vae_env.vae_input_key_prefix == 'state':
            states = dataset['next_obs'][sampled_idx]
            imgs = ptu.np_to_var(
                vae_env.wrapped_env.states_to_images(states)
            )
            recon_samples, _, _ = vae_env.vae(ptu.np_to_var(states))
            recon_imgs = ptu.np_to_var(
                vae_env.wrapped_env.states_to_images(
                    ptu.get_numpy(recon_samples))
            )
        else:
            imgs = ptu.np_to_var(
                normalize_image(dataset['next_obs'][sampled_idx])
            )
            recon_imgs, _, _, _ = vae_env.vae(imgs)
        del dataset
    else:
        return
    comparison = torch.cat([
        imgs.narrow(start=0, length=vae_env.wrapped_env.image_length,
                    dimension=1).contiguous().view(
            -1,
            vae_env.wrapped_env.channels,
            vae_env.wrapped_env.imsize,
            vae_env.wrapped_env.imsize,
        ),
        recon_imgs.contiguous().view(
            n_recon,
            vae_env.wrapped_env.channels,
            vae_env.wrapped_env.imsize,
            vae_env.wrapped_env.imsize,
        )[:n_recon]
    ])
    if epoch is not None:
        save_dir = osp.join(logger.get_snapshot_dir(), 'r_%d.png' % epoch)
    else:
        save_dir = osp.join(logger.get_snapshot_dir(), 'r.png')
    save_image(comparison.data.cpu(), save_dir, nrow=n_recon)
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
):
    filename = "/tmp/sawyer_xy_pos_control_imgs" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerReachXYEnv(hide_goal_markers=True)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_sawyer_camera_v1,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            # Move the goal out of the image
            env.reset()
            for _ in range(50):
                env.wrapped_env.step(env.wrapped_env.action_space.sample())
            img = env.step(env.action_space.sample())[0]['image_observation']
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def update_variant_from_vae(variant):
    rl_variant = variant['rl_variant']
    if 'vae_path' in rl_variant:
        local_path = local_path_from_s3_or_local_path(
            osp.join(rl_variant['vae_path'], 'variant.json'))
        with open(local_path) as f:
            data = json.load(f)
        variant['vae_exp_prefix'] = data['exp_prefix']
        variant['vae_exp_id'] = data['exp_id']
        variant['vae_seed'] = data['seed']
        if 'vae_variant' in data:
            data_vae_variant = data['vae_variant']
        else:
            # backwards compatibility
            data_vae_variant = data['train_vae_variant']
        variant['vae_variant'] = data_vae_variant
        vae_wrapped_env_kwargs = rl_variant['vae_wrapped_env_kwargs']
        vae_wrapped_env_kwargs['vae_dataset_path'] = \
            data_vae_variant['generate_vae_dataset_kwargs']['dataset_path']
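# For reference, a minimal sketch of the nested `variant` layout that
# update_variant_from_ckpt / update_variant_from_vae read and write (key
# names come from the functions above; the path values are illustrative):
#
# variant = {
#     'env_kwargs': {},
#     'rl_variant': {
#         'ckpt': 'path/to/rl/experiment',
#         'vae_path': 'path/to/vae/experiment',
#         'algo_kwargs': {'base_kwargs': {}, 'tdm_kwargs': {}},
#         'vae_wrapped_env_kwargs': {},
#     },
# }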
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        action_space_sampling=False,
        init_camera=None,
        env_class=None,
        env_kwargs=None,
):
    filename = "/tmp/sawyer_xyz_pos_control_new_zoom_cam" + str(N) + '.npy'
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        if action_space_sampling:
            action_space = Box(np.array([-.1, .5, 0]), np.array([.1, .7, .5]))
            for i in range(N):
                env.set_to_goal(env.sample_goal())
                img = env._get_flat_img()
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)
            info['env'] = env
        else:
            policy = RandomPolicy(env.action_space)
            es = OUStrategy(action_space=env.action_space, theta=0)
            exploration_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=es,
                policy=policy,
            )
            for i in range(N):
                # Move the goal out of the image
                env.wrapped_env.set_goal(np.array([100, 100, 100]))
                if i % 50 == 0:
                    print('Reset')
                    env.reset()
                    exploration_policy.reset()
                for _ in range(1):
                    action = exploration_policy.get_action()[0] * 10
                    env.wrapped_env.step(action)
                img = env.step(env.action_space.sample())[0]
                # The dataset is uint8, so store unnormalized pixel values;
                # writing the raw normalized image would truncate it to zeros.
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)
        print("done making training data", time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def tdm_td3_experiment(variant):
    import railrl.samplers.rollout_functions as rf
    import railrl.torch.pytorch_util as ptu
    from railrl.data_management.obs_dict_replay_buffer import \
        ObsDictRelabelingBuffer
    from railrl.exploration_strategies.base import (
        PolicyWrappedWithExplorationStrategy)
    from railrl.state_distance.tdm_networks import TdmQf, TdmPolicy
    from railrl.state_distance.tdm_td3 import TdmTd3
    from railrl.my_td3 import Actor, Critic, MY_TD3
    from railrl.state_distance.subgoal_planner import SubgoalPlanner
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path
    from railrl.my_tdm_td3 import MyTdmTd3
    import joblib

    preprocess_rl_variant(variant)
    env = get_envs(variant)
    es = get_exploration_strategy(variant, env)
    observation_key = variant.get('observation_key', 'latent_observation')
    desired_goal_key = variant.get('desired_goal_key', 'latent_desired_goal')
    achieved_goal_key = desired_goal_key.replace("desired", "achieved")
    vectorized = 'vectorized' in env.reward_type
    variant['algo_kwargs']['tdm_kwargs']['vectorized'] = vectorized
    variant['replay_buffer_kwargs']['vectorized'] = vectorized

    args = {'latent_dim': 16, 'device': 'cuda'}  # OWN

    # max_action is needed by MyTdmTd3 below in both branches.
    max_action = env.action_space.high
    if 'ckpt' in variant:
        if 'ckpt_epoch' in variant:
            epoch = variant['ckpt_epoch']
            filename = local_path_from_s3_or_local_path(
                osp.join(variant['ckpt'], 'itr_%d.pkl' % epoch))
        else:
            filename = local_path_from_s3_or_local_path(
                osp.join(variant['ckpt'], 'params.pkl'))
        print("Loading ckpt from", filename)
        data = joblib.load(filename)
        qf1 = data['qf1']
        qf2 = data['qf2']
        policy = data['policy']
        variant['algo_kwargs']['base_kwargs']['reward_scale'] = \
            policy.reward_scale
    else:
        obs_dim = env.observation_space.spaces[observation_key].low.size
        goal_dim = env.observation_space.spaces[desired_goal_key].low.size
        action_dim = env.action_space.low.size
        variant['qf_kwargs']['vectorized'] = vectorized
        norm_order = env.norm_order
        variant['qf_kwargs']['norm_order'] = norm_order
        env.reset()
        _, rew, _, _ = env.step(env.action_space.sample())
        if hasattr(rew, "__len__"):
            variant['qf_kwargs']['output_dim'] = len(rew)
        '''qf1 = TdmQf(
            env=env,
            observation_dim=obs_dim,
            goal_dim=goal_dim,
            action_dim=action_dim,
            **variant['qf_kwargs']
        )
        qf2 = TdmQf(
            env=env,
            observation_dim=obs_dim,
            goal_dim=goal_dim,
            action_dim=action_dim,
            **variant['qf_kwargs']
        )
        policy = TdmPolicy(
            env=env,
            observation_dim=obs_dim,
            goal_dim=goal_dim,
            action_dim=action_dim,
            reward_scale=variant['algo_kwargs']['base_kwargs'].get(
                'reward_scale', 1.0),
            **variant['policy_kwargs']
        )'''
        policy = Actor(obs_dim, action_dim, goal_dim, 1,
                       max_action=max_action, device=args['device'],
                       reward_scale=10.0,
                       networks_hidden=[400, 300]).cuda()
        qf1 = Critic(obs_dim, action_dim, goal_dim, 1, 4,
                     args['device'], [400, 300]).cuda()
        qf2 = Critic(obs_dim, action_dim, goal_dim, 1, 4,
                     args['device'], [400, 300]).cuda()

    eval_policy = None
    if variant.get('eval_policy', None) == 'SubgoalPlanner':
        eval_policy = SubgoalPlanner(
            env,
            qf1,
            policy,
            observation_key=observation_key,
            desired_goal_key=desired_goal_key,
            achieved_goal_key=achieved_goal_key,
            state_based=variant.get("do_state_exp", False),
            max_tau=variant['algo_kwargs']['tdm_kwargs']['max_tau'],
            reward_scale=variant['algo_kwargs']['base_kwargs'].get(
                'reward_scale', 1.0),
            **variant['SubgoalPlanner_kwargs'])
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    replay_buffer = ObsDictRelabelingBuffer(
        env=env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **variant['replay_buffer_kwargs'])
    algo_kwargs = variant['algo_kwargs']
    algo_kwargs['replay_buffer'] = replay_buffer
    base_kwargs = algo_kwargs['base_kwargs']
    base_kwargs['training_env'] = env
    base_kwargs['render'] = variant.get("render", False)
    base_kwargs['render_during_eval'] = variant.get("render_during_eval", False)
    tdm_kwargs = algo_kwargs['tdm_kwargs']
    tdm_kwargs['observation_key'] = observation_key
    tdm_kwargs['desired_goal_key'] = desired_goal_key
    '''algorithm = TdmTd3(
        env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        exploration_policy=exploration_policy,
        eval_policy=eval_policy,
        **variant['algo_kwargs']
    )'''
    algorithm = MyTdmTd3(
        actor=policy,
        critic1=qf1,
        critic2=qf2,
        max_action=max_action,
        args=args,
        env=env,
        exploration_policy=exploration_policy,
        eval_policy=eval_policy,
        **variant['algo_kwargs'])

    if variant.get("test_ckpt", False):
        algorithm.post_epoch_funcs.append(get_update_networks_func(variant))

    vis_variant = variant.get('vis_kwargs', {})
    vis_list = vis_variant.get('vis_list', [])
    if vis_variant.get("save_video", True):
        rollout_function = rf.create_rollout_function(
            rf.tdm_rollout,
            init_tau=algorithm._sample_max_tau_for_rollout(),
            decrement_tau=algorithm.cycle_taus_for_rollout,
            cycle_tau=algorithm.cycle_taus_for_rollout,
            max_path_length=algorithm.max_path_length,
            observation_key=algorithm.observation_key,
            desired_goal_key=algorithm.desired_goal_key,
            vis_list=vis_list,
            dont_terminate=True,
        )
        video_func = get_video_save_func(
            rollout_function,
            env,
            variant,
        )
        algorithm.post_epoch_funcs.append(video_func)

    if ptu.gpu_enabled():
        print("using GPU")
        algorithm.cuda()
        if not variant.get("do_state_exp", False):
            env.vae.cuda()

    env.reset()
    if not variant.get("do_state_exp", False):
        env.dump_samples(epoch=None)
        env.dump_reconstructions(epoch=None)
        env.dump_latent_plots(epoch=None)
    algorithm.train()
def process_variant(variant):
    rl_variant = variant['rl_variant']
    if args.debug:
        rl_variant['algo_kwargs']['base_kwargs']['num_rollouts_per_eval'] = 1
        rl_variant['vis_kwargs']['num_samples_for_video'] = 2
        rl_variant['vae_wrapped_env_kwargs']['num_samples_for_latent_histogram'] = 100
        variant['train_reprojection_network_variant']['num_epochs'] = 1
        variant['train_reprojection_network_variant'][
            'generate_reprojection_network_dataset_kwargs']['N'] = int(2 ** 8)
        if 'env_kwargs' in variant and 'num_goals_presampled' in variant['env_kwargs']:
            variant['env_kwargs']['num_goals_presampled'] = 10
        if 'goal_generation_kwargs' in rl_variant and \
                'num_goals_presampled' in rl_variant['goal_generation_kwargs']:
            rl_variant['goal_generation_kwargs']['num_goals_presampled'] = 10
    assert rl_variant['eval_algo'] in [
        'mb-tdm',
        'mf-tdm',
    ]
    update_variant_from_ckpt(variant)
    ckpt_path = local_path_from_s3_or_local_path(
        osp.join(rl_variant['ckpt'], 'variant.json'))
    with open(ckpt_path) as f:
        ckpt_variant = json.load(f)
    if 'rl_variant' in ckpt_variant:
        ckpt_rl_variant = ckpt_variant['rl_variant']
    else:
        # backwards compatibility
        ckpt_rl_variant = ckpt_variant['grill_variant']

    if 'mb' in rl_variant['eval_algo']:
        if 'max_tau' not in rl_variant['algo_kwargs']['tdm_kwargs']:
            rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] = \
                rl_variant['algo_kwargs']['base_kwargs']['max_path_length'] - 1
            if 'extra_time' in rl_variant['SubgoalPlanner_kwargs']:
                rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] -= \
                    rl_variant['SubgoalPlanner_kwargs']['extra_time']
        if 'max_tau_per_subprob' not in rl_variant['SubgoalPlanner_kwargs']:
            rl_variant['SubgoalPlanner_kwargs']['max_tau_per_subprob'] = \
                ckpt_rl_variant['algo_kwargs']['tdm_kwargs']['max_tau']
    else:
        if 'max_tau' not in rl_variant['algo_kwargs']['tdm_kwargs']:
            rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] = \
                ckpt_rl_variant['algo_kwargs']['tdm_kwargs']['max_tau']

    eval_algo = rl_variant['eval_algo']
    if eval_algo == 'mb-tdm':
        rl_variant['eval_policy'] = 'SubgoalPlanner'
        rl_variant['SubgoalPlanner_kwargs']['reproject_encoding'] = True
    elif eval_algo == 'mf-tdm':
        pass
    rl_variant['eval_algo_base'] = eval_algo
    rl_variant['eval_algo_tag'] = 'mt=' + str(
        rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'])
    if 'mb' in rl_variant['eval_algo']:
        rl_variant['eval_algo_tag'] = '-'.join([
            rl_variant['eval_algo_tag'],
            'mtps=' + str(rl_variant['SubgoalPlanner_kwargs']['max_tau_per_subprob'])
        ])
    rl_variant['eval_algo'] = '-'.join([
        rl_variant['eval_algo_base'],
        rl_variant['eval_algo_tag']
    ])
    variant['eval_algo_base'] = rl_variant['eval_algo_base']
    variant['eval_algo_tag'] = rl_variant['eval_algo_tag']
    variant['eval_algo'] = rl_variant['eval_algo']
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        policy_path=None,
        ratio_oracle_policy_data_to_random=1 / 2,
        action_space_sampling=False,
        env_class=None,
        env_kwargs=None,
        action_plus_random_sampling=False,
        init_camera=sawyer_door_env_camera,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_space_sampling:
        env = SawyerDoorPushOpenEnv(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=False,
            init_camera=sawyer_door_env_camera,
            normalize=False,
        )
        action_space = Box(np.array([-env.max_x_pos, .5, .06]),
                           np.array([env.max_x_pos, env.max_y_pos, .06]))
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.set_to_goal_pos(action_space.sample())  # move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env.get_image().flatten()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        info['env'] = env
    elif action_plus_random_sampling:
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        action_space = Box(np.array([-env.max_x_pos, .5, .06]),
                           np.array([env.max_x_pos, .6, .06]))
        action_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            env.set_to_goal_pos(action_space.sample())  # move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        now = time.time()
        env = SawyerDoorPushOpenEnv(max_angle=.5)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_door_env_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            if i % 100 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(25):
                # env.wrapped_env.step(
                #     env.wrapped_env.action_space.sample()
                # )
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            goal = env.sample_goal_for_rollout()
            env.set_to_goal(goal)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        policy_path=None,
        action_space_sampling=False,
        env_class=SawyerPushAndPullDoorEnv,
        env_kwargs=None,
        action_plus_random_sampling=False,
        init_camera=sawyer_door_env_camera,
        ratio_action_sample_to_random=1 / 2,
        env_id=None,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_and_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_and_pull_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_and_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_plus_random_sampling:
        if env_id is not None:
            import gym
            env = gym.make(env_id)
        else:
            env = env_class(**env_kwargs)
            env = ImageEnv(
                env,
                imsize,
                transpose=True,
                init_camera=init_camera,
                normalize=True,
            )
        action_sampled_data = int(N * ratio_action_sample_to_random)
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            goal = env.sample_goal()
            env.set_to_goal_angle(goal['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        raise NotImplementedError()
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        env_class,
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        init_camera=sawyer_init_camera_zoomed_in,
        dataset_path=None,
        env_kwargs=None,
        oracle_dataset=False,
        n_random_steps=100,
):
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/{}_{}_{}_oracle{}.npy".format(
        env_class.__name__,
        str(N),
        init_camera.__name__,
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
        )
        env.reset()
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            if oracle_dataset:
                goal = env.sample_goal()
                env.set_to_goal(goal)
            else:
                env.reset()
                for _ in range(n_random_steps):
                    env.step(env.action_space.sample())
            obs = env.step(env.action_space.sample())[0]
            img = obs['image_observation']
            dataset[i, :] = img
            if show:
                img = img.reshape(3, 84, 84).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
                # radius = input('waiting...')
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def ih_td3_experiment(variant):
    import railrl.samplers.rollout_functions as rf
    import railrl.torch.pytorch_util as ptu
    from railrl.data_management.obs_dict_replay_buffer import \
        ObsDictRelabelingBuffer
    from railrl.exploration_strategies.base import (
        PolicyWrappedWithExplorationStrategy
    )
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path
    import joblib
    from railrl.torch.her.her_td3 import HerTd3
    from railrl.torch.networks import FlattenMlp, TanhMlpPolicy
    from railrl.state_distance.subgoal_planner import \
        InfiniteHorizonSubgoalPlanner

    preprocess_rl_variant(variant)
    env = get_envs(variant)
    es = get_exploration_strategy(variant, env)
    observation_key = variant.get('observation_key', 'latent_observation')
    desired_goal_key = variant.get('desired_goal_key', 'latent_desired_goal')
    achieved_goal_key = desired_goal_key.replace("desired", "achieved")
    vectorized = 'vectorized' in env.reward_type
    variant['replay_buffer_kwargs']['vectorized'] = vectorized

    if 'ckpt' in variant:
        if 'ckpt_epoch' in variant:
            epoch = variant['ckpt_epoch']
            filename = local_path_from_s3_or_local_path(
                osp.join(variant['ckpt'], 'itr_%d.pkl' % epoch))
        else:
            filename = local_path_from_s3_or_local_path(
                osp.join(variant['ckpt'], 'params.pkl'))
        print("Loading ckpt from", filename)
        data = joblib.load(filename)
        qf1 = data['qf1']
        qf2 = data['qf2']
        policy = data['policy']
    else:
        obs_dim = (
            env.observation_space.spaces[observation_key].low.size
            + env.observation_space.spaces[desired_goal_key].low.size
        )
        action_dim = env.action_space.low.size
        env.reset()
        _, rew, _, _ = env.step(env.action_space.sample())
        if hasattr(rew, "__len__"):
            output_size = len(rew)
        else:
            output_size = 1
        qf1 = FlattenMlp(
            input_size=obs_dim + action_dim,
            output_size=output_size,
            **variant['qf_kwargs']
        )
        qf2 = FlattenMlp(
            input_size=obs_dim + action_dim,
            output_size=output_size,
            **variant['qf_kwargs']
        )
        policy = TanhMlpPolicy(
            input_size=obs_dim,
            output_size=action_dim,
            **variant['policy_kwargs']
        )
        policy.reward_scale = \
            variant['algo_kwargs']['base_kwargs'].get('reward_scale', 1.0)

    eval_policy = None
    if variant.get('eval_policy', None) == 'SubgoalPlanner':
        eval_policy = InfiniteHorizonSubgoalPlanner(
            env,
            qf1,
            policy,
            observation_key=observation_key,
            desired_goal_key=desired_goal_key,
            achieved_goal_key=achieved_goal_key,
            state_based=variant.get("do_state_exp", False),
            max_tau=variant['algo_kwargs']['base_kwargs']['max_path_length'] - 1,
            reward_scale=variant['algo_kwargs']['base_kwargs'].get(
                'reward_scale', 1.0),
            **variant['SubgoalPlanner_kwargs']
        )
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    replay_buffer = ObsDictRelabelingBuffer(
        env=env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **variant['replay_buffer_kwargs']
    )
    algo_kwargs = variant['algo_kwargs']
    algo_kwargs['replay_buffer'] = replay_buffer
    base_kwargs = algo_kwargs['base_kwargs']
    base_kwargs['training_env'] = env
    base_kwargs['render'] = variant.get("render", False)
    base_kwargs['render_during_eval'] = variant.get("render_during_eval", False)
    her_kwargs = algo_kwargs['her_kwargs']
    her_kwargs['observation_key'] = observation_key
    her_kwargs['desired_goal_key'] = desired_goal_key
    algorithm = HerTd3(
        env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        exploration_policy=exploration_policy,
        eval_policy=eval_policy,
        **variant['algo_kwargs']
    )

    if variant.get("test_ckpt", False):
        algorithm.post_epoch_funcs.append(get_update_networks_func(variant))

    vis_variant = variant.get('vis_kwargs', {})
    vis_list = vis_variant.get('vis_list', [])
    if vis_variant.get("save_video", True):
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=algorithm.max_path_length,
            observation_key=algorithm.observation_key,
            desired_goal_key=algorithm.desired_goal_key,
            vis_list=vis_list,
            dont_terminate=True,
        )
        video_func = get_video_save_func(
            rollout_function,
            env,
            variant,
        )
        algorithm.post_epoch_funcs.append(video_func)

    if ptu.gpu_enabled():
        print("using GPU")
        algorithm.cuda()
        if not variant.get("do_state_exp", False):
            env.vae.cuda()

    env.reset()
    if not variant.get("do_state_exp", False):
        env.dump_samples(epoch=None)
        env.dump_latent_plots(epoch=None)
    algorithm.train()
def generate_vae_dataset_from_params(
        env_class=None,
        env_kwargs=None,
        env_id=None,
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        num_channels=1,
        show=False,
        init_camera=None,
        dataset_path=None,
        oracle_dataset=False,
        n_random_steps=100,
        vae_dataset_specific_env_kwargs=None,
        save_file_prefix=None,
        use_linear_dynamics=False,
):
    from multiworld.core.image_env import ImageEnv, unormalize_image
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path
    import time

    assert oracle_dataset
    if env_kwargs is None:
        env_kwargs = {}
    if save_file_prefix is None:
        save_file_prefix = env_id
    if save_file_prefix is None:
        save_file_prefix = env_class.__name__
    filename = "/tmp/{}_N{}_{}_imsize{}_oracle{}.npy".format(
        save_file_prefix,
        str(N),
        init_camera.__name__ if init_camera else '',
        imsize,
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        np.random.shuffle(dataset)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        np.random.shuffle(dataset)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_id is not None:
            import gym
            import multiworld
            multiworld.register_all_envs()
            env = gym.make(env_id)
        else:
            if vae_dataset_specific_env_kwargs is None:
                vae_dataset_specific_env_kwargs = {}
            for key, val in env_kwargs.items():
                if key not in vae_dataset_specific_env_kwargs:
                    vae_dataset_specific_env_kwargs[key] = val
            env = env_class(**vae_dataset_specific_env_kwargs)
        if not isinstance(env, ImageEnv):
            env = ImageEnv(
                env,
                imsize,
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        setup_pickup_image_env(env, num_presampled_goals=N)
        env.reset()
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
        for i in range(N):
            img = env._presampled_goals['image_desired_goal'][i]
            dataset[i, :] = unormalize_image(img)
            if show:
                img = img.reshape(3, imsize, imsize).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
                time.sleep(.2)
                # radius = input('waiting...')
        print("done making training data", filename, time.time() - now)
        np.random.shuffle(dataset)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
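# Usage sketch for generate_vae_dataset_from_params above, assuming a
# registered multiworld pickup env id (the id string is illustrative):
#
# train_data, test_data, info = generate_vae_dataset_from_params(
#     env_id='SawyerPickupEnv-v0',
#     N=1000,
#     num_channels=3,
#     oracle_dataset=True,
# )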
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        env_class=None,
        env_kwargs=None,
        init_camera=sawyer_door_env_camera,
):
    filename = "/tmp/sawyer_door_push_open_and_reach" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        oracle_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Goal Space Sampling')
        for i in range(oracle_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(oracle_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        # Cache the dataset so the use_cached branch can find it on later runs.
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def train_vae(variant):
    from railrl.misc.ml_util import PiecewiseLinearSchedule
    from railrl.torch.vae.conv_vae import ConvVAE
    from railrl.torch.vae.conv_vae_trainer import ConvVAETrainer
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu
    from multiworld.core.image_env import ImageEnv
    from railrl.envs.vae_wrappers import VAEWrappedEnv
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path

    logger.remove_tabular_output('progress.csv', relative_to_snapshot_dir=True)
    logger.add_tabular_output('vae_progress.csv', relative_to_snapshot_dir=True)

    env_id = variant['generate_vae_dataset_kwargs'].get('env_id', None)
    if env_id is not None:
        import gym
        env = gym.make(env_id)
    else:
        env_class = variant['generate_vae_dataset_kwargs']['env_class']
        env_kwargs = variant['generate_vae_dataset_kwargs']['env_kwargs']
        env = env_class(**env_kwargs)

    representation_size = variant["representation_size"]
    beta = variant["beta"]
    if 'beta_schedule_kwargs' in variant:
        beta_schedule = PiecewiseLinearSchedule(
            **variant['beta_schedule_kwargs'])
    else:
        beta_schedule = None

    # obtain training and testing data
    dataset_path = variant['generate_vae_dataset_kwargs'].get(
        'dataset_path', None)
    test_p = variant['generate_vae_dataset_kwargs'].get('test_p', 0.9)
    filename = local_path_from_s3_or_local_path(dataset_path)
    dataset = np.load(filename, allow_pickle=True).item()
    N = dataset['obs'].shape[0]
    n = int(N * test_p)
    train_data = {}
    test_data = {}
    for k in dataset.keys():
        train_data[k] = dataset[k][:n, :]
        test_data[k] = dataset[k][n:, :]

    # setup vae
    variant['vae_kwargs']['action_dim'] = train_data['actions'].shape[1]
    if variant.get('vae_type', None) == "VAE-state":
        from railrl.torch.vae.vae import VAE
        input_size = train_data['obs'].shape[1]
        variant['vae_kwargs']['input_size'] = input_size
        m = VAE(representation_size, **variant['vae_kwargs'])
    elif variant.get('vae_type', None) == "VAE2":
        from railrl.torch.vae.conv_vae2 import ConvVAE2
        variant['vae_kwargs']['imsize'] = variant['imsize']
        m = ConvVAE2(representation_size, **variant['vae_kwargs'])
    else:
        variant['vae_kwargs']['imsize'] = variant['imsize']
        m = ConvVAE(representation_size, **variant['vae_kwargs'])
    if ptu.gpu_enabled():
        m.cuda()

    # setup vae trainer
    if variant.get('vae_type', None) == "VAE-state":
        from railrl.torch.vae.vae_trainer import VAETrainer
        t = VAETrainer(train_data, test_data, m, beta=beta,
                       beta_schedule=beta_schedule, **variant['algo_kwargs'])
    else:
        t = ConvVAETrainer(train_data, test_data, m, beta=beta,
                           beta_schedule=beta_schedule,
                           **variant['algo_kwargs'])

    # visualization
    vis_variant = variant.get('vis_kwargs', {})
    save_video = vis_variant.get('save_video', False)
    if isinstance(env, ImageEnv):
        image_env = env
    else:
        image_env = ImageEnv(
            env,
            variant['generate_vae_dataset_kwargs'].get('imsize'),
            init_camera=variant['generate_vae_dataset_kwargs'].get(
                'init_camera'),
            transpose=True,
            normalize=True,
        )
    render = variant.get('render', False)
    reward_params = variant.get("reward_params", dict())
    vae_env = VAEWrappedEnv(
        image_env,
        m,
        imsize=image_env.imsize,
        decode_goals=render,
        render_goals=render,
        render_rollouts=render,
        reward_params=reward_params,
        **variant.get('vae_wrapped_env_kwargs', {}))
    vae_env.reset()
    vae_env.add_mode("video_env", 'video_env')
    vae_env.add_mode("video_vae", 'video_vae')
    if save_video:
        import railrl.samplers.rollout_functions as rf
        from railrl.policies.simple import RandomPolicy
        random_policy = RandomPolicy(vae_env.action_space)
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=100,
            observation_key='latent_observation',
            desired_goal_key='latent_desired_goal',
            vis_list=vis_variant.get('vis_list', []),
            dont_terminate=True,
        )
        dump_video_kwargs = variant.get("dump_video_kwargs", dict())
        dump_video_kwargs['imsize'] = vae_env.imsize
        dump_video_kwargs['vis_list'] = [
            'image_observation',
            'reconstr_image_observation',
            'image_latent_histogram_2d',
            'image_latent_histogram_mu_2d',
            'image_plt',
            'image_rew',
            'image_rew_euclidean',
            'image_rew_mahalanobis',
            'image_rew_logp',
            'image_rew_kl',
            'image_rew_kl_rev',
        ]

    def visualization_post_processing(save_vis, save_video, epoch):
        vis_list = vis_variant.get('vis_list', [])
        if save_vis:
            if vae_env.vae_input_key_prefix == 'state':
                vae_env.dump_reconstructions(
                    epoch, n_recon=vis_variant.get('n_recon', 16))
            vae_env.dump_samples(
                epoch, n_samples=vis_variant.get('n_samples', 64))
            if 'latent_representation' in vis_list:
                vae_env.dump_latent_plots(epoch)
            if any(elem in vis_list for elem in [
                'latent_histogram',
                'latent_histogram_mu',
                'latent_histogram_2d',
                'latent_histogram_mu_2d',
            ]):
                vae_env.compute_latent_histogram()
            if not save_video and ('latent_histogram' in vis_list):
                vae_env.dump_latent_histogram(
                    epoch=epoch, noisy=True, use_true_prior=True)
            if not save_video and ('latent_histogram_mu' in vis_list):
                vae_env.dump_latent_histogram(
                    epoch=epoch, noisy=False, use_true_prior=True)
        if save_video and save_vis:
            from railrl.envs.vae_wrappers import temporary_mode
            from railrl.misc.video_gen import dump_video
            from railrl.core import logger
            vae_env.compute_goal_encodings()
            logdir = logger.get_snapshot_dir()
            filename = osp.join(
                logdir, 'video_{epoch}.mp4'.format(epoch=epoch))
            variant['dump_video_kwargs']['epoch'] = epoch
            temporary_mode(
                vae_env,
                mode='video_env',
                func=dump_video,
                args=(vae_env, random_policy, filename, rollout_function),
                kwargs=variant['dump_video_kwargs'])
            if not vis_variant.get('save_video_env_only', True):
                filename = osp.join(
                    logdir, 'video_{epoch}_vae.mp4'.format(epoch=epoch))
                temporary_mode(
                    vae_env,
                    mode='video_vae',
                    func=dump_video,
                    args=(vae_env, random_policy, filename, rollout_function),
                    kwargs=variant['dump_video_kwargs'])

    # train vae
    for epoch in range(variant['num_epochs']):
        save_vis = (epoch % vis_variant['save_period'] == 0
                    or epoch == variant['num_epochs'] - 1)
        save_vae = (epoch % variant['snapshot_gap'] == 0
                    or epoch == variant['num_epochs'] - 1)
        t.train_epoch(epoch)
        '''if epoch % 500 == 0 or epoch == variant['num_epochs']-1:
            t.test_epoch(
                epoch,
                save_reconstruction=save_vis,
                save_interpolation=save_vis,
                save_vae=save_vae,
            )
        if epoch % 200 == 0 or epoch == variant['num_epochs']-1:
            visualization_post_processing(save_video, save_video, epoch)'''
        t.test_epoch(
            epoch,
            save_reconstruction=save_vis,
            save_interpolation=save_vis,
            save_vae=save_vae,
        )
        if epoch % 300 == 0 or epoch == variant['num_epochs'] - 1:
            visualization_post_processing(save_vis, save_video, epoch)

    logger.save_extra_data(m, 'vae.pkl', mode='pickle')
    logger.remove_tabular_output(
        'vae_progress.csv',
        relative_to_snapshot_dir=True,
    )
    logger.add_tabular_output(
        'progress.csv',
        relative_to_snapshot_dir=True,
    )
    print("finished training vae")
    return m
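# A minimal `variant` sketch for train_vae above (keys taken from the
# lookups in the function; all values are illustrative assumptions):
#
# variant = dict(
#     representation_size=16,
#     beta=0.5,
#     imsize=84,
#     num_epochs=1000,
#     snapshot_gap=100,
#     generate_vae_dataset_kwargs=dict(dataset_path='path/to/dataset.npy'),
#     vae_kwargs=dict(),
#     algo_kwargs=dict(),
#     vis_kwargs=dict(save_period=100),
# )
# m = train_vae(variant)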