# Module-level imports assumed by the functions below (reconstructed from usage;
# each function originally lived in its own rlkit/railrl launcher module, which
# also provided the project-local helpers used here, e.g. ImageEnv,
# unormalize_image, load_local_or_remote_file, ptu, get_image_presampled_goals,
# get_exploration_strategy, and torch.utils.data as `data`).
import os
import os.path as osp
import time

import cv2
import numpy as np


def generate_uniform_dataset_reacher(env_class=None, env_kwargs=None, num_imgs=1000,
                                     use_cached_dataset=False, init_camera=None,
                                     imsize=48, show=False, save_file_prefix=None,
                                     env_id=None, tag='', dataset_path=None):
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        return dataset
    import gym
    from gym.envs import registration  # trigger registration
    import multiworld.envs.pygame
    import multiworld.envs.mujoco
    if not env_class or not env_kwargs:
        env = gym.make(env_id)
    else:
        env = env_class(**env_kwargs)
    env = ImageEnv(
        env,
        imsize,
        init_camera=init_camera,
        transpose=True,
        normalize=True,
    )
    env.non_presampled_goal_img_is_garbage = True
    if save_file_prefix is None and env_id is not None:
        save_file_prefix = env_id
    filename = "/tmp/{}_N{}_imsize{}uniform_images_{}.npy".format(
        save_file_prefix,
        str(num_imgs),
        env.imsize,
        tag,
    )
    if use_cached_dataset and osp.isfile(filename):
        images = np.load(filename)
        print("Loaded data from {}".format(filename))
        return images
    print('Sampling Uniform Dataset')
    dataset = np.zeros((num_imgs, 3 * env.imsize ** 2), dtype=np.uint8)
    for j in range(num_imgs):
        obs = env.reset()
        env.set_to_goal(env.get_goal())
        img_f = env._get_flat_img()
        if show:
            img = img_f.reshape(3, env.imsize, env.imsize).transpose()
            img = img[::-1, :, ::-1]
            cv2.imshow('img', img)
            cv2.waitKey(1)
        print(j)
        dataset[j, :] = unormalize_image(img_f)
    np.save(filename, dataset)
    print("Saving file to {}".format(filename))
    return dataset
def generate_uniform_dataset_pick_and_place(env_class=None, env_kwargs=None,
                                            num_imgs=1000, use_cached_dataset=False,
                                            init_camera=None, imsize=48,
                                            save_file_prefix=None, env_id=None,
                                            tag='', dataset_path=None):
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        return dataset
    import gym
    from gym.envs import registration  # trigger registration
    import multiworld.envs.pygame
    import multiworld.envs.mujoco
    if not env_class or not env_kwargs:
        env = gym.make(env_id)
    else:
        env = env_class(**env_kwargs)
    env = ImageEnv(
        env,
        imsize,
        init_camera=init_camera,
        transpose=True,
        normalize=True,
    )
    env.non_presampled_goal_img_is_garbage = True
    if save_file_prefix is None and env_id is not None:
        save_file_prefix = env_id
    filename = "/tmp/{}_N{}_imsize{}uniform_images_{}.npy".format(
        save_file_prefix,
        str(num_imgs),
        env.imsize,
        tag,
    )
    if use_cached_dataset and osp.isfile(filename):
        images = np.load(filename)
        print("Loaded data from {}".format(filename))
        return images
    print('Sampling Uniform Dataset')
    dataset = unormalize_image(
        get_image_presampled_goals(env, num_imgs)['image_desired_goal'])
    np.save(filename, dataset)
    print("Saving file to {}".format(filename))
    return dataset
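
# Hedged usage sketch (not part of the original source): one plausible call to the
# uniform goal-image generators above. The env id is an assumption -- any registered
# multiworld goal env should work in its place -- and init_camera may be a multiworld
# camera function or None for the default camera.
def _example_uniform_goal_images():
    return generate_uniform_dataset_pick_and_place(
        env_id='SawyerPickupEnv-v0',  # hypothetical id, not taken from this file
        num_imgs=500,
        use_cached_dataset=False,
        init_camera=None,
        imsize=48,
        tag='example',
    )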
def HER_baseline_td3_experiment(variant):
    import rlkit.torch.pytorch_util as ptu
    from rlkit.data_management.obs_dict_replay_buffer import \
        ObsDictRelabelingBuffer
    from rlkit.exploration_strategies.base import (
        PolicyWrappedWithExplorationStrategy)
    from rlkit.torch.her.her_td3 import HerTd3
    from rlkit.torch.networks import MergedCNN, CNNPolicy
    import torch
    from multiworld.core.image_env import ImageEnv
    from rlkit.misc.asset_loader import load_local_or_remote_file

    init_camera = variant.get("init_camera", None)
    presample_goals = variant.get('presample_goals', False)
    presampled_goals_path = get_presampled_goals_path(
        variant.get('presampled_goals_path', None))

    if 'env_id' in variant:
        import gym
        import multiworld
        multiworld.register_all_envs()
        env = gym.make(variant['env_id'])
    else:
        env = variant["env_class"](**variant['env_kwargs'])
    image_env = ImageEnv(
        env,
        variant.get('imsize'),
        reward_type='image_sparse',
        init_camera=init_camera,
        transpose=True,
        normalize=True,
    )
    if presample_goals:
        if presampled_goals_path is None:
            image_env.non_presampled_goal_img_is_garbage = True
            presampled_goals = variant['generate_goal_dataset_fctn'](
                env=image_env, **variant['goal_generation_kwargs'])
        else:
            presampled_goals = load_local_or_remote_file(
                presampled_goals_path).item()
        del image_env
        env = ImageEnv(
            env,
            variant.get('imsize'),
            reward_type='image_distance',
            init_camera=init_camera,
            transpose=True,
            normalize=True,
            presampled_goals=presampled_goals,
        )
    else:
        env = image_env

    es = get_exploration_strategy(variant, env)

    observation_key = variant.get('observation_key', 'image_observation')
    desired_goal_key = variant.get('desired_goal_key', 'image_desired_goal')
    achieved_goal_key = desired_goal_key.replace("desired", "achieved")
    imsize = variant['imsize']
    action_dim = env.action_space.low.size
    qf1 = MergedCNN(input_width=imsize, input_height=imsize, output_size=1,
                    input_channels=3 * 2, added_fc_input_size=action_dim,
                    **variant['cnn_params'])
    qf2 = MergedCNN(input_width=imsize, input_height=imsize, output_size=1,
                    input_channels=3 * 2, added_fc_input_size=action_dim,
                    **variant['cnn_params'])
    policy = CNNPolicy(
        input_width=imsize,
        input_height=imsize,
        added_fc_input_size=0,
        output_size=action_dim,
        input_channels=3 * 2,
        output_activation=torch.tanh,
        **variant['cnn_params'],
    )
    target_qf1 = MergedCNN(input_width=imsize, input_height=imsize, output_size=1,
                           input_channels=3 * 2, added_fc_input_size=action_dim,
                           **variant['cnn_params'])
    target_qf2 = MergedCNN(input_width=imsize, input_height=imsize, output_size=1,
                           input_channels=3 * 2, added_fc_input_size=action_dim,
                           **variant['cnn_params'])
    target_policy = CNNPolicy(
        input_width=imsize,
        input_height=imsize,
        added_fc_input_size=0,
        output_size=action_dim,
        input_channels=3 * 2,
        output_activation=torch.tanh,
        **variant['cnn_params'],
    )
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    replay_buffer = ObsDictRelabelingBuffer(
        env=env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **variant['replay_buffer_kwargs'])
    algo_kwargs = variant['algo_kwargs']
    algo_kwargs['replay_buffer'] = replay_buffer
    base_kwargs = algo_kwargs['base_kwargs']
    base_kwargs['training_env'] = env
    base_kwargs['render'] = variant["render"]
    base_kwargs['render_during_eval'] = variant["render"]
    her_kwargs = algo_kwargs['her_kwargs']
    her_kwargs['observation_key'] = observation_key
    her_kwargs['desired_goal_key'] = desired_goal_key
    algorithm = HerTd3(
        env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        target_qf1=target_qf1,
        target_qf2=target_qf2,
        target_policy=target_policy,
        exploration_policy=exploration_policy,
        **variant['algo_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
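
# Hedged sketch (not from the original source): the nested `variant` layout that
# HER_baseline_td3_experiment reads. Only keys the function above actually accesses
# are listed; the env id, exploration_type, and the example values are assumptions,
# and your rlkit version's HerTd3 constructor may require additional sub-dicts
# (e.g. td3_kwargs) inside algo_kwargs.
def _example_her_td3_variant():
    return dict(
        env_id='SawyerPushNIPS-v0',   # assumption: any registered multiworld goal env
        imsize=48,
        init_camera=None,
        render=False,
        presample_goals=False,
        observation_key='image_observation',
        desired_goal_key='image_desired_goal',
        exploration_type='ou',        # assumed key read by get_exploration_strategy
        cnn_params=dict(),            # fill with the CNN constructor kwargs of your rlkit version
        replay_buffer_kwargs=dict(max_size=int(1e5)),
        algo_kwargs=dict(
            base_kwargs=dict(num_epochs=100),  # example values only
            her_kwargs=dict(),
        ),
    )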
def get_envs(variant):
    from multiworld.core.image_env import ImageEnv
    from rlkit.envs.vae_wrapper import VAEWrappedEnv
    from rlkit.util.io import load_local_or_remote_file

    render = variant.get('render', False)
    vae_path = variant.get("vae_path", None)
    reward_params = variant.get("reward_params", dict())
    init_camera = variant.get("init_camera", None)
    do_state_exp = variant.get("do_state_exp", False)
    presample_goals = variant.get('presample_goals', False)
    presample_image_goals_only = variant.get('presample_image_goals_only', False)
    presampled_goals_path = variant.get('presampled_goals_path', None)

    vae = load_local_or_remote_file(vae_path) if type(vae_path) is str else vae_path
    if 'env_id' in variant:
        import gym
        import multiworld
        multiworld.register_all_envs()
        env = gym.make(variant['env_id'])
    else:
        env = variant["env_class"](**variant['env_kwargs'])
    if not do_state_exp:
        if isinstance(env, ImageEnv):
            image_env = env
        else:
            image_env = ImageEnv(
                env,
                variant.get('imsize'),
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        if presample_goals:
            """
            This will fail for online-parallel as presampled_goals will not be
            serialized. Also don't use this for online-vae.
            """
            if presampled_goals_path is None:
                image_env.non_presampled_goal_img_is_garbage = True
                vae_env = VAEWrappedEnv(
                    image_env, vae,
                    imsize=image_env.imsize,
                    decode_goals=render,
                    render_goals=render,
                    render_rollouts=render,
                    reward_params=reward_params,
                    **variant.get('vae_wrapped_env_kwargs', {}))
                presampled_goals = variant['generate_goal_dataset_fctn'](
                    env=vae_env,
                    env_id=variant.get('env_id', None),
                    **variant['goal_generation_kwargs'])
                del vae_env
            else:
                presampled_goals = load_local_or_remote_file(
                    presampled_goals_path).item()
            del image_env
            image_env = ImageEnv(
                env,
                variant.get('imsize'),
                init_camera=init_camera,
                transpose=True,
                normalize=True,
                presampled_goals=presampled_goals,
                **variant.get('image_env_kwargs', {}))
            vae_env = VAEWrappedEnv(
                image_env, vae,
                imsize=image_env.imsize,
                decode_goals=render,
                render_goals=render,
                render_rollouts=render,
                reward_params=reward_params,
                presampled_goals=presampled_goals,
                **variant.get('vae_wrapped_env_kwargs', {}))
            print("Presampling all goals only")
        else:
            vae_env = VAEWrappedEnv(
                image_env, vae,
                imsize=image_env.imsize,
                decode_goals=render,
                render_goals=render,
                render_rollouts=render,
                reward_params=reward_params,
                **variant.get('vae_wrapped_env_kwargs', {}))
            if presample_image_goals_only:
                presampled_goals = variant['generate_goal_dataset_fctn'](
                    image_env=vae_env.wrapped_env,
                    **variant['goal_generation_kwargs'])
                image_env.set_presampled_goals(presampled_goals)
                print("Presampling image goals only")
            else:
                print("Not using presampled goals")

        env = vae_env

    return env
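
# Hedged sketch (not from the original source): a minimal skew-fit style variant for
# the get_envs above. The env id and vae_path are placeholders; reward_params follows
# the latent-distance convention used by VAEWrappedEnv, but check the exact keys
# against your rlkit version before relying on them.
def _example_get_envs_variant():
    return dict(
        env_id='SawyerPushNIPS-v0',     # assumption: any registered multiworld env
        imsize=48,
        init_camera=None,
        render=False,
        do_state_exp=False,
        vae_path='/tmp/vae.pkl',        # hypothetical path to a trained VAE
        reward_params=dict(type='latent_distance'),
        presample_goals=False,
        presample_image_goals_only=False,
        vae_wrapped_env_kwargs=dict(sample_from_true_prior=True),
    )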
def generate_vae_dataset(variant):
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()
            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=non_presampled_goal_img_is_garbage,
                )
            else:
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
            for i in range(N):
                if random_and_oracle_policy_data:
                    num_random_steps = int(N * random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif oracle_dataset_using_set_to_goal:
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                elif random_rollout_data:
                    if i % n_random_steps == 0:
                        g = dict(state_desired_goal=env.sample_goal_for_rollout())
                        env.set_to_goal(g)
                        policy.reset()
                        # env.reset()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    obs = env.step(u)[0]
                else:
                    env.reset()
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]
                img = obs['image_observation']
                dataset[i, :] = unormalize_image(img)
                if show:
                    img = img.reshape(3, imsize, imsize).transpose()
                    img = img[::-1, :, ::-1]
                    cv2.imshow('img', img)
                    cv2.waitKey(1)
                    # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def get_envs(variant):
    render = variant.get('render', False)
    vae_path = variant.get("vae_path", None)
    reward_params = variant.get("reward_params", dict())
    init_camera = variant.get("init_camera", None)
    presample_goals = variant.get('presample_goals', False)
    presample_image_goals_only = variant.get('presample_image_goals_only', False)
    presampled_goals_path = variant.get('presampled_goals_path', None)

    vae = load_local_or_remote_file(vae_path) if type(vae_path) is str else vae_path
    if 'env_id' in variant:
        env = gym.make(variant['env_id'])
    else:
        env = variant["env_class"](**variant['env_kwargs'])
    if isinstance(env, ImageEnv):
        image_env = env
    else:
        image_env = ImageEnv(
            env,
            variant.get('imsize'),
            init_camera=init_camera,
            transpose=True,
            normalize=True,
        )
    if presample_goals:
        """
        This will fail for online-parallel as presampled_goals will not be
        serialized. Also don't use this for online-vae.
        """
        if presampled_goals_path is None:
            image_env.non_presampled_goal_img_is_garbage = True
            vae_env = VAEWrappedEnv(
                image_env, vae,
                imsize=image_env.imsize,
                decode_goals=render,
                render_goals=render,
                render_rollouts=render,
                reward_params=reward_params,
                **variant.get('vae_wrapped_env_kwargs', {}))
            presampled_goals = variant['generate_goal_dataset_fctn'](
                env=vae_env,
                env_id=variant.get('env_id', None),
                **variant['goal_generation_kwargs'])
            del vae_env
        else:
            presampled_goals = load_local_or_remote_file(
                presampled_goals_path).item()
        del image_env
        image_env = ImageEnv(
            env,
            variant.get('imsize'),
            init_camera=init_camera,
            transpose=True,
            normalize=True,
            presampled_goals=presampled_goals,
            **variant.get('image_env_kwargs', {}))
        vae_env = VAEWrappedEnv(
            image_env, vae,
            imsize=image_env.imsize,
            decode_goals=render,
            render_goals=render,
            render_rollouts=render,
            reward_params=reward_params,
            presampled_goals=presampled_goals,
            **variant.get('vae_wrapped_env_kwargs', {}))
        print("Presampling all goals only")
    else:
        vae_env = VAEWrappedEnv(
            image_env, vae,
            imsize=image_env.imsize,
            decode_goals=render,
            render_goals=render,
            render_rollouts=render,
            reward_params=reward_params,
            **variant.get('vae_wrapped_env_kwargs', {}))
        if presample_image_goals_only:
            presampled_goals = variant['generate_goal_dataset_fctn'](
                image_env=vae_env.wrapped_env,
                **variant['goal_generation_kwargs'])
            image_env.set_presampled_goals(presampled_goals)
            print("Presampling image goals only")
        else:
            print("Not using presampled goals")

    env = vae_env

    training_mode = variant.get("training_mode", "train")
    testing_mode = variant.get("testing_mode", "test")
    env.add_mode('eval', testing_mode)
    env.add_mode('train', training_mode)
    env.add_mode('relabeling', training_mode)
    # relabeling_env.disable_render()
    env.add_mode("video_vae", 'video_vae')
    env.add_mode("video_env", 'video_env')
    return env
def get_envs(variant):
    from multiworld.core.image_env import ImageEnv
    from railrl.envs.vae_wrappers import VAEWrappedEnv
    from railrl.misc.asset_loader import load_local_or_remote_file

    render = variant.get('render', False)
    vae_path = variant.get("vae_path", None)
    reproj_vae_path = variant.get("reproj_vae_path", None)
    ckpt = variant.get("ckpt", None)
    reward_params = variant.get("reward_params", dict())
    init_camera = variant.get("init_camera", None)
    do_state_exp = variant.get("do_state_exp", False)
    presample_goals = variant.get('presample_goals', False)
    presample_image_goals_only = variant.get('presample_image_goals_only', False)
    presampled_goals_path = variant.get('presampled_goals_path', None)

    if not do_state_exp and type(ckpt) is str:
        vae = load_local_or_remote_file(osp.join(ckpt, 'vae.pkl'))
        if vae is not None:
            from railrl.core import logger
            logger.save_extra_data(vae, 'vae.pkl', mode='pickle')
    else:
        vae = None

    if vae is None and type(vae_path) is str:
        vae = load_local_or_remote_file(osp.join(vae_path, 'vae_params.pkl'))
        from railrl.core import logger
        logger.save_extra_data(vae, 'vae.pkl', mode='pickle')
    elif vae is None:
        vae = vae_path

    if type(vae) is str:
        vae = load_local_or_remote_file(vae)
    else:
        vae = vae

    if type(reproj_vae_path) is str:
        reproj_vae = load_local_or_remote_file(
            osp.join(reproj_vae_path, 'vae_params.pkl'))
    else:
        reproj_vae = None

    if 'env_id' in variant:
        import gym  # trigger registration
        env = gym.make(variant['env_id'])
    else:
        env = variant["env_class"](**variant['env_kwargs'])

    if not do_state_exp:
        if isinstance(env, ImageEnv):
            image_env = env
        else:
            image_env = ImageEnv(
                env,
                variant.get('imsize'),
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        vae_env = VAEWrappedEnv(
            image_env,
            vae,
            imsize=image_env.imsize,
            decode_goals=render,
            render_goals=render,
            render_rollouts=render,
            reward_params=reward_params,
            reproj_vae=reproj_vae,
            **variant.get('vae_wrapped_env_kwargs', {})
        )
        if presample_goals:
            """
            This will fail for online-parallel as presampled_goals will not be
            serialized. Also don't use this for online-vae.
            """
            if presampled_goals_path is None:
                image_env.non_presampled_goal_img_is_garbage = True
                presampled_goals = variant['generate_goal_dataset_fctn'](
                    image_env=image_env,
                    **variant['goal_generation_kwargs']
                )
            else:
                presampled_goals = load_local_or_remote_file(
                    presampled_goals_path
                ).item()
                presampled_goals = {
                    'state_desired_goal': presampled_goals['next_obs_state'],
                    'image_desired_goal': presampled_goals['next_obs'],
                }
            image_env.set_presampled_goals(presampled_goals)
            vae_env.set_presampled_goals(presampled_goals)
            print("Presampling all goals")
        else:
            if presample_image_goals_only:
                presampled_goals = variant['generate_goal_dataset_fctn'](
                    image_env=vae_env.wrapped_env,
                    **variant['goal_generation_kwargs']
                )
                image_env.set_presampled_goals(presampled_goals)
                print("Presampling image goals only")
            else:
                print("Not using presampled goals")

        env = vae_env

    if not do_state_exp:
        training_mode = variant.get("training_mode", "train")
        testing_mode = variant.get("testing_mode", "test")
        env.add_mode('eval', testing_mode)
        env.add_mode('train', training_mode)
        env.add_mode('relabeling', training_mode)
        # relabeling_env.disable_render()
        env.add_mode("video_vae", 'video_vae')
        env.add_mode("video_env", 'video_env')
    return env
def generate_vae_dataset(variant):
    print(variant)
    from tqdm import tqdm
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    batch_size = variant.get('batch_size', 128)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    augment_data = variant.get('augment_data', False)
    data_filter_fn = variant.get('data_filter_fn', lambda x: x)
    delete_after_loading = variant.get('delete_after_loading', False)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_rollout_data_set_to_goal = variant.get(
        'random_rollout_data_set_to_goal', True)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    conditional_vae_dataset = variant.get('conditional_vae_dataset', False)
    use_env_labels = variant.get('use_env_labels', False)
    use_linear_dynamics = variant.get('use_linear_dynamics', False)
    enviorment_dataset = variant.get('enviorment_dataset', False)
    save_trajectories = variant.get('save_trajectories', False)
    save_trajectories = (
        save_trajectories or use_linear_dynamics or conditional_vae_dataset)
    tag = variant.get('tag', '')

    assert N % n_random_steps == 0, "Fix N/horizon or dataset generation will fail"

    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    from rlkit.util.io import load_local_or_remote_file
    from rlkit.data_management.dataset import (
        TrajectoryDataset, ImageObservationDataset, InitialObservationDataset,
        EnvironmentDataset, ConditionalDynamicsDataset,
        InitialObservationNumpyDataset, InfiniteBatchLoader,
        InitialObservationNumpyJitteringDataset)

    info = {}
    use_test_dataset = False
    if dataset_path is not None:
        if type(dataset_path) == str:
            dataset = load_local_or_remote_file(
                dataset_path, delete_after_loading=delete_after_loading)
            dataset = dataset.item()
            N = dataset['observations'].shape[0] * dataset['observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
        if isinstance(dataset_path, list):
            dataset = concatenate_datasets(dataset_path)
            N = dataset['observations'].shape[0] * dataset['observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
        if isinstance(dataset_path, dict):
            if type(dataset_path['train']) == str:
                dataset = load_local_or_remote_file(
                    dataset_path['train'],
                    delete_after_loading=delete_after_loading)
                dataset = dataset.item()
            elif isinstance(dataset_path['train'], list):
                dataset = concatenate_datasets(dataset_path['train'])
            if type(dataset_path['test']) == str:
                test_dataset = load_local_or_remote_file(
                    dataset_path['test'],
                    delete_after_loading=delete_after_loading)
                test_dataset = test_dataset.item()
            elif isinstance(dataset_path['test'], list):
                test_dataset = concatenate_datasets(dataset_path['test'])
            N = dataset['observations'].shape[0] * dataset['observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
            use_test_dataset = True
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__
            if init_camera and hasattr(init_camera, '__name__') else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = load_local_or_remote_file(
                filename, delete_after_loading=delete_after_loading)
            if conditional_vae_dataset:
                dataset = dataset.item()
            print("loaded data from saved file", filename)
        else:
            now = time.time()
            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=non_presampled_goal_img_is_garbage,
                )
            else:
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)

            if save_trajectories:
                dataset = {
                    'observations': np.zeros(
                        (N // n_random_steps, n_random_steps,
                         imsize * imsize * num_channels),
                        dtype=np.uint8),
                    'actions': np.zeros(
                        (N // n_random_steps, n_random_steps,
                         env.action_space.shape[0]),
                        dtype=np.float),
                    'env': np.zeros(
                        (N // n_random_steps, imsize * imsize * num_channels),
                        dtype=np.uint8),
                }
            else:
                dataset = np.zeros((N, imsize * imsize * num_channels),
                                   dtype=np.uint8)
            labels = []
            for i in tqdm(range(N)):
                if random_and_oracle_policy_data:
                    num_random_steps = int(N * random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif random_rollout_data:  # ADD DATA WHERE JUST PUCK MOVES
                    if i % n_random_steps == 0:
                        env.reset()
                        policy.reset()
                        env_img = env._get_obs()['image_observation']
                        if random_rollout_data_set_to_goal:
                            env.set_to_goal(env.get_goal())
                    obs = env._get_obs()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    env.step(u)
                elif oracle_dataset_using_set_to_goal:
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                else:
                    env.reset()
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]

                img = obs['image_observation']
                if use_env_labels:
                    labels.append(obs['label'])
                if save_trajectories:
                    dataset['observations'][
                        i // n_random_steps,
                        i % n_random_steps, :] = unormalize_image(img)
                    dataset['actions'][
                        i // n_random_steps, i % n_random_steps, :] = u
                    dataset['env'][i // n_random_steps, :] = \
                        unormalize_image(env_img)
                else:
                    dataset[i, :] = unormalize_image(img)

                if show:
                    img = img.reshape(3, imsize, imsize).transpose()
                    img = img[::-1, :, ::-1]
                    cv2.imshow('img', img)
                    cv2.waitKey(1)
                    # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
            # np.save(filename[:-4] + 'labels.npy', np.array(labels))

    info['train_labels'] = []
    info['test_labels'] = []

    dataset = data_filter_fn(dataset)

    if use_linear_dynamics and conditional_vae_dataset:
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        train_dataset = ConditionalDynamicsDataset({
            'observations': dataset['observations'][:n, :, :],
            'actions': dataset['actions'][:n, :, :],
            'env': dataset['env'][:n, :]
        })
        test_dataset = ConditionalDynamicsDataset({
            'observations': dataset['observations'][n:, :, :],
            'actions': dataset['actions'][n:, :, :],
            'env': dataset['env'][n:, :]
        })

        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        indices = np.arange(num_trajectories)
        np.random.shuffle(indices)
        train_i, test_i = indices[:n], indices[n:]

        try:
            train_dataset = ConditionalDynamicsDataset({
                'observations': dataset['observations'][train_i, :, :],
                'actions': dataset['actions'][train_i, :, :],
                'env': dataset['env'][train_i, :]
            })
            test_dataset = ConditionalDynamicsDataset({
                'observations': dataset['observations'][test_i, :, :],
                'actions': dataset['actions'][test_i, :, :],
                'env': dataset['env'][test_i, :]
            })
        except:
            train_dataset = ConditionalDynamicsDataset({
                'observations': dataset['observations'][train_i, :, :],
                'actions': dataset['actions'][train_i, :, :],
            })
            test_dataset = ConditionalDynamicsDataset({
                'observations': dataset['observations'][test_i, :, :],
                'actions': dataset['actions'][test_i, :, :],
            })
    elif use_linear_dynamics:
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        train_dataset = TrajectoryDataset({
            'observations': dataset['observations'][:n, :, :],
            'actions': dataset['actions'][:n, :, :]
        })
        test_dataset = TrajectoryDataset({
            'observations': dataset['observations'][n:, :, :],
            'actions': dataset['actions'][n:, :, :]
        })
    elif enviorment_dataset:
        n = int(n_random_steps * test_p)
        train_dataset = EnvironmentDataset({
            'observations': dataset['observations'][:, :n, :],
        })
        test_dataset = EnvironmentDataset({
            'observations': dataset['observations'][:, n:, :],
        })
    elif conditional_vae_dataset:
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        indices = np.arange(num_trajectories)
        np.random.shuffle(indices)
        train_i, test_i = indices[:n], indices[n:]

        if augment_data:
            dataset_class = InitialObservationNumpyJitteringDataset
        else:
            dataset_class = InitialObservationNumpyDataset

        if 'env' not in dataset:
            dataset['env'] = dataset['observations'][:, 0]
        if use_test_dataset and ('env' not in test_dataset):
            test_dataset['env'] = test_dataset['observations'][:, 0]

        if use_test_dataset:
            train_dataset = dataset_class({
                'observations': dataset['observations'],
                'env': dataset['env']
            })
            test_dataset = dataset_class({
                'observations': test_dataset['observations'],
                'env': test_dataset['env']
            })
        else:
            train_dataset = dataset_class({
                'observations': dataset['observations'][train_i, :, :],
                'env': dataset['env'][train_i, :]
            })
            test_dataset = dataset_class({
                'observations': dataset['observations'][test_i, :, :],
                'env': dataset['env'][test_i, :]
            })

        train_batch_loader_kwargs = variant.get(
            'train_batch_loader_kwargs',
            dict(batch_size=batch_size, num_workers=0))
        test_batch_loader_kwargs = variant.get(
            'test_batch_loader_kwargs',
            dict(batch_size=batch_size, num_workers=0))

        train_data_loader = data.DataLoader(
            train_dataset,
            shuffle=True, drop_last=True, **train_batch_loader_kwargs)
        test_data_loader = data.DataLoader(
            test_dataset,
            shuffle=True, drop_last=True, **test_batch_loader_kwargs)

        train_dataset = InfiniteBatchLoader(train_data_loader)
        test_dataset = InfiniteBatchLoader(test_data_loader)
    else:
        n = int(N * test_p)
        train_dataset = ImageObservationDataset(dataset[:n, :])
        test_dataset = ImageObservationDataset(dataset[n:, :])
    return train_dataset, test_dataset, info
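
# Hedged sketch (not from the original source): a minimal variant for the
# trajectory-aware generate_vae_dataset above. N must be a multiple of
# n_random_steps (the assert at the top enforces this); with
# conditional_vae_dataset=True the function returns InfiniteBatchLoader wrappers
# rather than raw arrays. The env id is an assumption.
def _example_conditional_vae_dataset_variant():
    return dict(
        env_id='SawyerPushNIPS-v0',  # assumption: any registered multiworld env
        N=1000,
        n_random_steps=100,
        test_p=0.9,
        imsize=48,
        use_cached=False,
        show=False,
        conditional_vae_dataset=True,
        random_rollout_data=True,
        random_rollout_data_set_to_goal=True,
        batch_size=128,
    )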
def getdata(variant):
    skewfit_variant = variant['skewfit_variant']
    print('-------------------------------')
    skewfit_preprocess_variant(skewfit_variant)
    skewfit_variant['render'] = True
    vae_environment = get_envs(skewfit_variant)
    print('done loading vae_env')

    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if True:
            now = time.time()
            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                print("using(ImageEnv)")
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=non_presampled_goal_img_is_garbage,
                )
            else:
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
            for i in range(10):
                NP = []
                if True:
                    print(i)  # print('th step')
                    goal = env.sample_goal()
                    # print("goal___________________________")
                    # print(goal)
                    # print("goal___________________________")
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                    # img = img.reshape(3, imsize, imsize).transpose()
                    # img = img[::-1, :, ::-1]
                    # cv2.imshow('img', img)
                    # cv2.waitKey(1)
                    img_1 = obs['image_observation']
                    img_1 = img_1.reshape(3, imsize, imsize).transpose()
                    NP.append(img_1)
                    if i % 3 == 0:
                        cv2.imshow('img1', img_1)
                        cv2.waitKey(1)
                    # img_1_reconstruct = vae_environment._reconstruct_img(
                    #     obs['image_observation']).transpose()
                    encoded_1 = vae_environment._get_encoded(
                        obs['image_observation'])
                    print(encoded_1)
                    NP.append(encoded_1)
                    img_1_reconstruct = vae_environment._get_img(
                        encoded_1).transpose()
                    NP.append(img_1_reconstruct)
                    # dataset[i, :] = unormalize_image(img)
                    # img_1 = img_1.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img1_reconstruction', img_1_reconstruct)
                        cv2.waitKey(1)
                    env.reset()
                    instr = env.generate_new_state(goal)
                    if i % 3 == 0:
                        print(instr)
                    obs = env._get_obs()
                    # obs = env._get_obs()
                    img_2 = obs['image_observation']
                    img_2 = img_2.reshape(3, imsize, imsize).transpose()
                    NP.append(img_2)
                    if i % 3 == 0:
                        cv2.imshow('img2', img_2)
                        cv2.waitKey(1)
                    # img_2_reconstruct = vae_environment._reconstruct_img(
                    #     obs['image_observation']).transpose()
                    encoded_2 = vae_environment._get_encoded(
                        obs['image_observation'])
                    NP.append(encoded_2)
                    img_2_reconstruct = vae_environment._get_img(
                        encoded_2).transpose()
                    NP.append(img_2_reconstruct)
                    NP.append(instr)
                    # img_2 = img_2.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img2_reconstruct', img_2_reconstruct)
                        cv2.waitKey(1)
                NP = np.array(NP)
                idx = str(i)
                name = "/home/xiaomin/Downloads/IFIG_DATA_1/" + idx + ".npy"
                np.save(open(name, 'wb'), NP)
                # radius = input('waiting...')

            # get the in-between functions
            import dill
            import pickle
            get_encoded = dill.dumps(vae_environment._get_encoded)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_encoded_1000_epochs_one_puck.txt",
                    "wb") as fp:
                pickle.dump(get_encoded, fp)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_encoded_1000_epochs_one_puck.txt",
                    "rb") as fp:
                b = pickle.load(fp)
            func_get_encoded = dill.loads(b)
            encoded = func_get_encoded(obs['image_observation'])
            print(encoded)
            print('------------------------------')
            get_img = dill.dumps(vae_environment._get_img)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_img_1000_epochs_one_puck.txt",
                    "wb") as fp:
                pickle.dump(get_img, fp)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_img_1000_epochs_one_puck.txt",
                    "rb") as fp:
                c = pickle.load(fp)
            func_get_img = dill.loads(c)
            img_1_reconstruct = func_get_img(encoded).transpose()
            print(img_1_reconstruct)
            # dataset[i, :] = unormalize_image(img)
            # img_1 = img_1.reshape(3, imsize, imsize).transpose()
            cv2.imshow('test', img_1_reconstruct)
            cv2.waitKey(0)
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
def generate_vae_dataset(variant):
    """
    If a pre-trained VAE dataset generation function is not provided, this
    function is used to collect the dataset for training the VAE.
    """
    import rlkit.torch.pytorch_util as ptu
    import gym
    import multiworld
    multiworld.register_all_envs()

    print("generating vae dataset with original images")

    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')

    info = {}
    if dataset_path is not None:
        print('load vae training dataset from: ', dataset_path)
        pjhome = os.environ['PJHOME']
        dataset = np.load(osp.join(pjhome, dataset_path),
                          allow_pickle=True).item()
        if isinstance(dataset, dict):
            dataset = dataset['image_desired_goal']
        dataset = unormalize_image(dataset)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()
            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=non_presampled_goal_img_is_garbage,
                )
            else:
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
            for i in range(N):
                if random_and_oracle_policy_data:
                    num_random_steps = int(N * random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif oracle_dataset_using_set_to_goal:
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                elif random_rollout_data:
                    if i % n_random_steps == 0:
                        g = dict(state_desired_goal=env.sample_goal_for_rollout())
                        env.set_to_goal(g)
                        policy.reset()
                        # env.reset()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    obs = env.step(u)[0]
                else:
                    print("using totally random rollouts")
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]
                # NOTE yufei: this is already a normalized image, of dtype np.float64.
                img = obs['image_observation']
                dataset[i, :] = unormalize_image(img)
            np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_uniform_dataset_door(num_imgs=1000, use_cached_dataset=False,
                                  init_camera=None, imsize=48, policy_file=None,
                                  show=False, path_length=100,
                                  save_file_prefix=None, env_id=None, tag='',
                                  dataset_path=None):
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        return dataset
    import gym
    from gym.envs import registration  # trigger registration
    import multiworld.envs.pygame
    import multiworld.envs.mujoco
    env = gym.make(env_id)
    env = ImageEnv(
        env,
        imsize,
        init_camera=init_camera,
        transpose=True,
        normalize=True,
    )
    env.non_presampled_goal_img_is_garbage = True
    if save_file_prefix is None and env_id is not None:
        save_file_prefix = env_id
    filename = "/tmp/{}_N{}_imsize{}uniform_images_{}.npy".format(
        save_file_prefix,
        str(num_imgs),
        env.imsize,
        tag,
    )
    if use_cached_dataset and osp.isfile(filename):
        images = np.load(filename)
        print("Loaded data from {}".format(filename))
        return images
    policy_file = load_local_or_remote_file(policy_file)
    policy = policy_file['policy']
    policy.to(ptu.device)
    print('Sampling Uniform Dataset')
    dataset = np.zeros((num_imgs, 3 * env.imsize ** 2), dtype=np.uint8)
    for j in range(num_imgs):
        obs = env.reset()
        policy.reset()
        for i in range(path_length):
            policy_obs = np.hstack((
                obs['state_observation'],
                obs['state_desired_goal'],
            ))
            action, _ = policy.get_action(policy_obs)
            obs, _, _, _ = env.step(action)
        img_f = obs['image_observation']
        if show:
            img = obs['image_observation']
            img = img.reshape(3, env.imsize, env.imsize).transpose()
            img = img[::-1, :, ::-1]
            cv2.imshow('img', img)
            cv2.waitKey(1)
        print(j)
        dataset[j, :] = unormalize_image(img_f)
        temp = env.reset_free
        env.reset_free = True
        env.reset()
        env.reset_free = temp
    np.save(filename, dataset)
    print("Saving file to {}".format(filename))
    return dataset
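
# Hedged usage sketch (not from the original source): generate_uniform_dataset_door
# needs a pre-trained state-based policy snapshot with a 'policy' key. The env id is
# an assumption and the snapshot path below is a hypothetical placeholder.
def _example_uniform_door_images():
    return generate_uniform_dataset_door(
        env_id='SawyerDoorHookResetFreeEnv-v1',    # assumption: a registered multiworld door env
        policy_file='/tmp/door_policy_params.pkl',  # hypothetical snapshot path
        num_imgs=200,
        path_length=100,
        imsize=48,
    )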
def generate_vae_dataset(variant):
    import cv2
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    use_images = variant.get('use_images', True)
    imsize = variant.get('imsize', 84)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    oracle_dataset = variant.get('oracle_dataset', False)
    if 'n_random_steps' in variant:
        n_random_steps = variant['n_random_steps']
    else:
        if oracle_dataset:
            n_random_steps = 3
        else:
            n_random_steps = 100
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    from multiworld.core.image_env import ImageEnv, unormalize_image
    info = {}
    from railrl.core import logger
    logdir = logger.get_snapshot_dir()
    filename = osp.join(logdir, "vae_dataset.npy")

    now = time.time()
    if env_id is not None:
        import gym
        env = gym.make(env_id)
    else:
        if vae_dataset_specific_env_kwargs is None:
            vae_dataset_specific_env_kwargs = {}
        for key, val in env_kwargs.items():
            if key not in vae_dataset_specific_env_kwargs:
                vae_dataset_specific_env_kwargs[key] = val
        env = env_class(**vae_dataset_specific_env_kwargs)
    if not isinstance(env, ImageEnv):
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
            non_presampled_goal_img_is_garbage=non_presampled_goal_img_is_garbage,
        )
    else:
        imsize = env.imsize
        env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
    env.reset()
    info['env'] = env

    if use_images:
        data_size = len(env.observation_space.spaces['image_observation'].low)
        dtype = np.uint8
    else:
        data_size = len(env.observation_space.spaces['state_observation'].low)
        dtype = np.float32
    state_size = len(env.observation_space.spaces['state_observation'].low)

    dataset = {
        'obs': np.zeros((N, data_size), dtype=dtype),
        'actions': np.zeros((N, len(env.action_space.low)), dtype=np.float32),
        'next_obs': np.zeros((N, data_size), dtype=dtype),
        'obs_state': np.zeros((N, state_size), dtype=np.float32),
        'next_obs_state': np.zeros((N, state_size), dtype=np.float32),
    }
    for i in range(N):
        if i % (N / 50) == 0:
            print(i)
        if oracle_dataset:
            if i % 100 == 0:
                env.reset()
            goal = env.sample_goal()
            env.set_to_goal(goal)
            for _ in range(n_random_steps):
                env.step(env.action_space.sample())
        else:
            env.reset()
            for _ in range(n_random_steps):
                env.step(env.action_space.sample())

        obs = env._get_obs()
        if use_images:
            dataset['obs'][i, :] = unormalize_image(obs['image_observation'])
        else:
            dataset['obs'][i, :] = obs['state_observation']
        dataset['obs_state'][i, :] = obs['state_observation']

        action = env.action_space.sample()
        dataset['actions'][i, :] = action

        obs = env.step(action)[0]
        img = obs['image_observation']
        if use_images:
            dataset['next_obs'][i, :] = unormalize_image(img)
        else:
            dataset['next_obs'][i, :] = obs['state_observation']
        dataset['next_obs_state'][i, :] = obs['state_observation']
        show = True  # TODO: delete this debugging override
        if show:
            img = img.reshape(3, imsize, imsize).transpose((1, 2, 0))
            img = img[::, :, ::-1]
            cv2.imshow('img', img)
            cv2.waitKey(1000)

    print("keys and shapes:")
    for k in dataset.keys():
        print(k, dataset[k].shape)
    print("done making training data", filename, time.time() - now)
    np.save(filename, dataset)
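
# Hedged sketch (not from the original source): the function above saves a dict of
# arrays with np.save into the logger's snapshot dir instead of returning it; it can
# be read back like this.
def _example_load_vae_dataset(filename):
    # allow_pickle is required because the saved object is a dict, not a plain array
    return np.load(filename, allow_pickle=True).item()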
def generate_vae_dataset(variant):
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()
            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=non_presampled_goal_img_is_garbage,
                )
            else:
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
            for i in range(10000):
                NP = []
                if oracle_dataset_using_set_to_goal:
                    print(i)  # print('th step')
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                    # img = img.reshape(3, imsize, imsize).transpose()
                    # img = img[::-1, :, ::-1]
                    # cv2.imshow('img', img)
                    # cv2.waitKey(1)
                    img_1 = obs['image_observation']
                    NP.append(img_1)
                    # dataset[i, :] = unormalize_image(img)
                    img_1 = img_1.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img1', img_1)
                        cv2.waitKey(1)
                    env.reset()
                    instr = env.generate_new_state(goal)
                    if i % 3 == 0:
                        print(instr)
                    obs = env._get_obs()
                    # obs = env._get_obs()
                    img_2 = obs['image_observation']
                    NP.append(img_2)
                    NP.append(instr)
                    img_2 = img_2.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img2', img_2)
                        cv2.waitKey(1)
                NP = np.array(NP)
                print(NP)
                idx = str(i)
                name = "/home/xiaomin/Downloads/IFIG_DATA_1/" + idx + ".npy"
                np.save(open(name, 'wb'), NP)
                # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info