def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        policy_path=None,
        action_space_sampling=False,
        env_class=SawyerDoorEnv,
        env_kwargs=None,
        init_camera=sawyer_door_env_camera_v2,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_pull_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        env.wrapped_env.reset()
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        # Roll out an OU-perturbed random policy and store the flattened,
        # unnormalized images as training data for the VAE.
        for i in range(N):
            if i % 20 == 0:
                env.reset_model()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            # env.set_to_goal_angle(env.get_goal()['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_goal_data_set(env=None, num_goals=1000, use_cached_dataset=False,
                           action_scale=1 / 10):
    if use_cached_dataset and osp.isfile('/tmp/goals' + str(num_goals) + '.npy'):
        goal_dict = np.load('/tmp/goals' + str(num_goals) + '.npy').item()
        print("loaded data from saved file")
        return goal_dict
    cached_goal_keys = [
        'latent_desired_goal',
        'image_desired_goal',
        'state_desired_goal',
        'joint_desired_goal',
    ]
    goal_sizes = [
        env.observation_space.spaces['latent_desired_goal'].low.size,
        env.observation_space.spaces['image_desired_goal'].low.size,
        env.observation_space.spaces['state_desired_goal'].low.size,
        7,
    ]
    observation_keys = [
        'latent_observation',
        'image_observation',
        'state_observation',
        'state_observation',
    ]
    goal_generation_dict = dict()
    for goal_key, goal_size, obs_key in zip(
            cached_goal_keys,
            goal_sizes,
            observation_keys,
    ):
        goal_generation_dict[goal_key] = [goal_size, obs_key]

    goal_dict = dict()
    policy = RandomPolicy(env.action_space)
    es = OUStrategy(action_space=env.action_space, theta=0)
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    for goal_key in goal_generation_dict:
        goal_size, obs_key = goal_generation_dict[goal_key]
        goal_dict[goal_key] = np.zeros((num_goals, goal_size))
    print('Generating Random Goals')
    for i in range(num_goals):
        if i % 50 == 0:
            print('Reset')
            env.reset_model()
            exploration_policy.reset()
        action = exploration_policy.get_action()[0] * action_scale
        obs, _, _, _ = env.step(action)
        print(i)
        for goal_key in goal_generation_dict:
            goal_size, obs_key = goal_generation_dict[goal_key]
            goal_dict[goal_key][i, :] = obs[obs_key]
    np.save('/tmp/goals' + str(num_goals) + '.npy', goal_dict)
    return goal_dict
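# Illustrative sketch only: the data generators above and below pair a
# RandomPolicy with railrl's OUStrategy (with theta=0 the process degenerates
# to a Gaussian random walk). This stand-in shows the idea; it is not the
# repo's OUStrategy implementation and its class/parameter names are made up.
import numpy as np


class OUNoiseSketch:
    """Ornstein-Uhlenbeck noise: dx = theta * (mu - x) + sigma * N(0, I)."""

    def __init__(self, dim, theta=0.15, sigma=0.3, mu=0.0):
        self.dim, self.theta, self.sigma, self.mu = dim, theta, sigma, mu
        self.state = np.full(dim, mu, dtype=np.float64)

    def reset(self):
        # Restart the process at its mean, mirroring exploration_policy.reset().
        self.state[:] = self.mu

    def sample(self):
        self.state = (
            self.state
            + self.theta * (self.mu - self.state)
            + self.sigma * np.random.randn(self.dim)
        )
        return self.state.copy()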
def visualize_policy_error(qf, env, args):
    model = NumpyModelExtractor(qf, args.cheat, num_steps_left=args.tau)
    policy = RandomPolicy(env.action_space)
    actual_state = env.reset()

    predicted_states = []
    actual_states = []
    predicted_state = actual_state
    for _ in range(args.H):
        predicted_states.append(predicted_state.copy())
        actual_states.append(actual_state.copy())

        action, _ = policy.get_action(actual_state)
        predicted_state = model.next_state(predicted_state, action)
        actual_state = env.step(action)[0]

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)

    times = np.arange(args.H)
    num_state_dims = env.observation_space.low.size
    dims = list(range(num_state_dims))
    norm = colors.Normalize(vmin=0, vmax=num_state_dims)
    mapper = cm.ScalarMappable(norm=norm, cmap=cm.hsv)
    for dim in dims:
        plt.plot(
            times,
            predicted_states[:, dim],
            '--',
            label='Predicted, Dim {}'.format(dim),
            color=mapper.to_rgba(dim),
        )
        plt.plot(
            times,
            actual_states[:, dim],
            '-',
            label='Actual, Dim {}'.format(dim),
            color=mapper.to_rgba(dim),
        )
    plt.xlabel("Time Steps")
    plt.ylabel("Observation Value")
    plt.legend(loc='best')
    plt.show()
def pretrain(self):
    if (self.num_paths_for_normalization == 0
            or (self.obs_normalizer is None and self.action_normalizer is None)):
        return

    pretrain_paths = []
    random_policy = RandomPolicy(self.env.action_space)
    while len(pretrain_paths) < self.num_paths_for_normalization:
        path = rollout(self.env, random_policy, self.max_path_length)
        pretrain_paths.append(path)
    ob_mean, ob_std, delta_mean, delta_std, ac_mean, ac_std = (
        compute_normalization(pretrain_paths)
    )
    if self.obs_normalizer is not None:
        self.obs_normalizer.set_mean(ob_mean)
        self.obs_normalizer.set_std(ob_std)
    if self.delta_normalizer is not None:
        self.delta_normalizer.set_mean(delta_mean)
        self.delta_normalizer.set_std(delta_std)
    if self.action_normalizer is not None:
        self.action_normalizer.set_mean(ac_mean)
        self.action_normalizer.set_std(ac_std)
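# Hedged sketch of the statistics pretrain() expects from compute_normalization:
# per-dimension mean/std of observations, of observation deltas, and of actions
# over a list of rollout paths. The path-dict keys used below ('observations',
# 'next_observations', 'actions') are assumptions based on how rollout() is
# used here, not a verified signature from the repo.
import numpy as np


def compute_normalization_sketch(paths):
    obs = np.vstack([p['observations'] for p in paths])
    next_obs = np.vstack([p['next_observations'] for p in paths])
    actions = np.vstack([p['actions'] for p in paths])
    deltas = next_obs - obs
    return (
        obs.mean(axis=0), obs.std(axis=0),
        deltas.mean(axis=0), deltas.std(axis=0),
        actions.mean(axis=0), actions.std(axis=0),
    )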
from railrl.state_distance.rollout_util import multitask_rollout

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('file', type=str, help='path to the snapshot file')
    parser.add_argument('--pause', action='store_true')
    args = parser.parse_args()
    if args.pause:
        import ipdb; ipdb.set_trace()

    data = joblib.load(args.file)
    env = data['env']
    qf = data['qf']
    policy = data['policy']
    tdm_policy = data['trained_policy']
    random_policy = RandomPolicy(env.action_space)
    vf = data['vf']
    path = multitask_rollout(
        env,
        random_policy,
        init_tau=0,
        max_path_length=100,
        animated=True,
    )
    goal = env.sample_goal_for_rollout()
    import ipdb; ipdb.set_trace()
    agent_infos = path['agent_infos']
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        action_space_sampling=False,
        init_camera=None,
        env_class=None,
        env_kwargs=None,
):
    filename = "/tmp/sawyer_xyz_pos_control_new_zoom_cam" + str(N) + '.npy'
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        if action_space_sampling:
            action_space = Box(np.array([-.1, .5, 0]), np.array([.1, .7, .5]))
            for i in range(N):
                env.set_to_goal(env.sample_goal())
                img = env._get_flat_img()
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)
            info['env'] = env
        else:
            policy = RandomPolicy(env.action_space)
            es = OUStrategy(action_space=env.action_space, theta=0)
            exploration_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=es,
                policy=policy,
            )
            for i in range(N):
                # Move the goal out of the image
                env.wrapped_env.set_goal(np.array([100, 100, 100]))
                if i % 50 == 0:
                    print('Reset')
                    env.reset()
                    exploration_policy.reset()
                for _ in range(1):
                    action = exploration_policy.get_action()[0] * 10
                    env.wrapped_env.step(action)
                img = env.step(env.action_space.sample())[0]
                dataset[i, :] = img
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)
        print("done making training data", time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        policy_path=None,
        ratio_oracle_policy_data_to_random=1 / 2,
        action_space_sampling=False,
        env_class=None,
        env_kwargs=None,
        action_plus_random_sampling=False,
        init_camera=sawyer_door_env_camera,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_space_sampling:
        env = SawyerDoorPushOpenEnv(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=False,
            init_camera=sawyer_door_env_camera,
            normalize=False,
        )
        action_space = Box(np.array([-env.max_x_pos, .5, .06]),
                           np.array([env.max_x_pos, env.max_y_pos, .06]))
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.set_to_goal_pos(action_space.sample())  # move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env.get_image().flatten()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        info['env'] = env
    elif action_plus_random_sampling:
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        action_space = Box(np.array([-env.max_x_pos, .5, .06]),
                           np.array([env.max_x_pos, .6, .06]))
        action_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            env.set_to_goal_pos(action_space.sample())  # move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        now = time.time()
        env = SawyerDoorPushOpenEnv(max_angle=.5)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_door_env_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            if i % 100 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(25):
                # env.wrapped_env.step(
                #     env.wrapped_env.action_space.sample()
                # )
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            goal = env.sample_goal_for_rollout()
            env.set_to_goal(goal)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        policy_path=None,
        action_space_sampling=False,
        env_class=SawyerPushAndPullDoorEnv,
        env_kwargs=None,
        action_plus_random_sampling=False,
        init_camera=sawyer_door_env_camera,
        ratio_action_sample_to_random=1 / 2,
        env_id=None,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_and_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_and_pull_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_and_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_plus_random_sampling:
        if env_id is not None:
            import gym
            env = gym.make(env_id)
        else:
            env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        action_sampled_data = int(N * ratio_action_sample_to_random)
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            goal = env.sample_goal()
            env.set_to_goal_angle(goal['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        raise NotImplementedError()
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
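# Hypothetical call into the action-plus-random sampling branch above. The env
# id is a placeholder (not a registered id verified in this repo); with
# use_cached=False the dataset is regenerated, and with show=False no OpenCV
# window is opened.
if __name__ == "__main__":
    train_data, test_data, info = generate_vae_dataset(
        N=200,
        use_cached=False,
        action_plus_random_sampling=True,
        env_id='SawyerDoorHookEnv-v0',  # placeholder env id
    )
    print(train_data.shape, test_data.shape)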
def main():
    model_data = joblib.load(MODEL_PATH)
    model = model_data['model']
    tdm_data = joblib.load(TDM_PATH)
    env = tdm_data['env']
    qf = tdm_data['qf']
    variant_path = Path(TDM_PATH).parents[0] / 'variant.json'
    variant = json.load(variant_path.open())
    reward_scale = variant['sac_tdm_kwargs']['base_kwargs']['reward_scale']
    tdm = ImplicitModel(qf, None)
    random_policy = RandomPolicy(env.action_space)
    H = 10
    path = rollout(env, random_policy, max_path_length=H)

    model_distance_preds = []
    tdm_distance_preds = []
    for ob, action, next_ob in zip(
            path['observations'],
            path['actions'],
            path['next_observations'],
    ):
        obs = ob[None]
        actions = action[None]
        next_feature = env.convert_ob_to_goal(next_ob)
        model_next_ob_pred = ob + model.eval_np(obs, actions)[0]
        model_distance_pred = np.abs(
            env.convert_ob_to_goal(model_next_ob_pred) - next_feature
        )
        tdm_next_feature_pred = get_feasible_goal(env, tdm, ob, action)
        tdm_distance_pred = np.abs(tdm_next_feature_pred - next_feature)

        model_distance_preds.append(model_distance_pred)
        tdm_distance_preds.append(tdm_distance_pred)

    model_distances = np.array(model_distance_preds)
    tdm_distances = np.array(tdm_distance_preds)
    ts = np.arange(len(model_distance_preds))
    num_dim = model_distances[0].size

    ind = np.arange(num_dim)
    width = 0.35
    fig, ax = plt.subplots()
    means = model_distances.mean(axis=0)
    stds = model_distances.std(axis=0)
    rects1 = ax.bar(ind, means, width, color='r', yerr=stds)
    means = tdm_distances.mean(axis=0)
    stds = tdm_distances.std(axis=0)
    rects2 = ax.bar(ind + width, means, width, color='y', yerr=stds)
    ax.legend((rects1[0], rects2[0]), ('Model', 'TDM'))
    ax.set_xlabel("Dimension")
    ax.set_ylabel("Absolute Error")
    ax.set_xticks(ind + width / 2)
    ax.set_xticklabels(list(map(str, ind)))
    plt.show()

    plt.subplot(2, 1, 1)
    for i in range(num_dim):
        plt.plot(ts, model_distances[:, i], label=str(i))
    plt.xlabel("Time")
    plt.ylabel("Absolute Error")
    plt.title("Model")
    plt.legend()
    plt.subplot(2, 1, 2)
    for i in range(num_dim):
        plt.plot(ts, tdm_distances[:, i], label=str(i))
    plt.xlabel("Time")
    plt.ylabel("Absolute Error")
    plt.title("TDM")
    plt.legend()
    plt.show()

    goal = env.convert_ob_to_goal(path['observations'][H // 2].copy())
    path = rollout(env, random_policy, max_path_length=H)
    model_distance_preds = []
    tdm_distance_preds = []
    for ob, action, next_ob in zip(
            path['observations'],
            path['actions'],
            path['next_observations'],
    ):
        model_next_ob_pred = ob + model.eval_np(ob[None], action[None])[0]
        model_distance_pred = np.linalg.norm(
            env.convert_ob_to_goal(model_next_ob_pred) - goal
        )
        tdm_distance_pred = tdm.eval_np(
            ob[None],
            goal[None],
            np.zeros((1, 1)),
            action[None],
        )[0] / reward_scale

        model_distance_preds.append(model_distance_pred)
        tdm_distance_preds.append(tdm_distance_pred)

    # Rebuild the distance arrays from the goal-conditioned predictions
    # before plotting them.
    model_distances = np.array(model_distance_preds)
    tdm_distances = np.array(tdm_distance_preds)

    fig, ax = plt.subplots()
    means = model_distances.mean(axis=0)
    stds = model_distances.std(axis=0)
    rects1 = ax.bar(ind, means, width, color='r', yerr=stds)
    means = tdm_distances.mean(axis=0)
    stds = tdm_distances.std(axis=0)
    rects2 = ax.bar(ind + width, means, width, color='y', yerr=stds)
    ax.legend((rects1[0], rects2[0]), ('Model', 'TDM'))
    ax.set_xlabel("Dimension")
    ax.set_ylabel("Error To Random Goal State")
    ax.set_xticks(ind + width / 2)
    ax.set_xticklabels(list(map(str, ind)))
    plt.show()
def train_vae(variant):
    from railrl.misc.ml_util import PiecewiseLinearSchedule
    from railrl.torch.vae.conv_vae import ConvVAE
    from railrl.torch.vae.conv_vae_trainer import ConvVAETrainer
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu
    from multiworld.core.image_env import ImageEnv
    from railrl.envs.vae_wrappers import VAEWrappedEnv
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path

    logger.remove_tabular_output('progress.csv', relative_to_snapshot_dir=True)
    logger.add_tabular_output('vae_progress.csv', relative_to_snapshot_dir=True)

    env_id = variant['generate_vae_dataset_kwargs'].get('env_id', None)
    if env_id is not None:
        import gym
        env = gym.make(env_id)
    else:
        env_class = variant['generate_vae_dataset_kwargs']['env_class']
        env_kwargs = variant['generate_vae_dataset_kwargs']['env_kwargs']
        env = env_class(**env_kwargs)

    representation_size = variant["representation_size"]
    beta = variant["beta"]
    if 'beta_schedule_kwargs' in variant:
        beta_schedule = PiecewiseLinearSchedule(**variant['beta_schedule_kwargs'])
    else:
        beta_schedule = None

    # obtain training and testing data
    dataset_path = variant['generate_vae_dataset_kwargs'].get('dataset_path', None)
    test_p = variant['generate_vae_dataset_kwargs'].get('test_p', 0.9)
    filename = local_path_from_s3_or_local_path(dataset_path)
    dataset = np.load(filename, allow_pickle=True).item()
    N = dataset['obs'].shape[0]
    n = int(N * test_p)
    train_data = {}
    test_data = {}
    for k in dataset.keys():
        train_data[k] = dataset[k][:n, :]
        test_data[k] = dataset[k][n:, :]

    # setup vae
    variant['vae_kwargs']['action_dim'] = train_data['actions'].shape[1]
    if variant.get('vae_type', None) == "VAE-state":
        from railrl.torch.vae.vae import VAE
        input_size = train_data['obs'].shape[1]
        variant['vae_kwargs']['input_size'] = input_size
        m = VAE(representation_size, **variant['vae_kwargs'])
    elif variant.get('vae_type', None) == "VAE2":
        from railrl.torch.vae.conv_vae2 import ConvVAE2
        variant['vae_kwargs']['imsize'] = variant['imsize']
        m = ConvVAE2(representation_size, **variant['vae_kwargs'])
    else:
        variant['vae_kwargs']['imsize'] = variant['imsize']
        m = ConvVAE(representation_size, **variant['vae_kwargs'])
    if ptu.gpu_enabled():
        m.cuda()

    # setup vae trainer
    if variant.get('vae_type', None) == "VAE-state":
        from railrl.torch.vae.vae_trainer import VAETrainer
        t = VAETrainer(train_data, test_data, m, beta=beta,
                       beta_schedule=beta_schedule, **variant['algo_kwargs'])
    else:
        t = ConvVAETrainer(train_data, test_data, m, beta=beta,
                           beta_schedule=beta_schedule, **variant['algo_kwargs'])

    # visualization
    vis_variant = variant.get('vis_kwargs', {})
    save_video = vis_variant.get('save_video', False)
    if isinstance(env, ImageEnv):
        image_env = env
    else:
        image_env = ImageEnv(
            env,
            variant['generate_vae_dataset_kwargs'].get('imsize'),
            init_camera=variant['generate_vae_dataset_kwargs'].get('init_camera'),
            transpose=True,
            normalize=True,
        )
    render = variant.get('render', False)
    reward_params = variant.get("reward_params", dict())
    vae_env = VAEWrappedEnv(
        image_env,
        m,
        imsize=image_env.imsize,
        decode_goals=render,
        render_goals=render,
        render_rollouts=render,
        reward_params=reward_params,
        **variant.get('vae_wrapped_env_kwargs', {})
    )
    vae_env.reset()
    vae_env.add_mode("video_env", 'video_env')
    vae_env.add_mode("video_vae", 'video_vae')
    if save_video:
        import railrl.samplers.rollout_functions as rf
        from railrl.policies.simple import RandomPolicy
        random_policy = RandomPolicy(vae_env.action_space)
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=100,
            observation_key='latent_observation',
            desired_goal_key='latent_desired_goal',
            vis_list=vis_variant.get('vis_list', []),
            dont_terminate=True,
        )
        dump_video_kwargs = variant.get("dump_video_kwargs", dict())
        dump_video_kwargs['imsize'] = vae_env.imsize
        dump_video_kwargs['vis_list'] = [
            'image_observation',
            'reconstr_image_observation',
            'image_latent_histogram_2d',
            'image_latent_histogram_mu_2d',
            'image_plt',
            'image_rew',
            'image_rew_euclidean',
            'image_rew_mahalanobis',
            'image_rew_logp',
            'image_rew_kl',
            'image_rew_kl_rev',
        ]

    def visualization_post_processing(save_vis, save_video, epoch):
        vis_list = vis_variant.get('vis_list', [])
        if save_vis:
            if vae_env.vae_input_key_prefix == 'state':
                vae_env.dump_reconstructions(
                    epoch, n_recon=vis_variant.get('n_recon', 16))
            vae_env.dump_samples(
                epoch, n_samples=vis_variant.get('n_samples', 64))
            if 'latent_representation' in vis_list:
                vae_env.dump_latent_plots(epoch)
            if any(elem in vis_list for elem in [
                'latent_histogram', 'latent_histogram_mu',
                'latent_histogram_2d', 'latent_histogram_mu_2d',
            ]):
                vae_env.compute_latent_histogram()
            if not save_video and ('latent_histogram' in vis_list):
                vae_env.dump_latent_histogram(
                    epoch=epoch, noisy=True, use_true_prior=True)
            if not save_video and ('latent_histogram_mu' in vis_list):
                vae_env.dump_latent_histogram(
                    epoch=epoch, noisy=False, use_true_prior=True)
        if save_video and save_vis:
            from railrl.envs.vae_wrappers import temporary_mode
            from railrl.misc.video_gen import dump_video
            from railrl.core import logger
            vae_env.compute_goal_encodings()
            logdir = logger.get_snapshot_dir()
            filename = osp.join(logdir, 'video_{epoch}.mp4'.format(epoch=epoch))
            variant['dump_video_kwargs']['epoch'] = epoch
            temporary_mode(
                vae_env, mode='video_env', func=dump_video,
                args=(vae_env, random_policy, filename, rollout_function),
                kwargs=variant['dump_video_kwargs'])
            if not vis_variant.get('save_video_env_only', True):
                filename = osp.join(
                    logdir, 'video_{epoch}_vae.mp4'.format(epoch=epoch))
                temporary_mode(
                    vae_env, mode='video_vae', func=dump_video,
                    args=(vae_env, random_policy, filename, rollout_function),
                    kwargs=variant['dump_video_kwargs'])

    # train vae
    for epoch in range(variant['num_epochs']):
        save_vis = (epoch % vis_variant['save_period'] == 0
                    or epoch == variant['num_epochs'] - 1)
        save_vae = (epoch % variant['snapshot_gap'] == 0
                    or epoch == variant['num_epochs'] - 1)
        t.train_epoch(epoch)
        t.test_epoch(
            epoch,
            save_reconstruction=save_vis,
            save_interpolation=save_vis,
            save_vae=save_vae,
        )
        if epoch % 300 == 0 or epoch == variant['num_epochs'] - 1:
            visualization_post_processing(save_vis, save_video, epoch)

    logger.save_extra_data(m, 'vae.pkl', mode='pickle')
    logger.remove_tabular_output(
        'vae_progress.csv',
        relative_to_snapshot_dir=True,
    )
    logger.add_tabular_output(
        'progress.csv',
        relative_to_snapshot_dir=True,
    )
    print("finished --------------------!!!!!!!!!!!!!!!")
    return m
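# Hypothetical example of the `variant` dict that train_vae() reads, inferred
# from the keys accessed above; every value is a placeholder, not a
# configuration shipped with the repo.
example_variant = dict(
    imsize=84,
    representation_size=16,
    beta=2.5,
    num_epochs=500,
    snapshot_gap=50,
    generate_vae_dataset_kwargs=dict(
        env_id=None,                             # or a registered gym env id
        env_class=None,                          # used only when env_id is None
        env_kwargs=dict(),
        dataset_path='path/to/vae_dataset.npy',  # placeholder path
        test_p=0.9,
        imsize=84,
        init_camera=None,
    ),
    vae_kwargs=dict(),   # action_dim and imsize are filled in by train_vae()
    algo_kwargs=dict(),
    vis_kwargs=dict(save_period=50, save_video=False),
    dump_video_kwargs=dict(),
    vae_wrapped_env_kwargs=dict(),
)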
def visualize_policy_error(model, env, horizon):
    policy = RandomPolicy(env.action_space)
    actual_state = env.reset()

    predicted_states = []
    actual_states = []
    predicted_state = actual_state
    for _ in range(horizon):
        predicted_states.append(predicted_state.copy())
        actual_states.append(actual_state.copy())

        action, _ = policy.get_action(actual_state)
        delta = get_np_prediction(model, predicted_state, action)
        predicted_state += delta
        actual_state = env.step(action)[0]

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)

    times = np.arange(horizon)
    num_state_dims = env.observation_space.low.size
    dims = list(range(num_state_dims))
    norm = colors.Normalize(vmin=0, vmax=num_state_dims)
    mapper = cm.ScalarMappable(norm=norm, cmap=cm.hsv)

    # Plot the predicted and actual values
    plt.subplot(2, 1, 1)
    for dim in dims:
        plt.plot(
            times,
            predicted_states[:, dim],
            '--',
            label='Predicted, Dim {}'.format(dim),
            color=mapper.to_rgba(dim),
        )
        plt.plot(
            times,
            actual_states[:, dim],
            '-',
            label='Actual, Dim {}'.format(dim),
            color=mapper.to_rgba(dim),
        )
    plt.xlabel("Time Steps")
    plt.ylabel("Observation Value")
    plt.legend(loc='best')

    # Plot the predicted and actual value errors
    plt.subplot(2, 1, 2)
    for dim in dims:
        plt.plot(
            times,
            np.abs(predicted_states[:, dim] - actual_states[:, dim]),
            '-',
            label='Dim {}'.format(dim),
            color=mapper.to_rgba(dim),
        )
    plt.xlabel("Time Steps")
    plt.ylabel("|Predicted - Actual| - Absolute Error")
    plt.legend(loc='best')
    plt.show()

    nrows = min(5, num_state_dims)
    ncols = math.ceil(num_state_dims / nrows)
    fig = plt.figure()
    for dim in dims:
        ax = fig.add_subplot(nrows, ncols, dim + 1)
        ax.plot(
            times,
            predicted_states[:, dim],
            '--',
            label='Predicted, Dim {}'.format(dim),
        )
        ax.plot(
            times,
            actual_states[:, dim],
            '-',
            label='Actual, Dim {}'.format(dim),
        )
        ax.set_ylabel("Observation Value")
        ax.set_xlabel("Time Steps")
        ax.set_title("Dim {}".format(dim))
        ax_error = ax.twinx()
        ax_error.plot(
            times,
            np.abs(predicted_states[:, dim] - actual_states[:, dim]),
            '.',
            label='Error, Dim {}'.format(dim),
            color='r',
        )
        ax_error.set_ylabel("Error", color='r')
        ax_error.tick_params('y', colors='r')
        ax.legend(loc='best')
    plt.show()
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        env_class=None,
        env_kwargs=None,
        init_camera=sawyer_door_env_camera,
):
    filename = "/tmp/sawyer_door_push_open_and_reach" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        oracle_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Goal Space Sampling')
        for i in range(oracle_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(oracle_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info