def experiment(variant):
    """Train a ConvVAE on data produced by ``generate_vae_dataset``.

    Keys read from ``variant``: ``beta``, ``representation_size``,
    ``get_data_kwargs``, ``algo_kwargs``, ``save_period``, ``num_epochs``
    and, optionally, ``beta_schedule_kwargs`` and ``gpu_id``.
    """
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu

    beta = variant["beta"]
    latent_dim = variant["representation_size"]
    train_set, test_set, dataset_info = generate_vae_dataset(
        **variant['get_data_kwargs'])
    logger.save_extra_data(dataset_info)
    logger.get_snapshot_dir()

    # A beta schedule is optional; without one the trainer gets None.
    schedule = None
    if 'beta_schedule_kwargs' in variant:
        schedule = PiecewiseLinearSchedule(**variant['beta_schedule_kwargs'])

    vae = ConvVAE(latent_dim, input_channels=3)
    if ptu.gpu_enabled():
        vae.to(ptu.device)
        requested_gpu = variant.get("gpu_id", None)
        if requested_gpu is not None:
            ptu.set_device(requested_gpu)

    trainer = ConvVAETrainer(
        train_set,
        test_set,
        vae,
        beta=beta,
        beta_schedule=schedule,
        **variant['algo_kwargs']
    )

    period = variant['save_period']
    for epoch in range(variant['num_epochs']):
        # Images are only written every ``period`` epochs.
        dump_images = epoch % period == 0
        trainer.train_epoch(epoch)
        trainer.test_epoch(
            epoch,
            save_reconstruction=dump_images,
            save_scatterplot=dump_images,
        )
        if dump_images:
            trainer.dump_samples(epoch)
def experiment(variant):
    """Train a ConvVAE on Sawyer torque-control images stored as .npy shards.

    Five shards of 10000 rows x 21168 columns are loaded from a fixed
    directory, stacked, and split 90/10 into train and test data.

    Keys read from ``variant``: ``beta``, ``representation_size``,
    ``conv_vae_kwargs``, ``algo_kwargs``, ``save_period``, ``num_epochs``
    and, optionally, ``beta_schedule_kwargs`` (which then also requires
    ``flat_x`` and ``ramp_x``).
    """
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu

    beta = variant["beta"]
    latent_dim = variant["representation_size"]

    num_divisions = 5
    total_rows = num_divisions * 10000
    images = np.zeros((total_rows, 21168))
    for i in range(num_divisions):
        shard = np.load(
            '/home/murtaza/vae_data/sawyer_torque_control_images100000_' +
            str(i + 1) + '.npy')
        start = i * 10000
        images[start:start + 10000] = shard
        print(i)

    # The first 90% of the rows are used for training, the rest for testing.
    mid = int(total_rows * .9)
    train_set = images[:mid]
    test_set = images[mid:]

    logger.save_extra_data(dict())
    logger.get_snapshot_dir()

    schedule = None
    if 'beta_schedule_kwargs' in variant:
        # These writes modify the dict stored inside ``variant`` in place.
        schedule_kwargs = variant['beta_schedule_kwargs']
        schedule_kwargs['y_values'][2] = variant['beta']
        schedule_kwargs['x_values'][1] = variant['flat_x']
        schedule_kwargs['x_values'][2] = variant['ramp_x'] + variant['flat_x']
        schedule = PiecewiseLinearSchedule(**schedule_kwargs)

    vae = ConvVAE(latent_dim, input_channels=3, **variant['conv_vae_kwargs'])
    if ptu.gpu_enabled():
        vae.cuda()

    trainer = ConvVAETrainer(
        train_set,
        test_set,
        vae,
        beta=beta,
        beta_schedule=schedule,
        **variant['algo_kwargs']
    )

    period = variant['save_period']
    for epoch in range(variant['num_epochs']):
        dump_images = epoch % period == 0
        trainer.train_epoch(epoch)
        trainer.test_epoch(
            epoch,
            save_reconstruction=dump_images,
            save_scatterplot=dump_images,
        )
        if dump_images:
            trainer.dump_samples(epoch)
def experiment(variant):
    """Run ten train/test/sample epochs on a ConvVAE that trains itself.

    Keys read from ``variant``: ``beta`` and ``representation_size``.
    """
    beta = variant["beta"]
    latent_dim = variant["representation_size"]
    vae = ConvVAE(latent_dim, beta=beta)

    num_epochs = 10
    for epoch in range(num_epochs):
        vae.train_epoch(epoch)
        vae.test_epoch(epoch)
        vae.dump_samples(epoch)
def experiment(variant):
    """Train a ConvVAE with ``state_sim_debug`` on paired images and states.

    Keys read from ``variant``: ``beta``, ``representation_size``,
    ``conv_vae_kwargs``, ``algo_kwargs``, ``save_period``, ``num_epochs``
    and, optionally, ``beta_schedule_kwargs``.
    """
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu

    beta = variant["beta"]
    latent_dim = variant["representation_size"]

    # The data holds both images and states, so generate_vae_dataset
    # cannot be used here.
    images = np.load(
        '/home/murtaza/vae_data/sawyer_torque_control_ou_imgs_zoomed_out10000.npy'
    )
    states = np.load(
        '/home/murtaza/vae_data/sawyer_torque_control_ou_states_zoomed_out10000.npy'
    )
    # Keep state columns 0-6 and 14 onward; columns 7-13 are dropped.
    states = np.concatenate((states[:, :7], states[:, 14:]), axis=1)
    imgs_train, imgs_test, states_train, states_test = train_test_split(
        images, states, test_size=.1)

    logger.save_extra_data(dict())
    logger.get_snapshot_dir()

    schedule = None
    if 'beta_schedule_kwargs' in variant:
        schedule = PiecewiseLinearSchedule(**variant['beta_schedule_kwargs'])

    vae = ConvVAE(
        latent_dim,
        input_channels=3,
        state_sim_debug=True,
        state_size=states.shape[1],
        **variant['conv_vae_kwargs']
    )
    if ptu.gpu_enabled():
        vae.cuda()

    trainer = ConvVAETrainer(
        (imgs_train, states_train),
        (imgs_test, states_test),
        vae,
        beta=beta,
        beta_schedule=schedule,
        state_sim_debug=True,
        **variant['algo_kwargs']
    )

    period = variant['save_period']
    for epoch in range(variant['num_epochs']):
        dump_images = epoch % period == 0
        trainer.train_epoch(epoch)
        trainer.test_epoch(
            epoch,
            save_reconstruction=dump_images,
            save_scatterplot=dump_images,
        )
        if dump_images:
            trainer.dump_samples(epoch)
def experiment(variant):
    """Train a ConvVAE without CUDA for ten epochs on ``get_data(10000)``.

    Keys read from ``variant``: ``beta`` and ``representation_size``.
    """
    beta = variant["beta"]
    latent_dim = variant["representation_size"]
    train_set, test_set = get_data(10000)

    vae = ConvVAE(latent_dim)
    trainer = ConvVAETrainer(
        train_set,
        test_set,
        vae,
        beta=beta,
        use_cuda=False,
    )

    num_epochs = 10
    for epoch in range(num_epochs):
        trainer.train_epoch(epoch)
        trainer.test_epoch(epoch)
        trainer.dump_samples(epoch)
def experiment(variant):
    """Train a 3-channel ConvVAE for 1001 epochs.

    Keys read from ``variant``: ``use_gpu``, ``beta`` and
    ``representation_size``.

    ``train_data`` and ``test_data`` are not bound inside this function,
    so they must be available from an enclosing (module) scope.
    """
    use_gpu = variant["use_gpu"]
    if use_gpu:
        ptu.set_gpu_mode(True)

    beta = variant["beta"]
    latent_dim = variant["representation_size"]

    vae = ConvVAE(latent_dim, input_channels=3)
    trainer = ConvVAETrainer(train_data, test_data, vae, beta=beta)

    num_epochs = 1001
    for epoch in range(num_epochs):
        trainer.train_epoch(epoch)
        trainer.test_epoch(epoch)
        trainer.dump_samples(epoch)
def experiment(variant):
    """Run ten train/test/sample epochs on a ConvVAE that trains itself.

    Keys read from ``variant``: ``beta`` and ``representation_size``.

    NOTE: this function used to load a pickled snapshot from a hard-coded
    path in one user's home directory into an unused variable and then
    stop in ``pdb.set_trace()``. That halted every run waiting for
    interactive input (and failed outright on any machine without that
    file), so both debugging leftovers have been removed.
    """
    beta = variant["beta"]
    representation_size = variant["representation_size"]
    m = ConvVAE(representation_size, beta=beta)
    for epoch in range(10):
        m.train_epoch(epoch)
        m.test_epoch(epoch)
        m.dump_samples(epoch)
def experiment(variant):
    """Train a ConvVAE with a mandatory piecewise-linear beta schedule.

    Keys read from ``variant``: ``beta``, ``representation_size``,
    ``get_data_kwargs``, ``beta_schedule_kwargs``, ``algo_kwargs`` and
    ``num_epochs``. Samples are dumped after every epoch.
    """
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu

    beta = variant["beta"]
    latent_dim = variant["representation_size"]
    train_set, test_set, dataset_info = get_data(**variant['get_data_kwargs'])
    logger.save_extra_data(dataset_info)
    logger.get_snapshot_dir()

    schedule = PiecewiseLinearSchedule(**variant['beta_schedule_kwargs'])

    vae = ConvVAE(latent_dim, input_channels=3)
    if ptu.gpu_enabled():
        vae.to(ptu.device)

    trainer = ConvVAETrainer(
        train_set,
        test_set,
        vae,
        beta=beta,
        beta_schedule=schedule,
        **variant['algo_kwargs']
    )
    for epoch in range(variant['num_epochs']):
        trainer.train_epoch(epoch)
        trainer.test_epoch(epoch)
        trainer.dump_samples(epoch)
def experiment(variant):
    """Train a ConvVAE whose beta schedule ends at ``variant['beta']``.

    Keys read from ``variant``: ``beta``, ``representation_size``,
    ``generate_vae_dataset_kwargs``, ``conv_vae_kwargs``, ``algo_kwargs``,
    ``save_period``, ``num_epochs`` and, optionally,
    ``beta_schedule_kwargs``.
    """
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu

    beta = variant["beta"]
    latent_dim = variant["representation_size"]
    train_set, test_set, dataset_info = generate_vae_dataset(
        **variant['generate_vae_dataset_kwargs'])
    logger.save_extra_data(dataset_info)
    logger.get_snapshot_dir()

    schedule = None
    if 'beta_schedule_kwargs' in variant:
        # The final schedule value is overwritten with beta; this edits the
        # list stored inside ``variant`` in place.
        schedule_kwargs = variant['beta_schedule_kwargs']
        schedule_kwargs['y_values'][-1] = variant['beta']
        schedule = PiecewiseLinearSchedule(**schedule_kwargs)

    vae = ConvVAE(latent_dim, input_channels=3, **variant['conv_vae_kwargs'])
    if ptu.gpu_enabled():
        vae.cuda()

    trainer = ConvVAETrainer(
        train_set,
        test_set,
        vae,
        beta=beta,
        beta_schedule=schedule,
        **variant['algo_kwargs']
    )

    period = variant['save_period']
    for epoch in range(variant['num_epochs']):
        dump_images = epoch % period == 0
        trainer.train_epoch(epoch)
        trainer.test_epoch(
            epoch,
            save_reconstruction=dump_images,
            save_scatterplot=dump_images,
        )
        if dump_images:
            trainer.dump_samples(epoch)
def experiment(variant):
    """Train a 3-channel ConvVAE with CUDA for 50 epochs on ``get_data(10000)``.

    Keys read from ``variant``: ``use_gpu``, ``beta``,
    ``representation_size`` and, when ``use_gpu`` is truthy, ``gpu_id``.
    """
    if variant["use_gpu"]:
        device_index = variant["gpu_id"]
        ptu.set_gpu_mode(True)
        ptu.set_device(device_index)

    beta = variant["beta"]
    latent_dim = variant["representation_size"]
    train_set, test_set = get_data(10000)

    vae = ConvVAE(latent_dim, input_channels=3)
    # NOTE(review): use_cuda is True even when variant["use_gpu"] is falsy;
    # confirm that this is intended.
    trainer = ConvVAETrainer(
        train_set,
        test_set,
        vae,
        beta=beta,
        use_cuda=True,
    )

    num_epochs = 50
    for epoch in range(num_epochs):
        trainer.train_epoch(epoch)
        trainer.test_epoch(epoch)
        trainer.dump_samples(epoch)
def experiment(variant):
    """Train a ConvVAE for 101 epochs with scatterplots disabled.

    Keys read from ``variant``: ``use_gpu``, ``beta``,
    ``representation_size`` and, when ``use_gpu`` is truthy, ``gpu_id``.
    """
    if variant["use_gpu"]:
        device_index = variant["gpu_id"]
        ptu.set_gpu_mode(True)
        ptu.set_device(device_index)

    beta = variant["beta"]
    latent_dim = variant["representation_size"]
    train_set, test_set = get_data(10000)

    vae = ConvVAE(latent_dim)
    trainer = ConvVAETrainer(
        train_set,
        test_set,
        vae,
        beta=beta,
        do_scatterplot=False,
    )

    num_epochs = 101
    for epoch in range(num_epochs):
        trainer.train_epoch(epoch)
        trainer.test_epoch(epoch)
        trainer.dump_samples(epoch)
def experiment(variant):
    """Train a 3-channel ConvVAE for 1001 epochs with a fixed-shape schedule.

    The schedule is built from x values ``[0, 400, 800]`` and y values
    ``[0.5, 0.5, beta]``.

    Keys read from ``variant``: ``use_gpu``, ``beta``,
    ``representation_size`` and, when ``use_gpu`` is truthy, ``gpu_id``.
    """
    if variant["use_gpu"]:
        device_index = variant["gpu_id"]
        ptu.set_gpu_mode(True)
        ptu.set_device(device_index)

    beta = variant["beta"]
    latent_dim = variant["representation_size"]
    train_set, test_set = get_data(10000)

    vae = ConvVAE(latent_dim, input_channels=3)
    schedule = PiecewiseLinearSchedule([0, 400, 800], [0.5, 0.5, beta])
    trainer = ConvVAETrainer(train_set, test_set, vae, beta_schedule=schedule)

    num_epochs = 1001
    for epoch in range(num_epochs):
        trainer.train_epoch(epoch)
        trainer.test_epoch(epoch)
        trainer.dump_samples(epoch)
def __init__(
        self,
        representation_size,
        architecture,
        action_dim,
        dynamics_type=None,
        encoder_class=CNN,
        decoder_class=DCNN,
        decoder_output_activation=identity,
        decoder_distribution='bernoulli',
        input_channels=1,
        num_labels=0,
        imsize=48,
        init_w=1e-3,
        min_variance=1e-3,
        hidden_init=nn.init.xavier_uniform_,
        add_labels_to_latents=False,
        reconstruction_channels=3,
        base_depth=32,
        weight_init_gain=1.0,
):
    """Initialise the parent class and build two sub-models.

    Besides calling the parent constructor, this creates ``self.CDVAE``
    (a ``CDVAE`` that additionally receives ``action_dim`` and
    ``dynamics_type``) and ``self.adversary`` (a ``ConvVAE`` sized by
    ``representation_size[0]``).

    ``representation_size`` is indexed with ``[0]`` below, so it must be
    subscriptable. All other parameters are forwarded positionally to the
    constructors called here; their meaning is defined by those classes,
    which are not visible in this block.
    """
    # NOTE(review): every argument below is positional, and the order used
    # here (num_labels before input_channels, add_labels_to_latents before
    # hidden_init) differs from the order used for CDVAE further down.
    # Confirm each list against the corresponding constructor signature.
    super().__init__(representation_size, architecture, encoder_class,
                     decoder_class, decoder_output_activation,
                     decoder_distribution, num_labels, input_channels, imsize,
                     init_w, min_variance, add_labels_to_latents, hidden_init,
                     reconstruction_channels, base_depth, weight_init_gain)
    # Same settings as the parent, plus the action/dynamics arguments.
    self.CDVAE = CDVAE(representation_size, architecture, action_dim,
                       dynamics_type, encoder_class, decoder_class,
                       decoder_output_activation, decoder_distribution,
                       input_channels, num_labels, imsize, init_w,
                       min_variance, hidden_init, add_labels_to_latents,
                       reconstruction_channels, base_depth, weight_init_gain)
    # Only the first entry of representation_size is used here, and
    # num_labels is not passed at all.
    # NOTE(review): confirm this positional list matches ConvVAE's signature.
    self.adversary = ConvVAE(representation_size[0], architecture,
                             encoder_class, decoder_class,
                             decoder_output_activation, decoder_distribution,
                             input_channels, imsize, init_w, min_variance,
                             hidden_init)
def _pointmass_fixed_goal_experiment(vae_latent_size,
                                     replay_buffer_size,
                                     cnn_kwargs,
                                     vae_kwargs,
                                     policy_kwargs,
                                     qf_kwargs,
                                     e2e_trainer_kwargs,
                                     sac_trainer_kwargs,
                                     algorithm_kwargs,
                                     eval_path_collector_kwargs=None,
                                     expl_path_collector_kwargs=None,
                                     **kwargs):
    """Run end-to-end SAC + VAE training on an image-based Point2D task.

    A ``Point2DEnv`` with a fixed goal at ``(0, 0)`` is wrapped in
    ``ImageEnv`` and ``FlatGoalEnv``. The Q-functions read observations
    through a VAE encoder (one ``Vae2Encoder`` instance shared by all four
    Q-networks); the policy reads them through its own CNN.

    Args:
        vae_latent_size: Latent size of the ConvVAE; also used to size the
            Q-function input (``action_dim + vae_latent_size``).
        replay_buffer_size: First argument to ``EnvReplayBuffer``.
        cnn_kwargs: Extra keyword arguments for the policy CNN.
        vae_kwargs: Extra keyword arguments for ``ConvVAE``.
        policy_kwargs: Extra keyword arguments for
            ``TanhGaussianPolicyAdapter``.
        qf_kwargs: Extra keyword arguments for each Q-function.
        e2e_trainer_kwargs: Extra keyword arguments for
            ``End2EndSACTrainer``.
        sac_trainer_kwargs: Extra keyword arguments for ``SACTrainer``.
        algorithm_kwargs: Extra keyword arguments for
            ``TorchBatchRLAlgorithm``.
        eval_path_collector_kwargs: Optional keyword arguments for the
            evaluation ``MdpPathCollector``; ``None`` means none.
        expl_path_collector_kwargs: Optional keyword arguments for the
            exploration ``MdpPathCollector``; ``None`` means none.
        **kwargs: Accepted and ignored.
    """
    if expl_path_collector_kwargs is None:
        expl_path_collector_kwargs = {}
    if eval_path_collector_kwargs is None:
        eval_path_collector_kwargs = {}

    from multiworld.core.image_env import ImageEnv
    from multiworld.envs.pygame.point2d import Point2DEnv
    from multiworld.core.flat_goal_env import FlatGoalEnv

    env = Point2DEnv(
        images_are_rgb=True,
        render_onscreen=False,
        show_goal=False,
        ball_radius=2,
        render_size=48,
        fixed_goal=(0, 0),
    )
    env = ImageEnv(env, imsize=env.render_size, transpose=True, normalize=True)
    env = FlatGoalEnv(env)

    input_width, input_height = env.image_shape
    # Computed once; both the Q-functions and the policy use this value.
    action_dim = int(np.prod(env.action_space.shape))

    vae = ConvVAE(
        representation_size=vae_latent_size,
        input_channels=3,
        imsize=input_width,
        decoder_output_activation=nn.Sigmoid(),
        **vae_kwargs)
    encoder = Vae2Encoder(vae)

    def make_cnn():
        # Convolutional feature extractor used by the policy.
        return networks.CNN(input_width=input_width,
                            input_height=input_height,
                            input_channels=3,
                            output_conv_channels=True,
                            output_size=None,
                            **cnn_kwargs)

    def make_qf():
        # Every Q-function wraps the same ``encoder`` instance.
        return networks.MlpQfWithObsProcessor(
            obs_processor=nn.Sequential(
                encoder,
                networks.Flatten(),
            ),
            output_size=1,
            input_size=action_dim + vae_latent_size,
            **qf_kwargs)

    qf1 = make_qf()
    qf2 = make_qf()
    target_qf1 = make_qf()
    target_qf2 = make_qf()

    policy_cnn = make_cnn()
    policy = TanhGaussianPolicyAdapter(
        nn.Sequential(policy_cnn, networks.Flatten()),
        policy_cnn.conv_output_flat_size,
        action_dim,
        **policy_kwargs)

    # The same environment object is used for evaluation and exploration.
    eval_env = expl_env = env
    eval_policy = MakeDeterministic(policy)
    eval_path_collector = MdpPathCollector(eval_env, eval_policy,
                                           **eval_path_collector_kwargs)
    replay_buffer = EnvReplayBuffer(
        replay_buffer_size,
        expl_env,
    )

    vae_trainer = VAETrainer(vae)
    sac_trainer = SACTrainer(env=eval_env,
                             policy=policy,
                             qf1=qf1,
                             qf2=qf2,
                             target_qf1=target_qf1,
                             target_qf2=target_qf2,
                             **sac_trainer_kwargs)
    trainer = End2EndSACTrainer(
        sac_trainer=sac_trainer,
        vae_trainer=vae_trainer,
        **e2e_trainer_kwargs,
    )
    expl_path_collector = MdpPathCollector(expl_env, policy,
                                           **expl_path_collector_kwargs)
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **algorithm_kwargs)
    algorithm.to(ptu.device)
    algorithm.train()
def train_vae(variant):
    """Train a VAE on a pre-collected dataset and return the trained model.

    While training, tabular logging is redirected from ``progress.csv`` to
    ``vae_progress.csv``; it is switched back before returning. The final
    model is also saved as ``vae.pkl`` through the logger.

    Keys read from ``variant``:
        generate_vae_dataset_kwargs: Either ``env_id`` (passed to
            ``gym.make``) or ``env_class`` + ``env_kwargs``; plus
            ``dataset_path``, and optionally ``test_p``, ``imsize`` and
            ``init_camera``.
        representation_size, beta, num_epochs, snapshot_gap, algo_kwargs.
        vae_kwargs: Modified in place (``action_dim`` and ``input_size``
            or ``imsize`` are written into it).
        vae_type: ``"VAE-state"``, ``"VAE2"`` or anything else (ConvVAE).
        imsize: Required unless ``vae_type`` is ``"VAE-state"``.
        vis_kwargs: Visualisation options. ``save_period`` is indexed
            directly in the training loop, so it is effectively required.
        beta_schedule_kwargs, render, reward_params,
        vae_wrapped_env_kwargs, dump_video_kwargs: Optional.

    Note that, despite its name, ``test_p`` (default 0.9) is the fraction
    of rows placed in the *training* split: the first ``int(N * test_p)``
    rows are used for training and the remainder for testing.

    Returns:
        The trained VAE model.

    Raises:
        KeyError: If a required ``variant`` key (see above) is missing.
    """
    from railrl.misc.ml_util import PiecewiseLinearSchedule
    from railrl.torch.vae.conv_vae import ConvVAE
    from railrl.torch.vae.conv_vae_trainer import ConvVAETrainer
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu
    from multiworld.core.image_env import ImageEnv
    from railrl.envs.vae_wrappers import VAEWrappedEnv
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path

    logger.remove_tabular_output('progress.csv', relative_to_snapshot_dir=True)
    logger.add_tabular_output('vae_progress.csv',
                              relative_to_snapshot_dir=True)

    env_id = variant['generate_vae_dataset_kwargs'].get('env_id', None)
    if env_id is not None:
        import gym
        env = gym.make(env_id)
    else:
        env_class = variant['generate_vae_dataset_kwargs']['env_class']
        env_kwargs = variant['generate_vae_dataset_kwargs']['env_kwargs']
        env = env_class(**env_kwargs)

    representation_size = variant["representation_size"]
    beta = variant["beta"]
    if 'beta_schedule_kwargs' in variant:
        beta_schedule = PiecewiseLinearSchedule(
            **variant['beta_schedule_kwargs'])
    else:
        beta_schedule = None

    # Obtain training and testing data.
    dataset_path = variant['generate_vae_dataset_kwargs'].get(
        'dataset_path', None)
    test_p = variant['generate_vae_dataset_kwargs'].get('test_p', 0.9)
    filename = local_path_from_s3_or_local_path(dataset_path)
    # The file holds a pickled dict wrapped in a 0-d object array.
    dataset = np.load(filename, allow_pickle=True).item()
    N = dataset['obs'].shape[0]
    n = int(N * test_p)  # number of rows that go to the training split
    train_data = {}
    test_data = {}
    for k in dataset.keys():
        train_data[k] = dataset[k][:n, :]
        test_data[k] = dataset[k][n:, :]

    # Set up the VAE.
    variant['vae_kwargs']['action_dim'] = train_data['actions'].shape[1]
    if variant.get('vae_type', None) == "VAE-state":
        from railrl.torch.vae.vae import VAE
        input_size = train_data['obs'].shape[1]
        variant['vae_kwargs']['input_size'] = input_size
        m = VAE(representation_size, **variant['vae_kwargs'])
    elif variant.get('vae_type', None) == "VAE2":
        from railrl.torch.vae.conv_vae2 import ConvVAE2
        variant['vae_kwargs']['imsize'] = variant['imsize']
        m = ConvVAE2(representation_size, **variant['vae_kwargs'])
    else:
        variant['vae_kwargs']['imsize'] = variant['imsize']
        m = ConvVAE(representation_size, **variant['vae_kwargs'])
    if ptu.gpu_enabled():
        m.cuda()

    # Set up the VAE trainer.
    if variant.get('vae_type', None) == "VAE-state":
        from railrl.torch.vae.vae_trainer import VAETrainer
        t = VAETrainer(train_data,
                       test_data,
                       m,
                       beta=beta,
                       beta_schedule=beta_schedule,
                       **variant['algo_kwargs'])
    else:
        t = ConvVAETrainer(train_data,
                           test_data,
                           m,
                           beta=beta,
                           beta_schedule=beta_schedule,
                           **variant['algo_kwargs'])

    # Visualization.
    vis_variant = variant.get('vis_kwargs', {})
    save_video = vis_variant.get('save_video', False)
    if isinstance(env, ImageEnv):
        image_env = env
    else:
        image_env = ImageEnv(
            env,
            variant['generate_vae_dataset_kwargs'].get('imsize'),
            init_camera=variant['generate_vae_dataset_kwargs'].get(
                'init_camera'),
            transpose=True,
            normalize=True,
        )
    render = variant.get('render', False)
    reward_params = variant.get("reward_params", dict())
    vae_env = VAEWrappedEnv(image_env,
                            m,
                            imsize=image_env.imsize,
                            decode_goals=render,
                            render_goals=render,
                            render_rollouts=render,
                            reward_params=reward_params,
                            **variant.get('vae_wrapped_env_kwargs', {}))
    vae_env.reset()
    vae_env.add_mode("video_env", 'video_env')
    vae_env.add_mode("video_vae", 'video_vae')
    if save_video:
        import railrl.samplers.rollout_functions as rf
        from railrl.policies.simple import RandomPolicy
        random_policy = RandomPolicy(vae_env.action_space)
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=100,
            observation_key='latent_observation',
            desired_goal_key='latent_desired_goal',
            vis_list=vis_variant.get('vis_list', []),
            dont_terminate=True,
        )
        # When variant has no "dump_video_kwargs" entry this is a fresh
        # dict that is NOT stored in variant, so the video code below must
        # use this local name rather than index variant again.
        dump_video_kwargs = variant.get("dump_video_kwargs", dict())
        dump_video_kwargs['imsize'] = vae_env.imsize
        dump_video_kwargs['vis_list'] = [
            'image_observation',
            'reconstr_image_observation',
            'image_latent_histogram_2d',
            'image_latent_histogram_mu_2d',
            'image_plt',
            'image_rew',
            'image_rew_euclidean',
            'image_rew_mahalanobis',
            'image_rew_logp',
            'image_rew_kl',
            'image_rew_kl_rev',
        ]

    def visualization_post_processing(save_vis, save_video, epoch):
        """Dump reconstructions, samples, latent plots and videos."""
        vis_list = vis_variant.get('vis_list', [])

        if save_vis:
            if vae_env.vae_input_key_prefix == 'state':
                vae_env.dump_reconstructions(epoch,
                                             n_recon=vis_variant.get(
                                                 'n_recon', 16))
            vae_env.dump_samples(epoch,
                                 n_samples=vis_variant.get('n_samples', 64))
            if 'latent_representation' in vis_list:
                vae_env.dump_latent_plots(epoch)
            if any(elem in vis_list for elem in [
                    'latent_histogram', 'latent_histogram_mu',
                    'latent_histogram_2d', 'latent_histogram_mu_2d'
            ]):
                vae_env.compute_latent_histogram()
            if not save_video and ('latent_histogram' in vis_list):
                vae_env.dump_latent_histogram(epoch=epoch,
                                              noisy=True,
                                              use_true_prior=True)
            if not save_video and ('latent_histogram_mu' in vis_list):
                vae_env.dump_latent_histogram(epoch=epoch,
                                              noisy=False,
                                              use_true_prior=True)

        if save_video and save_vis:
            from railrl.envs.vae_wrappers import temporary_mode
            from railrl.misc.video_gen import dump_video

            vae_env.compute_goal_encodings()

            logdir = logger.get_snapshot_dir()
            filename = osp.join(logdir,
                                'video_{epoch}.mp4'.format(epoch=epoch))
            # Use the dict prepared above. Indexing
            # variant['dump_video_kwargs'] here raised KeyError whenever
            # the key was absent and would have dropped the imsize/vis_list
            # defaults. When the key is present, this is the same object.
            dump_video_kwargs['epoch'] = epoch
            temporary_mode(vae_env,
                           mode='video_env',
                           func=dump_video,
                           args=(vae_env, random_policy, filename,
                                 rollout_function),
                           kwargs=dump_video_kwargs)
            if not vis_variant.get('save_video_env_only', True):
                filename = osp.join(
                    logdir, 'video_{epoch}_vae.mp4'.format(epoch=epoch))
                temporary_mode(vae_env,
                               mode='video_vae',
                               func=dump_video,
                               args=(vae_env, random_policy, filename,
                                     rollout_function),
                               kwargs=dump_video_kwargs)

    # Train the VAE.
    last_epoch = variant['num_epochs'] - 1
    for epoch in range(variant['num_epochs']):
        save_vis = (epoch % vis_variant['save_period'] == 0
                    or epoch == last_epoch)
        save_vae = (epoch % variant['snapshot_gap'] == 0
                    or epoch == last_epoch)

        t.train_epoch(epoch)
        t.test_epoch(
            epoch,
            save_reconstruction=save_vis,
            save_interpolation=save_vis,
            save_vae=save_vae,
        )
        # Post-processing runs every 300 epochs and on the final epoch.
        if epoch % 300 == 0 or epoch == last_epoch:
            visualization_post_processing(save_vis, save_video, epoch)

    logger.save_extra_data(m, 'vae.pkl', mode='pickle')
    logger.remove_tabular_output(
        'vae_progress.csv',
        relative_to_snapshot_dir=True,
    )
    logger.add_tabular_output(
        'progress.csv',
        relative_to_snapshot_dir=True,
    )
    print("finished --------------------!!!!!!!!!!!!!!!")
    return m