def __init__(
    self,
    env,
    experience_replay,
    backward=True,
    hidden_layer_size=1024,
    n_hidden_layers=3,
    learning_rate=3e-4,
    tensorboard_log=None,
    checkpoint_folder=None,
    latent_space=None,
    n_out_states=1,
    gauss_stdev=None,
    play_data_range=None,
):
    self.env = env
    self.experience_replay = experience_replay
    self.backward = backward
    self.action_space_shape = list(self.env.action_space.shape)
    self.n_out_states = n_out_states
    self.gauss_stdev = gauss_stdev
    # self.individual_likelihood_factor = 0.0001
    self._hidden_layer_size = hidden_layer_size
    self._n_hidden_layers = n_hidden_layers

    # Optional latent space; the string "gridworld" selects the built-in
    # GridworldSpace, anything else is used as a latent space object directly.
    if latent_space == "gridworld":
        self.latent_space = GridworldSpace(env)
    else:
        self.latent_space = latent_space

    if self.latent_space is None:
        # Without a latent space, states are the raw flat observations.
        assert len(self.env.observation_space.shape) == 1
        self.state_size = self.env.observation_space.shape[0]
    else:
        self.state_size = self.latent_space.state_size

    # Build the graph: placeholders, network, and loss.
    self._define_input_placeholders()
    self._define_model()
    self.loss = self._define_loss()

    # Constant learning rate (decay disabled: decay_steps=None, decay_rate=1).
    # self.learning_rate, self.global_step = get_learning_rate(1e-2, 20, 0.98)
    self.learning_rate, self.global_step = get_learning_rate(
        learning_rate, None, 1)
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.gradients = self.optimizer.compute_gradients(loss=self.loss)
    self.optimization_op = self.optimizer.apply_gradients(
        self.gradients, global_step=self.global_step)

    self.tensorboard_log = tensorboard_log
    if self.tensorboard_log is not None:
        self._define_tensorboard_metrics()

    self.checkpoint_folder = checkpoint_folder
    if self.checkpoint_folder is not None:
        self.saver = tf.train.Saver()

    self.play_data_range = play_data_range
    # The TF session is attached later by the training code.
    self.sess = None
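# Editor's sketch (assumption, not part of the original module): the helper
# `get_learning_rate(initial_rate, decay_steps, decay_rate)` is not shown
# here. Inferred from its call sites -- `(learning_rate, None, 1)` for a
# constant rate and the commented-out `(1e-2, 20, 0.98)` for a decayed one --
# a minimal implementation consistent with both calls could look like:
import tensorflow as tf

def get_learning_rate(initial_rate, decay_steps, decay_rate):
    # A global step that apply_gradients() increments on every update.
    global_step = tf.train.get_or_create_global_step()
    if decay_steps is None or decay_rate == 1:
        # Constant schedule: no decay requested.
        return tf.constant(initial_rate), global_step
    # Exponential decay: initial_rate * decay_rate ** (step / decay_steps).
    learning_rate = tf.train.exponential_decay(
        initial_rate, global_step, decay_steps, decay_rate)
    return learning_rate, global_step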
def __init__(
    self,
    input_size,
    hidden_layer_size=512,
    n_hidden_layers=3,
    learning_rate=3e-4,
    tensorboard_log=None,
):
    self.input_size = input_size
    self._hidden_layer_size = hidden_layer_size
    self._n_hidden_layers = n_hidden_layers

    self._define_input_placeholders()
    self._define_model()
    self.loss = self._define_loss()

    self.learning_rate, self.global_step = get_learning_rate(
        learning_rate, None, 1)
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.gradients = self.optimizer.compute_gradients(loss=self.loss)
    self.optimization_op = self.optimizer.apply_gradients(
        self.gradients, global_step=self.global_step)

    self.tensorboard_log = tensorboard_log
    if self.tensorboard_log is not None:
        self._define_tensorboard_metrics()

    self.sess = None
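# Editor's sketch (assumption): all constructors in this section follow the
# same TF1 pattern -- __init__ only builds the graph, and `self.sess` is
# attached later by the caller. Typical usage, with `Model` standing in for
# any of these classes (a hypothetical name; the real class names are not
# shown here):
import tensorflow as tf

model = Model(input_size=32)  # hypothetical constructor call
model.sess = tf.Session()
model.sess.run(tf.global_variables_initializer())
# One optimization step. The feed_dict keys depend on the placeholders
# created in _define_input_placeholders(), which are not shown here:
# model.sess.run(model.optimization_op, feed_dict={...: batch})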
def __init__(
    self,
    env,
    solver,
    experience_replay,
    tensorboard_log=None,
    learning_rate=3e-4,
    n_layers=10,
    layer_size=256,
    n_out=1,
    gauss_stdev=None,
):
    self.env = env
    self.solver = solver
    self.experience_replay = experience_replay
    # Exact dynamics of the underlying MuJoCo task, resolved from the env id.
    self.dynamics = ExactDynamicsMujoco(
        env.unwrapped.spec.id, tolerance=1e-3, max_iters=100)

    assert len(self.env.action_space.shape) == 1
    self.action_dim = self.env.action_space.shape[0]
    self.observation_shape = list(self.env.observation_space.shape)
    self.n_out = n_out
    self.gauss_stdev = gauss_stdev
    self.layer_size = layer_size
    self.n_layers = n_layers

    # Build the graph: placeholders, network, and loss.
    self._define_input_placeholders()
    self._define_model()
    self.loss = self._define_loss()

    self.learning_rate, self.global_step = get_learning_rate(
        learning_rate, None, 1)
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.gradients = self.optimizer.compute_gradients(loss=self.loss)
    self.optimization_op = self.optimizer.apply_gradients(
        self.gradients, global_step=self.global_step)

    self.tensorboard_log = tensorboard_log
    if self.tensorboard_log is not None:
        self._define_tensorboard_metrics()

    self.sess = None
def __init__(
    self,
    input_dim,
    state_size,
    n_layers=3,
    layer_size=256,
    learning_rate=1e-4,
    prior_stdev=1,
    divergence_factor=1,
    checkpoint_folder=None,
    tensorboard_log=None,
):
    self.input_dim = input_dim
    self.state_size = state_size
    self.n_layers = n_layers
    self.layer_size = layer_size
    self.activation = tf.nn.leaky_relu
    self.prior_stdev = prior_stdev
    self.divergence_factor = divergence_factor

    # Unlike the other classes, _define_model is expected to set self.loss,
    # which is used by compute_gradients below.
    self._define_model()
    self.sess = None

    self.learning_rate, self.global_step = get_learning_rate(
        learning_rate, None, 1)
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.gradients = self.optimizer.compute_gradients(loss=self.loss)
    self.optimization_op = self.optimizer.apply_gradients(
        self.gradients, global_step=self.global_step)

    self.checkpoint_folder = checkpoint_folder
    if self.checkpoint_folder is not None:
        self.saver = tf.train.Saver()

    self.tensorboard_log = tensorboard_log
    if self.tensorboard_log is not None:
        self._define_tensorboard_metrics()
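# Editor's sketch (assumption): the parameter names `prior_stdev` and
# `divergence_factor` suggest a variational objective with a scaled KL
# penalty against an isotropic Gaussian prior N(0, prior_stdev^2). A generic
# form of such a penalty -- not the module's actual _define_model code --
# could look like:
import tensorflow as tf

def scaled_kl_penalty(mean, stddev, prior_stdev=1.0, divergence_factor=1.0):
    # KL( N(mean, stddev^2) || N(0, prior_stdev^2) ), summed over latent dims.
    kl = (tf.log(prior_stdev / stddev)
          + (tf.square(stddev) + tf.square(mean))
          / (2.0 * tf.square(prior_stdev))
          - 0.5)
    return divergence_factor * tf.reduce_sum(kl, axis=-1)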
def __init__(
    self,
    env,
    tensorboard_log=None,
    checkpoint_folder=None,
    observation_dist="gaussian",
    pixel_observations=False,
    hidden_layer_size=200,
    n_hidden_layers=2,
    rnn_state_size=30,
    learning_rate=3e-4,
    obs_stddev=0.01,
    likelihood_scale=1,
    mujoco_video_out_path="mujoco",
    timesteps_training=20,
    fixed_latent_stddev=None,
):
    assert isinstance(env.action_space, gym.spaces.Box)
    self.env = env
    self.action_space_shape = list(self.env.action_space.shape)
    self.pixel_observations = pixel_observations
    self.observation_dist = observation_dist

    if self.pixel_observations:
        # Pixel observations are 64x64 RGB images with a fixed unit stddev.
        self.data_shape = [64, 64, 3]
        self._obs_stddev = 1
    else:
        self.data_shape = list(self.env.observation_space.shape)
        self._obs_stddev = obs_stddev

    self._rnn_state_size = rnn_state_size
    self._num_layers = n_hidden_layers
    self._hidden_layer_size = hidden_layer_size
    self._min_stddev = 0.01
    # Was hardcoded to 1 (with 1e-2 as a commented alternative), silently
    # ignoring the constructor argument; use the argument instead.
    self.likelihood_scale = likelihood_scale
    self._fixed_latent_stddev = fixed_latent_stddev
    self.state_size = 3 * self._rnn_state_size
    self.mujoco_video_out_path = mujoco_video_out_path
    self.timesteps = timesteps_training

    # The number of evaluation samples must be at least the number of
    # samples used for the gradient estimate.
    self.N_samples = 3
    self.N_samples_for_gradient = 1
    check_greater_equal("N_samples", self.N_samples,
                        self.N_samples_for_gradient)

    if self.pixel_observations and self.observation_dist != "gaussian":
        raise ValueError(
            'Pixel observations require the observation model to be "gaussian"'
        )

    self.time_axis = tf.convert_to_tensor([1])

    # Build the graph: placeholders, network, and loss.
    self._define_input_placeholders()
    self._define_model()
    self.loss = self._define_loss()

    self.learning_rate, self.global_step = get_learning_rate(
        learning_rate, None, 1)
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate, epsilon=1e-4)
    self.gradients = self.optimizer.compute_gradients(loss=self.loss)
    # Clip each gradient tensor to norm 1000; skip variables that receive
    # no gradient (compute_gradients returns None for those).
    self.gradients = [(tf.clip_by_norm(grad, 1000), var)
                      for grad, var in self.gradients
                      if grad is not None]
    self.optimization_op = self.optimizer.apply_gradients(
        self.gradients, global_step=self.global_step)

    self.tensorboard_log = tensorboard_log
    if self.tensorboard_log is not None:
        self._define_tensorboard_metrics()

    self.checkpoint_folder = checkpoint_folder
    if self.checkpoint_folder is not None:
        self.saver = tf.train.Saver()

    self.policy = None
    self.inverse_policy = None
    self.sess = None
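# Editor's note: the constructor above clips each gradient tensor to norm
# 1000 independently via tf.clip_by_norm. An alternative (not what the code
# above does) is to clip the joint norm across all variables, which preserves
# the relative scale of the gradients:
import tensorflow as tf

def clip_gradients_globally(grads_and_vars, max_norm=1000.0):
    # tf.clip_by_global_norm rescales all gradients together so that their
    # combined L2 norm is at most max_norm.
    pairs = [(g, v) for g, v in grads_and_vars if g is not None]
    grads, variables = zip(*pairs)
    clipped, _ = tf.clip_by_global_norm(list(grads), max_norm)
    return list(zip(clipped, variables))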