# Example 1
    def __init__(
        self,
        env,
        experience_replay,
        backward=True,
        hidden_layer_size=1024,
        n_hidden_layers=3,
        learning_rate=3e-4,
        tensorboard_log=None,
        checkpoint_folder=None,
        latent_space=None,
        n_out_states=1,
        gauss_stdev=None,
        play_data_range=None,
    ):
        """Build the full TF graph: placeholders, model, loss, and Adam op.

        NOTE(review): depends on ``self._define_*`` helpers defined
        elsewhere in the class; their contracts are not visible here.
        """
        self.env = env
        self.experience_replay = experience_replay
        self.backward = backward

        self.action_space_shape = list(self.env.action_space.shape)
        self.n_out_states = n_out_states
        self.gauss_stdev = gauss_stdev
        self._hidden_layer_size = hidden_layer_size
        self._n_hidden_layers = n_hidden_layers

        # "gridworld" is the only recognized string shortcut; anything else
        # is taken as an already-constructed latent-space object (or None).
        self.latent_space = (
            GridworldSpace(env) if latent_space == "gridworld" else latent_space
        )

        if self.latent_space is None:
            # Raw observations: only flat (1-D) observation spaces supported.
            assert len(self.env.observation_space.shape) == 1
            self.state_size = self.env.observation_space.shape[0]
        else:
            self.state_size = self.latent_space.state_size

        # Graph construction order matters: placeholders -> model -> loss.
        self._define_input_placeholders()
        self._define_model()
        self.loss = self._define_loss()

        # Constant learning rate (decay disabled: steps=None, rate=1).
        self.learning_rate, self.global_step = get_learning_rate(
            learning_rate, None, 1)

        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.gradients = self.optimizer.compute_gradients(loss=self.loss)
        self.optimization_op = self.optimizer.apply_gradients(
            self.gradients, global_step=self.global_step)

        self.tensorboard_log = tensorboard_log
        if self.tensorboard_log is not None:
            self._define_tensorboard_metrics()

        self.checkpoint_folder = checkpoint_folder
        if self.checkpoint_folder is not None:
            self.saver = tf.train.Saver()

        self.play_data_range = play_data_range
        # A session is attached externally after construction.
        self.sess = None
    def __init__(
        self,
        input_size,
        hidden_layer_size=512,
        n_hidden_layers=3,
        learning_rate=3e-4,
        tensorboard_log=None,
    ):
        """Assemble the network graph and its Adam training op.

        Args:
            input_size: dimensionality of the model input.
            hidden_layer_size: units per hidden layer.
            n_hidden_layers: number of hidden layers.
            learning_rate: constant Adam learning rate.
            tensorboard_log: summaries directory, or None to disable.
        """
        self.input_size = input_size
        self._hidden_layer_size = hidden_layer_size
        self._n_hidden_layers = n_hidden_layers

        # Placeholders and model must exist before the loss can be built.
        self._define_input_placeholders()
        self._define_model()
        self.loss = self._define_loss()

        # Constant schedule: decay steps None, decay rate 1.
        self.learning_rate, self.global_step = get_learning_rate(
            learning_rate, None, 1)

        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.gradients = self.optimizer.compute_gradients(loss=self.loss)
        self.optimization_op = self.optimizer.apply_gradients(
            self.gradients, global_step=self.global_step)

        self.tensorboard_log = tensorboard_log
        if self.tensorboard_log is not None:
            self._define_tensorboard_metrics()

        # A session is supplied externally after construction.
        self.sess = None
    def __init__(
        self,
        env,
        solver,
        experience_replay,
        tensorboard_log=None,
        learning_rate=3e-4,
        n_layers=10,
        layer_size=256,
        n_out=1,
        gauss_stdev=None,
    ):
        """Wire up the model graph, exact Mujoco dynamics, and Adam op.

        NOTE(review): depends on ``self._define_*`` helpers defined
        elsewhere in the class; their contracts are not visible here.
        """
        self.env = env
        self.solver = solver
        self.experience_replay = experience_replay

        # Exact forward dynamics for the wrapped Mujoco environment.
        self.dynamics = ExactDynamicsMujoco(env.unwrapped.spec.id,
                                            tolerance=1e-3,
                                            max_iters=100)

        # Only flat (1-D) action spaces are supported.
        assert len(self.env.action_space.shape) == 1
        self.action_dim = self.env.action_space.shape[0]
        self.observation_shape = list(self.env.observation_space.shape)

        self.n_out = n_out
        self.gauss_stdev = gauss_stdev
        self.layer_size = layer_size
        self.n_layers = n_layers

        # Graph construction order matters: placeholders -> model -> loss.
        self._define_input_placeholders()
        self._define_model()
        self.loss = self._define_loss()

        # Constant learning rate (decay disabled: steps=None, rate=1).
        self.learning_rate, self.global_step = get_learning_rate(
            learning_rate, None, 1)

        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.gradients = self.optimizer.compute_gradients(loss=self.loss)
        self.optimization_op = self.optimizer.apply_gradients(
            self.gradients, global_step=self.global_step)

        self.tensorboard_log = tensorboard_log
        if self.tensorboard_log is not None:
            self._define_tensorboard_metrics()

        # A session is attached externally after construction.
        self.sess = None
    def __init__(
        self,
        input_dim,
        state_size,
        n_layers=3,
        layer_size=256,
        learning_rate=1e-4,
        prior_stdev=1,
        divergence_factor=1,
        checkpoint_folder=None,
        tensorboard_log=None,
    ):
        """Build the model graph and an Adam training op.

        NOTE(review): unlike the sibling constructors in this file there is
        no explicit ``self.loss = self._define_loss()`` call here, yet
        ``compute_gradients(loss=self.loss)`` below reads ``self.loss`` —
        presumably ``_define_model()`` sets it; confirm, otherwise this
        raises AttributeError at construction time.

        Args:
            input_dim: dimensionality of the model input.
            state_size: dimensionality of the latent state.
            n_layers: number of hidden layers.
            layer_size: units per hidden layer.
            learning_rate: constant Adam learning rate.
            prior_stdev: stddev of the latent prior.
            divergence_factor: weight on the divergence term.
            checkpoint_folder: folder for checkpoints; a Saver is created
                only when this is not None.
            tensorboard_log: summaries directory, or None to disable.
        """
        self.input_dim = input_dim
        self.state_size = state_size
        self.n_layers = n_layers
        self.layer_size = layer_size
        self.activation = tf.nn.leaky_relu
        self.prior_stdev = prior_stdev
        self.divergence_factor = divergence_factor

        self._define_model()
        # Session is attached externally after construction.
        self.sess = None

        # Constant learning rate (decay disabled: steps=None, rate=1).
        self.learning_rate, self.global_step = get_learning_rate(
            learning_rate, None, 1)

        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.gradients = self.optimizer.compute_gradients(loss=self.loss)
        self.optimization_op = self.optimizer.apply_gradients(
            self.gradients, global_step=self.global_step)

        self.checkpoint_folder = checkpoint_folder
        if self.checkpoint_folder is not None:
            self.saver = tf.train.Saver()

        self.tensorboard_log = tensorboard_log
        if self.tensorboard_log is not None:
            self._define_tensorboard_metrics()
# Example 5
    def __init__(
        self,
        env,
        tensorboard_log=None,
        checkpoint_folder=None,
        observation_dist="gaussian",
        pixel_observations=False,
        hidden_layer_size=200,
        n_hidden_layers=2,
        rnn_state_size=30,
        learning_rate=3e-4,
        obs_stddev=0.01,
        likelihood_scale=1,
        mujoco_video_out_path="mujoco",
        timesteps_training=20,
        fixed_latent_stddev=None,
    ):
        """Build the sequence-model graph: placeholders, model, loss, Adam op.

        Fixes over the previous revision (behavior-compatible defaults):
        - ``likelihood_scale`` was accepted but silently ignored
          (hard-coded to 1); it is now honored. The default of 1
          preserves the old behavior for existing callers.
        - a dead ``self._obs_stddev = 1`` in the pixel branch (immediately
          overwritten by the unconditional assignment below) was removed.
        - the pixel/"gaussian" compatibility check now runs before any
          graph construction, so invalid configs fail fast with the same
          ValueError as before.

        Args:
            env: gym environment with a Box action space.
            tensorboard_log: summaries directory, or None to disable.
            checkpoint_folder: folder for checkpoints; a Saver is created
                only when this is not None.
            observation_dist: observation model family; must be "gaussian"
                when pixel_observations is True.
            pixel_observations: if True, observations are 64x64x3 images.
            hidden_layer_size: units per hidden layer.
            n_hidden_layers: number of hidden layers.
            rnn_state_size: RNN cell size; latent state is 3x this.
            learning_rate: constant Adam learning rate.
            obs_stddev: stddev of the observation model.
            likelihood_scale: scale factor on the likelihood term.
            mujoco_video_out_path: output path for rendered videos.
            timesteps_training: sequence length used during training.
            fixed_latent_stddev: if not None, fixes the latent stddev.
        """
        assert isinstance(env.action_space, gym.spaces.Box)
        self.env = env
        self.action_space_shape = list(self.env.action_space.shape)

        self.pixel_observations = pixel_observations
        self.observation_dist = observation_dist

        # Validate before building any graph state.
        if self.pixel_observations and self.observation_dist != "gaussian":
            raise ValueError(
                'Pixel observations require the observation model to be "gaussian"'
            )

        if self.pixel_observations:
            self.data_shape = [64, 64, 3]
        else:
            self.data_shape = list(self.env.observation_space.shape)

        # NOTE(review): the old code also set _obs_stddev = 1 in the pixel
        # branch, but that value was always overwritten here — if pixel
        # observations are meant to force stddev 1, pass obs_stddev=1.
        self._obs_stddev = obs_stddev
        self._rnn_state_size = rnn_state_size
        self._num_layers = n_hidden_layers
        self._hidden_layer_size = hidden_layer_size
        self._min_stddev = 0.01
        # Previously hard-coded to 1, ignoring the constructor argument.
        self.likelihood_scale = likelihood_scale
        self._fixed_latent_stddev = fixed_latent_stddev

        self.state_size = 3 * self._rnn_state_size

        self.mujoco_video_out_path = mujoco_video_out_path
        self.timesteps = timesteps_training
        self.N_samples = 3
        self.N_samples_for_gradient = 1
        check_greater_equal("N_samples", self.N_samples,
                            self.N_samples_for_gradient)

        self.time_axis = tf.convert_to_tensor([1])

        # Graph construction order matters: placeholders -> model -> loss.
        self._define_input_placeholders()
        self._define_model()
        self.loss = self._define_loss()

        # Constant learning rate (decay disabled: steps=None, rate=1).
        self.learning_rate, self.global_step = get_learning_rate(
            learning_rate, None, 1)

        self.optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                                epsilon=1e-4)
        self.gradients = self.optimizer.compute_gradients(loss=self.loss)
        # Clip each gradient's norm to guard against exploding gradients
        # through the recurrent model.
        self.gradients = [(tf.clip_by_norm(grad, 1000), var)
                          for grad, var in self.gradients]
        self.optimization_op = self.optimizer.apply_gradients(
            self.gradients, global_step=self.global_step)

        self.tensorboard_log = tensorboard_log
        if self.tensorboard_log is not None:
            self._define_tensorboard_metrics()

        self.checkpoint_folder = checkpoint_folder
        if self.checkpoint_folder is not None:
            self.saver = tf.train.Saver()

        # Policies and session are attached externally after construction.
        self.policy = None
        self.inverse_policy = None
        self.sess = None