Example #1
    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 discrete=False,
                 learning_rate=1e-4,
                 training=True,
                 nn_baseline=False,
                 **kwargs):
        super().__init__(**kwargs)

        # init vars
        self.ac_dim = ac_dim
        self.ob_dim = ob_dim
        self.n_layers = n_layers
        self.discrete = discrete
        self.size = size
        self.learning_rate = learning_rate
        self.training = training
        self.nn_baseline = nn_baseline

        # Discrete action space: a categorical policy parameterized by logits.
        if self.discrete:
            with tf.name_scope("logits") as scope:
                self.logits_na = tfu.build_mlp(input_size=self.ob_dim,
                                               output_size=self.ac_dim,
                                               n_layers=self.n_layers,
                                               size=self.size,
                                               scope=scope)
            self.mean_net = None
            self.logstd = None
        else:
            # Continuous action space: a diagonal Gaussian with a
            # state-dependent mean and a learned, state-independent logstd.
            self.logits_na = None
            with tf.name_scope("mean") as scope:
                self.mean_net = tfu.build_mlp(input_size=self.ob_dim,
                                              output_size=self.ac_dim,
                                              n_layers=self.n_layers,
                                              size=self.size,
                                              scope=scope)
            self.logstd = tf.Variable(initial_value=tf.zeros(
                shape=self.ac_dim, dtype=tf.dtypes.float32),
                                      trainable=True,
                                      name="logstd")
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=self.learning_rate)
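
Below is a minimal sketch of how a policy head built this way is typically queried, assuming tfu.build_mlp returns a callable Keras model (plain Dense layers stand in for it here). The continuous case samples with the reparameterization mean + exp(logstd) * noise, matching the logstd variable above.

import tensorflow as tf

# Stand-in for the repo's tfu.build_mlp output (assumed callable).
ob_dim, ac_dim = 4, 2
mean_net = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='tanh', input_shape=(ob_dim,)),
    tf.keras.layers.Dense(ac_dim),
])
logstd = tf.Variable(tf.zeros(ac_dim), trainable=True, name="logstd")

obs = tf.random.normal((1, ob_dim))
mean = mean_net(obs)
# Reparameterized Gaussian sample: mean + std * eps.
action = mean + tf.exp(logstd) * tf.random.normal(tf.shape(mean))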
Example #2
    def __init__(self, hparams, optimizer_spec, **kwargs):
        super().__init__(**kwargs)
        self.ob_dim = hparams['ob_dim']
        self.output_size = hparams['rnd_output_size']
        self.n_layers = hparams['rnd_n_layers']
        self.size = hparams['rnd_size']
        self.optimizer_spec = optimizer_spec

        # Create two neural networks with tfu.build_mlp:
        # 1) f, the fixed random target function we are trying to predict
        # 2) f_hat, the predictor network trained to match f
        # NOTE: f and f_hat must use different weight initializations
        #       (init_method_1 and init_method_2, defined above); otherwise
        #       the prediction error is zero everywhere and the exploration
        #       bonus vanishes.

        with tf.name_scope("f"):
            self.f = tfu.build_mlp(input_size=self.ob_dim,
                                   output_size=self.output_size,
                                   n_layers=self.n_layers,
                                   size=self.size,
                                   activation='tanh',
                                   output_activation='linear',
                                   init_method=init_method_1,
                                   scope="f")

        with tf.name_scope("f_hat"):
            self.f_hat = tfu.build_mlp(input_size=self.ob_dim,
                                       output_size=self.output_size,
                                       n_layers=self.n_layers,
                                       size=self.size,
                                       activation='tanh',
                                       output_activation='linear',
                                       init_method=init_method_2,
                                       scope="f_hat")

        self.learning_rate_scheduler = CustomLambdaLRSchedule(
            initialLR=self.optimizer_spec.optim_kwargs['learning_rate'],
            lr_lambda=self.optimizer_spec.learning_rate_schedule,
        )

        self.optimizer = self.optimizer_spec.constructor(
            self.learning_rate_scheduler)
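
For context, a hedged sketch of the RND update this constructor prepares: the squared prediction error of f_hat against the fixed random target f serves as the per-state exploration bonus, and only f_hat's weights are trained. The function name and signature below are illustrative, not taken from the source.

import tensorflow as tf

def rnd_bonus_and_update(f, f_hat, optimizer, obs):
    # f is the fixed random target; it is never trained.
    targets = tf.stop_gradient(f(obs))
    with tf.GradientTape() as tape:
        preds = f_hat(obs)
        # Per-state squared prediction error = intrinsic reward.
        error = tf.reduce_mean(tf.square(preds - targets), axis=-1)
        loss = tf.reduce_mean(error)
    # Only f_hat's weights receive gradients.
    grads = tape.gradient(loss, f_hat.trainable_variables)
    optimizer.apply_gradients(zip(grads, f_hat.trainable_variables))
    return error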
Example #3
    def __init__(self, ac_dim, ob_dim, n_layers, size, learning_rate=0.001):
        super(FFModel, self).__init__()

        self.ac_dim = ac_dim
        self.ob_dim = ob_dim
        self.n_layers = n_layers
        self.size = size
        self.learning_rate = learning_rate
        self.delta_network = tfu.build_mlp(
            input_size=self.ob_dim + self.ac_dim,
            output_size=self.ob_dim,
            n_layers=self.n_layers,
            size=self.size,
        )
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=self.learning_rate)
        self.loss = tf.keras.losses.MSE
        self.obs_mean = None
        self.obs_std = None
        self.acs_mean = None
        self.acs_std = None
        self.delta_mean = None
        self.delta_std = None
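
A sketch of how the normalization statistics above are typically used in such a dynamics model: the network predicts a normalized state delta, which is un-normalized to form the next-state prediction. The helper name and the epsilon guard are assumptions, not taken from the source.

import tensorflow as tf

def predict_next_obs(delta_network, obs, acs, stats):
    # Normalize inputs with the dataset statistics stored on the model.
    obs_n = (obs - stats['obs_mean']) / (stats['obs_std'] + 1e-8)
    acs_n = (acs - stats['acs_mean']) / (stats['acs_std'] + 1e-8)
    # The network consumes [obs, acs] and predicts a normalized delta.
    delta_n = delta_network(tf.concat([obs_n, acs_n], axis=-1))
    # Un-normalize the delta and add it to the current observation.
    delta = delta_n * stats['delta_std'] + stats['delta_mean']
    return obs + delta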
Example #4
    def __init__(self, hparams):
        super().__init__()
        self.ob_dim = hparams['ob_dim']
        self.ac_dim = hparams['ac_dim']
        self.discrete = hparams['discrete']
        self.size = hparams['size']
        self.n_layers = hparams['n_layers']
        self.learning_rate = hparams['learning_rate']

        # critic parameters
        self.num_target_updates = hparams['num_target_updates']
        self.num_grad_steps_per_target_update = hparams['num_grad_steps_per_target_update']
        self.gamma = hparams['gamma']
        with tf.name_scope('critic') as scope:
            self.critic_network = tfu.build_mlp(
                input_size=self.ob_dim,
                output_size=1,
                n_layers=self.n_layers,
                size=self.size,
                scope=scope
            )
        self.loss = tf.keras.losses.MeanSquaredError()
        self.loss_reports = tf.keras.metrics.Mean()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
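
For reference, a hedged sketch of the bootstrapped fitted-value update this critic supports: V(s) is regressed toward r + gamma * V(s') * (1 - done), with targets recomputed periodically according to num_target_updates and num_grad_steps_per_target_update. The function below is illustrative only.

import tensorflow as tf

def critic_update(critic, optimizer, loss_fn, obs, next_obs, rew, done, gamma):
    # Bootstrapped target; (1 - done) zeroes the bootstrap at terminals.
    targets = rew + gamma * tf.squeeze(critic(next_obs), axis=-1) * (1.0 - done)
    targets = tf.stop_gradient(targets)
    with tf.GradientTape() as tape:
        values = tf.squeeze(critic(obs), axis=-1)
        loss = loss_fn(targets, values)
    grads = tape.gradient(loss, critic.trainable_variables)
    optimizer.apply_gradients(zip(grads, critic.trainable_variables))
    return loss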
Example #5
    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 discrete=False,
                 learning_rate=1e-4,
                 training=True,
                 nn_baseline=False,
                 **kwargs):
        super().__init__(**kwargs)

        # init vars
        self.ac_dim = ac_dim
        self.ob_dim = ob_dim
        self.n_layers = n_layers
        self.discrete = discrete
        self.size = size
        self.learning_rate = learning_rate
        self.training = training
        self.nn_baseline = nn_baseline

        if self.discrete:
            with tf.name_scope("logits") as scope:
                self.logits_na = tfu.build_mlp(input_size=self.ob_dim,
                                               output_size=self.ac_dim,
                                               n_layers=self.n_layers,
                                               size=self.size,
                                               scope=scope)
            self.mean_net = None
            self.logstd = None

            # build_mlp is assumed to create the variables eagerly; otherwise
            # a dummy forward pass would be needed first, e.g.
            # self.logits_na(tf.random.normal(shape=(1, self.ob_dim)))
            assert self.logits_na.trainable_variables != []
            self.policy_params = [*self.logits_na.trainable_variables]
        else:
            self.logits_na = None
            with tf.name_scope("mean") as scope:
                self.mean_net = tfu.build_mlp(input_size=self.ob_dim,
                                              output_size=self.ac_dim,
                                              n_layers=self.n_layers,
                                              size=self.size,
                                              scope=scope)
                self.logstd = tf.Variable(initial_value=tf.zeros(
                    shape=self.ac_dim, dtype=tf.dtypes.float32),
                                          trainable=True,
                                          name="logstd")

            # As above, the variables must already exist; otherwise a dummy
            # forward pass would be needed first, e.g.
            # self.mean_net(tf.random.normal(shape=(1, self.ob_dim)))
            assert self.mean_net.trainable_variables != []
            self.policy_params = [
                *self.mean_net.trainable_variables, self.logstd
            ]

        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=self.learning_rate)

        # Optional state-value baseline used for variance reduction.
        if nn_baseline:
            with tf.name_scope("baseline") as scope:
                self.baseline = tfu.build_mlp(input_size=self.ob_dim,
                                              output_size=1,
                                              n_layers=self.n_layers,
                                              size=self.size,
                                              scope=scope)
            self.baseline_optimizer = tf.keras.optimizers.Adam(
                self.learning_rate)
        else:
            self.baseline = None
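
Finally, an illustrative use of the policy_params list gathered above: a single gradient step that applies updates to exactly those variables. compute_loss is a hypothetical stand-in for the repo's policy-gradient loss, not an actual function from the source.

import tensorflow as tf

def policy_step(policy_params, optimizer, compute_loss, obs, acs, advantages):
    with tf.GradientTape() as tape:
        # Hypothetical loss; in practice this would be the negative
        # log-probability of the taken actions weighted by advantages.
        loss = compute_loss(obs, acs, advantages)
    # Gradients flow to exactly the variables collected in policy_params
    # (the MLP weights, plus logstd in the continuous case).
    grads = tape.gradient(loss, policy_params)
    optimizer.apply_gradients(zip(grads, policy_params))
    return loss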