def _initialize(self, ob_space, ac_space, preprocessor, ac_noise_std):
    """Build the TF graph for this policy and return its variable scope.

    Creates (a) non-trainable observation-normalization statistics plus a
    function to set them, and (b) the policy network with a deterministic
    action distribution, exposed through ``self._act``.
    """
    self.ac_space = ac_space
    self.ac_noise_std = ac_noise_std
    self.preprocessor_shape = preprocessor.transform_shape(ob_space.shape)
    shape = self.preprocessor_shape

    with tf.variable_scope(type(self).__name__) as scope:
        # Running observation statistics. They start as NaN on purpose:
        # they are unusable until explicitly set via _set_ob_mean_std.
        def stat_var(name):
            return tf.get_variable(
                name, shape, tf.float32,
                tf.constant_initializer(np.nan), trainable=False)

        mean_var = stat_var('ob_mean')
        std_var = stat_var('ob_std')

        mean_ph = tf.placeholder(tf.float32, shape)
        std_ph = tf.placeholder(tf.float32, shape)
        self._set_ob_mean_std = U.function(
            [mean_ph, std_ph], [],
            updates=[
                tf.assign(mean_var, mean_ph),
                tf.assign(std_var, std_ph),
            ])

        obs_ph = tf.placeholder(tf.float32, [None] + list(shape))
        # TODO(ekl): we should do clipping in a standard RLlib preprocessor
        normalized = tf.clip_by_value(
            (obs_ph - mean_var) / std_var, -5.0, 5.0)

        # Policy network over the normalized observations.
        dist_class, dist_dim = ModelCatalog.get_action_dist(
            self.ac_space, dist_type='deterministic')
        net = ModelCatalog.get_model(normalized, dist_dim)
        action_dist = dist_class(net.outputs)
        self._act = U.function([obs_ph], action_dist.sample())
    return scope
def __init__(self, *args, **kwargs):
    """Build the policy graph and the flat get/set weight helpers.

    All positional/keyword args are forwarded to ``_initialize`` and kept
    on ``self`` so the policy can be reconstructed elsewhere.
    """
    self.args, self.kwargs = args, kwargs
    self.scope = self._initialize(*args, **kwargs)

    scope_name = self.scope.name
    self.all_variables = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope_name)
    self.trainable_variables = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope_name)
    self.num_params = sum(
        int(np.prod(v.get_shape().as_list()))
        for v in self.trainable_variables)

    # Helpers to read/write the trainable weights as one flat vector.
    self._setfromflat = U.SetFromFlat(self.trainable_variables)
    self._getflat = U.GetFlat(self.trainable_variables)

    def describe(variables):
        # Log name/shape/size for each variable in the list.
        for v in variables:
            dims = v.get_shape().as_list()
            logger.info('- {} shape:{} size:{}'.format(
                v.name, dims, np.prod(dims)))

    logger.info('Trainable variables ({} parameters)'
                .format(self.num_params))
    describe(self.trainable_variables)
    logger.info('All variables')
    describe(self.all_variables)

    # Bulk setter: one placeholder per variable, assigned as a group.
    feeds = [
        tf.placeholder(v.value().dtype, v.get_shape().as_list())
        for v in self.all_variables
    ]
    assign_all = tf.group(
        *(v.assign(p) for v, p in zip(self.all_variables, feeds)))
    self.set_all_vars = U.function(
        inputs=feeds, outputs=[], updates=[assign_all])