def __init__(self,
             input_shape,
             output_dim,
             name='GaussianMLPRegressorWithModel',
             hidden_sizes=(32, 32),
             hidden_nonlinearity=tf.nn.tanh,
             optimizer=None,
             optimizer_args=None,
             use_trust_region=True,
             max_kl_step=0.01,
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_hidden_sizes=(32, 32),
             std_nonlinearity=None,
             layer_normalization=False,
             normalize_inputs=True,
             normalize_outputs=True,
             subsample_factor=1.0):
    """Set up the optimizer and the Gaussian MLP regressor model.

    Args:
        input_shape: Forwarded to the parent initializer and to
            ``GaussianMLPRegressorModel``.
        output_dim: Forwarded to the parent initializer; the model
            receives ``self._output_dim``.
        name: Forwarded to the parent initializer; ``self._name`` is
            then used as the variable scope name.
        hidden_sizes: Forwarded to the model as ``hidden_sizes``.
        hidden_nonlinearity: Forwarded to the model as
            ``hidden_nonlinearity``.
        optimizer: Optimizer class (or other callable) that is
            instantiated with ``optimizer_args``. When None,
            ``PenaltyLbfgsOptimizer`` is used if ``use_trust_region``
            is true and ``LbfgsOptimizer`` otherwise.
        optimizer_args: Keyword arguments for the optimizer
            constructor. None is treated as an empty dict.
        use_trust_region: Selects the default optimizer; also stored
            on the instance.
        max_kl_step: Stored on the instance as ``_max_kl_step``.
        learn_std: Forwarded to the model.
        init_std: Forwarded to the model.
        adaptive_std: Forwarded to the model.
        std_share_network: Forwarded to the model.
        std_hidden_sizes: Forwarded to the model.
        std_nonlinearity: Forwarded to the model as
            ``std_hidden_nonlinearity``.
        layer_normalization: Forwarded to the model.
        normalize_inputs: Stored on the instance.
        normalize_outputs: Stored on the instance.
        subsample_factor: Stored on the instance.
    """
    super().__init__(input_shape, output_dim, name)

    # Plain configuration, kept on the instance for later use.
    self._normalize_inputs = normalize_inputs
    self._normalize_outputs = normalize_outputs
    self._max_kl_step = max_kl_step
    self._subsample_factor = subsample_factor
    self._use_trust_region = use_trust_region

    with tf.variable_scope(self._name, reuse=False) as scope:
        self._variable_scope = scope

        opt_kwargs = dict() if optimizer_args is None else optimizer_args
        if optimizer is not None:
            make_optimizer = optimizer
        elif use_trust_region:
            make_optimizer = PenaltyLbfgsOptimizer
        else:
            make_optimizer = LbfgsOptimizer
        self._optimizer = make_optimizer(**opt_kwargs)

    model_kwargs = dict(
        input_shape=input_shape,
        output_dim=self._output_dim,
        hidden_sizes=hidden_sizes,
        hidden_nonlinearity=hidden_nonlinearity,
        output_nonlinearity=None,
        learn_std=learn_std,
        adaptive_std=adaptive_std,
        std_share_network=std_share_network,
        init_std=init_std,
        min_std=None,
        max_std=None,
        std_hidden_sizes=std_hidden_sizes,
        std_hidden_nonlinearity=std_nonlinearity,
        std_output_nonlinearity=None,
        std_parameterization='exp',
        layer_normalization=layer_normalization,
    )
    self.model = GaussianMLPRegressorModel(**model_kwargs)

    self._initialize()
def __init__(self,
             optimizer=None,
             optimizer_args=None,
             step_size=0.01,
             name=None,
             **kwargs):
    """Record the optimizer settings and delegate to the parent class.

    Args:
        optimizer: Optimizer instance to use. When None, a
            ``PenaltyLbfgsOptimizer`` is built from ``optimizer_args``.
        optimizer_args: Keyword arguments for the default optimizer.
            Only consulted when ``optimizer`` is None; None is treated
            as an empty dict.
        step_size: Stored on the instance as ``step_size``.
        name: Stored on the instance as ``name``.
        **kwargs: Passed unchanged to the parent initializer.
    """
    if optimizer is None:
        default_kwargs = (optimizer_args
                          if optimizer_args is not None else dict())
        optimizer = PenaltyLbfgsOptimizer(**default_kwargs)

    self.optimizer = optimizer
    self.step_size = step_size
    self.name = name
    super(NPO, self).__init__(**kwargs)
def __init__(self,
             input_shape,
             output_dim,
             name="GaussianMLPRegressor",
             mean_network=None,
             hidden_sizes=(32, 32),
             hidden_nonlinearity=tf.nn.tanh,
             optimizer=None,
             optimizer_args=None,
             use_trust_region=True,
             max_kl_step=0.01,
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_hidden_sizes=(32, 32),
             std_nonlinearity=None,
             normalize_inputs=True,
             normalize_outputs=True,
             subsample_factor=1.0):
    """
    Build the mean/log-std networks, the loss and the optimizer.

    :param input_shape: Shape of the input data (a tuple; it is
     concatenated with ``(1, )`` to shape the normalization variables).
    :param output_dim: Dimension of output.
    :param name: Name of the variable scope in which everything is built.
    :param mean_network: Optional pre-built mean network. When None, an
     MLP is created. When given and `std_share_network` is True, its
     output is sliced into a mean half and a log-std half, so it is
     expected to produce ``2 * output_dim`` units.
    :param hidden_sizes: Number of hidden units of each layer of the mean
     network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the
     mean network.
    :param optimizer: Optimizer class for minimizing the negative
     log-likelihood; it is instantiated with `optimizer_args`. When None,
     PenaltyLbfgsOptimizer is used if `use_trust_region` is True and
     LbfgsOptimizer otherwise.
    :param optimizer_args: Keyword arguments for the optimizer
     constructor. None is treated as an empty dict.
    :param use_trust_region: Whether to use trust region constraint.
    :param max_kl_step: KL divergence constraint for each iteration.
    :param learn_std: Whether to learn the standard deviations. Only
     effective if both `adaptive_std` and `std_share_network` are False;
     otherwise this parameter is ignored and the weights producing the
     std are always learned.
    :param init_std: Initial standard deviation. Only used for the
     state-independent log-std parameter (both `adaptive_std` and
     `std_share_network` False).
    :param adaptive_std: Whether to make the std a function of the states
     through a separate network.
    :param std_share_network: Whether to use the same network as the mean.
    :param std_hidden_sizes: Number of hidden units of each layer of the
     std network. Only used if `adaptive_std` is True.
    :param std_nonlinearity: Non-linearity used for each layer of the std
     network. Only used if `adaptive_std` is True; the value (including
     the default None) is passed through unchanged.
    :param normalize_inputs: Stored on the instance; when True,
     placeholders and assign ops for the input mean/std are created.
    :param normalize_outputs: Stored on the instance; when True,
     placeholders and assign ops for the output mean/std are created.
    :param subsample_factor: Stored on the instance.
    """
    Parameterized.__init__(self)
    # Must run before any extra local is bound: it records the
    # constructor arguments found in locals().
    Serializable.quick_init(self, locals())
    self._mean_network_name = "mean_network"
    self._std_network_name = "std_network"

    with tf.variable_scope(name):
        if optimizer_args is None:
            optimizer_args = dict()

        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
            else:
                optimizer = LbfgsOptimizer(**optimizer_args)
        else:
            optimizer = optimizer(**optimizer_args)

        self._optimizer = optimizer
        self._subsample_factor = subsample_factor

        if mean_network is None:
            # A shared network emits mean and log-std side by side.
            mean_network = MLP(
                name="mean_network",
                input_shape=input_shape,
                output_dim=(2 * output_dim
                            if std_share_network else output_dim),
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=None,
            )

        # Derive l_mean outside the branch above so that it is also
        # defined when the caller supplies mean_network (previously this
        # path raised UnboundLocalError).
        if std_share_network:
            l_mean = L.SliceLayer(
                mean_network.output_layer,
                slice(output_dim),
                name="mean_slice",
            )
        else:
            l_mean = mean_network.output_layer

        if adaptive_std:
            l_log_std = MLP(
                name="log_std_network",
                input_shape=input_shape,
                input_var=mean_network.input_layer.input_var,
                output_dim=output_dim,
                hidden_sizes=std_hidden_sizes,
                hidden_nonlinearity=std_nonlinearity,
                output_nonlinearity=None,
            ).output_layer
        elif std_share_network:
            l_log_std = L.SliceLayer(
                mean_network.output_layer,
                slice(output_dim, 2 * output_dim),
                name="log_std_slice",
            )
        else:
            l_log_std = L.ParamLayer(
                mean_network.input_layer,
                num_units=output_dim,
                param=tf.constant_initializer(np.log(init_std)),
                name="output_log_std",
                trainable=learn_std,
            )

        LayersPowered.__init__(self, [l_mean, l_log_std])

        xs_var = mean_network.input_layer.input_var
        ys_var = tf.placeholder(
            dtype=tf.float32, name="ys", shape=(None, output_dim))
        # Named "old_means" (it used to reuse the name "ys").
        old_means_var = tf.placeholder(
            dtype=tf.float32, name="old_means", shape=(None, output_dim))
        old_log_stds_var = tf.placeholder(
            dtype=tf.float32,
            name="old_log_stds",
            shape=(None, output_dim))

        # Normalization statistics, initialised to the identity transform.
        x_mean_var = tf.Variable(
            np.zeros((1, ) + input_shape, dtype=np.float32),
            name="x_mean",
        )
        x_std_var = tf.Variable(
            np.ones((1, ) + input_shape, dtype=np.float32),
            name="x_std",
        )
        y_mean_var = tf.Variable(
            np.zeros((1, output_dim), dtype=np.float32),
            name="y_mean",
        )
        y_std_var = tf.Variable(
            np.ones((1, output_dim), dtype=np.float32),
            name="y_std",
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var
        normalized_ys_var = (ys_var - y_mean_var) / y_std_var

        with tf.name_scope(
                self._mean_network_name, values=[normalized_xs_var]):
            normalized_means_var = L.get_output(
                l_mean, {mean_network.input_layer: normalized_xs_var})

        with tf.name_scope(
                self._std_network_name, values=[normalized_xs_var]):
            normalized_log_stds_var = L.get_output(
                l_log_std, {mean_network.input_layer: normalized_xs_var})

        # Map the network outputs back to the un-normalized output space.
        means_var = normalized_means_var * y_std_var + y_mean_var
        log_stds_var = normalized_log_stds_var + tf.log(y_std_var)

        normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
        normalized_old_log_stds_var = old_log_stds_var - tf.log(y_std_var)

        dist = self._dist = DiagonalGaussian(output_dim)

        normalized_dist_info_vars = dict(
            mean=normalized_means_var, log_std=normalized_log_stds_var)

        mean_kl = tf.reduce_mean(
            dist.kl_sym(
                dict(
                    mean=normalized_old_means_var,
                    log_std=normalized_old_log_stds_var),
                normalized_dist_info_vars,
            ))

        # Negative mean log-likelihood of the normalized targets.
        loss = -tf.reduce_mean(
            dist.log_likelihood_sym(normalized_ys_var,
                                    normalized_dist_info_vars))

        self._f_predict = tensor_utils.compile_function([xs_var],
                                                        means_var)
        self._f_pdists = tensor_utils.compile_function(
            [xs_var], [means_var, log_stds_var])
        self._l_mean = l_mean
        self._l_log_std = l_log_std

        # Separate name so the constructor kwargs above are not shadowed.
        update_opt_args = dict(
            loss=loss,
            target=self,
            network_outputs=[
                normalized_means_var, normalized_log_stds_var
            ],
        )

        if use_trust_region:
            update_opt_args["leq_constraint"] = (mean_kl, max_kl_step)
            update_opt_args["inputs"] = [
                xs_var, ys_var, old_means_var, old_log_stds_var
            ]
        else:
            update_opt_args["inputs"] = [xs_var, ys_var]

        self._optimizer.update_opt(**update_opt_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._normalize_outputs = normalize_outputs
        self._mean_network = mean_network
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
        self._y_mean_var = y_mean_var
        self._y_std_var = y_std_var

        # Optionally create assign operations for normalization
        if self._normalize_inputs:
            self._x_mean_var_ph = tf.placeholder(
                shape=(1, ) + input_shape,
                dtype=tf.float32,
            )
            self._x_std_var_ph = tf.placeholder(
                shape=(1, ) + input_shape,
                dtype=tf.float32,
            )
            self._assign_x_mean = tf.assign(self._x_mean_var,
                                            self._x_mean_var_ph)
            self._assign_x_std = tf.assign(self._x_std_var,
                                           self._x_std_var_ph)
        if self._normalize_outputs:
            self._y_mean_var_ph = tf.placeholder(
                shape=(1, output_dim),
                dtype=tf.float32,
            )
            self._y_std_var_ph = tf.placeholder(
                shape=(1, output_dim),
                dtype=tf.float32,
            )
            self._assign_y_mean = tf.assign(self._y_mean_var,
                                            self._y_mean_var_ph)
            self._assign_y_std = tf.assign(self._y_std_var,
                                           self._y_std_var_ph)
def __init__(self,
             input_shape,
             output_dim,
             filter_dims,
             num_filters,
             strides,
             padding,
             hidden_sizes,
             hidden_nonlinearity=tf.nn.tanh,
             hidden_w_init=tf.glorot_uniform_initializer(),
             hidden_b_init=tf.zeros_initializer(),
             output_nonlinearity=None,
             output_w_init=tf.glorot_uniform_initializer(),
             output_b_init=tf.zeros_initializer(),
             name='GaussianCNNRegressorWithModel',
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_filter_dims=None,
             std_num_filters=None,
             std_strides=None,
             std_padding='SAME',
             std_hidden_sizes=None,
             std_hidden_nonlinearity=None,
             std_output_nonlinearity=None,
             layer_normalization=False,
             normalize_inputs=True,
             normalize_outputs=True,
             subsample_factor=1.,
             optimizer=None,
             optimizer_args=None,
             use_trust_region=True,
             max_kl_step=0.01):
    """Set up the optimizer and the Gaussian CNN regressor model.

    Args:
        input_shape: Forwarded to the parent initializer and the model.
        output_dim: Forwarded to the parent initializer and the model.
        filter_dims: Forwarded to the model as ``filter_dims``.
        num_filters: Forwarded to the model as ``num_filters``.
        strides: Forwarded to the model as ``strides``.
        padding: Forwarded to the model as ``padding``.
        hidden_sizes: Forwarded to the model.
        hidden_nonlinearity: Forwarded to the model.
        hidden_w_init: Forwarded to the model.
        hidden_b_init: Forwarded to the model.
        output_nonlinearity: Forwarded to the model.
        output_w_init: Forwarded to the model.
        output_b_init: Forwarded to the model.
        name: Forwarded to the parent initializer; ``self._name`` is
            then used as the variable scope name.
        learn_std: Forwarded to the model.
        init_std: Forwarded to the model.
        adaptive_std: Forwarded to the model.
        std_share_network: Forwarded to the model.
        std_filter_dims: Forwarded to the model. None (the default)
            becomes a fresh empty list.
        std_num_filters: Forwarded to the model. None (the default)
            becomes a fresh empty list.
        std_strides: Forwarded to the model. None (the default)
            becomes a fresh empty list.
        std_padding: Forwarded to the model.
        std_hidden_sizes: Forwarded to the model. None (the default)
            becomes a fresh empty list.
        std_hidden_nonlinearity: Forwarded to the model.
        std_output_nonlinearity: Forwarded to the model.
        layer_normalization: Forwarded to the model.
        normalize_inputs: Stored on the instance.
        normalize_outputs: Stored on the instance.
        subsample_factor: Stored on the instance.
        optimizer: Optimizer class (or other callable) that is
            instantiated with ``optimizer_args``. When None,
            ``PenaltyLbfgsOptimizer`` is used if ``use_trust_region``
            is true and ``LbfgsOptimizer`` otherwise.
        optimizer_args: Keyword arguments for the optimizer
            constructor. None (the default) is treated as an empty
            dict.
        use_trust_region: Selects the default optimizer; also stored
            on the instance.
        max_kl_step: Stored on the instance as ``_max_kl_step``.
    """
    super().__init__(input_shape, output_dim, name)

    # Build per-call containers instead of sharing one mutable default
    # object between every instance of the class.
    if std_filter_dims is None:
        std_filter_dims = []
    if std_num_filters is None:
        std_num_filters = []
    if std_strides is None:
        std_strides = []
    if std_hidden_sizes is None:
        std_hidden_sizes = []

    self._use_trust_region = use_trust_region
    self._subsample_factor = subsample_factor
    self._max_kl_step = max_kl_step
    self._normalize_inputs = normalize_inputs
    self._normalize_outputs = normalize_outputs

    with tf.compat.v1.variable_scope(self._name, reuse=False) as vs:
        self._variable_scope = vs
        if optimizer_args is None:
            optimizer_args = dict()
        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
            else:
                optimizer = LbfgsOptimizer(**optimizer_args)
        else:
            optimizer = optimizer(**optimizer_args)
        self._optimizer = optimizer

    self.model = GaussianCNNRegressorModel(
        input_shape=input_shape,
        output_dim=output_dim,
        num_filters=num_filters,
        filter_dims=filter_dims,
        strides=strides,
        padding=padding,
        hidden_sizes=hidden_sizes,
        hidden_nonlinearity=hidden_nonlinearity,
        hidden_w_init=hidden_w_init,
        hidden_b_init=hidden_b_init,
        output_nonlinearity=output_nonlinearity,
        output_w_init=output_w_init,
        output_b_init=output_b_init,
        learn_std=learn_std,
        adaptive_std=adaptive_std,
        std_share_network=std_share_network,
        init_std=init_std,
        min_std=None,
        max_std=None,
        std_num_filters=std_num_filters,
        std_filter_dims=std_filter_dims,
        std_strides=std_strides,
        std_padding=std_padding,
        std_hidden_sizes=std_hidden_sizes,
        std_hidden_nonlinearity=std_hidden_nonlinearity,
        std_output_nonlinearity=std_output_nonlinearity,
        std_parameterization='exp',
        layer_normalization=layer_normalization)

    self._initialize()
def __init__(self,
             input_shape,
             output_dim,
             conv_filters,
             conv_filter_sizes,
             conv_strides,
             conv_pads,
             hidden_sizes,
             hidden_nonlinearity=tf.nn.tanh,
             output_nonlinearity=None,
             name='GaussianConvRegressor',
             mean_network=None,
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_conv_filters=None,
             std_conv_filter_sizes=None,
             std_conv_strides=None,
             std_conv_pads=None,
             std_hidden_sizes=None,
             std_hidden_nonlinearity=None,
             std_output_nonlinearity=None,
             normalize_inputs=True,
             normalize_outputs=True,
             subsample_factor=1.,
             optimizer=None,
             optimizer_args=None,
             use_trust_region=True,
             max_kl_step=0.01):
    """Build the conv mean/log-std networks, the loss and the optimizer.

    Args:
        input_shape: Shape of the input data, passed to the networks.
            Its product sizes the input normalization variables.
        output_dim: Dimension of the output.
        conv_filters: Passed to the mean ``ConvNetwork``.
        conv_filter_sizes: Passed to the mean ``ConvNetwork``.
        conv_strides: Passed to the mean ``ConvNetwork``.
        conv_pads: Passed to the mean ``ConvNetwork``.
        hidden_sizes: Passed to the mean ``ConvNetwork``.
        hidden_nonlinearity: Passed to the mean ``ConvNetwork``.
        output_nonlinearity: Passed to the mean ``ConvNetwork``.
        name: Name of the variable scope in which everything is built.
        mean_network: Optional pre-built mean network. When None, a
            ``ConvNetwork`` is created. When given and
            ``std_share_network`` is true, its output is sliced into a
            mean half and a log-std half, so it is expected to produce
            ``2 * output_dim`` units.
        learn_std: Whether the state-independent log-std parameter is
            trainable. Only used when both ``adaptive_std`` and
            ``std_share_network`` are false.
        init_std: Initial standard deviation; its log initialises the
            log-std bias or parameter.
        adaptive_std: Whether to produce the log-std with a separate
            ``ConvNetwork``.
        std_share_network: Whether mean and log-std come from the same
            network.
        std_conv_filters: Passed to the std ``ConvNetwork``; only used
            when ``adaptive_std`` is true. None (the default) becomes
            a fresh empty list.
        std_conv_filter_sizes: Passed to the std ``ConvNetwork``; only
            used when ``adaptive_std`` is true. None (the default)
            becomes a fresh empty list.
        std_conv_strides: Passed to the std ``ConvNetwork``; only used
            when ``adaptive_std`` is true. None (the default) becomes
            a fresh empty list.
        std_conv_pads: Passed to the std ``ConvNetwork``; only used
            when ``adaptive_std`` is true. None (the default) becomes
            a fresh empty list.
        std_hidden_sizes: Passed to the std ``ConvNetwork``; only used
            when ``adaptive_std`` is true. None (the default) becomes
            a fresh empty list.
        std_hidden_nonlinearity: Passed to the std ``ConvNetwork``;
            only used when ``adaptive_std`` is true.
        std_output_nonlinearity: Passed to the std ``ConvNetwork``;
            only used when ``adaptive_std`` is true.
        normalize_inputs: Stored on the instance.
        normalize_outputs: Stored on the instance.
        subsample_factor: Stored on the instance.
        optimizer: Optimizer class that is instantiated with
            ``optimizer_args``. When None, ``PenaltyLbfgsOptimizer`` is
            used if ``use_trust_region`` is true and ``LbfgsOptimizer``
            otherwise.
        optimizer_args: Keyword arguments for the optimizer
            constructor. None (the default) is treated as an empty
            dict.
        use_trust_region: Whether to add the mean-KL constraint and
            the old-distribution inputs to the optimizer setup.
        max_kl_step: Bound paired with the mean KL in the constraint.
    """
    Parameterized.__init__(self)
    # Must run before any extra local is bound: it records the
    # constructor arguments found in locals().
    Serializable.quick_init(self, locals())

    # Build per-call containers instead of sharing one mutable default
    # object between every instance of the class.
    if optimizer_args is None:
        optimizer_args = dict()
    if std_conv_filters is None:
        std_conv_filters = []
    if std_conv_filter_sizes is None:
        std_conv_filter_sizes = []
    if std_conv_strides is None:
        std_conv_strides = []
    if std_conv_pads is None:
        std_conv_pads = []
    if std_hidden_sizes is None:
        std_hidden_sizes = []

    self._mean_network_name = 'mean_network'
    self._std_network_name = 'std_network'

    with tf.compat.v1.variable_scope(name):
        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
            else:
                optimizer = LbfgsOptimizer(**optimizer_args)
        else:
            optimizer = optimizer(**optimizer_args)

        self._optimizer = optimizer
        self._subsample_factor = subsample_factor

        if mean_network is None:
            if std_share_network:
                # Bias: zeros for the mean half, log(init_std) for the
                # log-std half of the shared output.
                b = np.concatenate(
                    [
                        np.zeros(output_dim),
                        np.full(output_dim, np.log(init_std))
                    ], axis=0)  # yapf: disable
                b = tf.constant_initializer(b)
                mean_network = ConvNetwork(
                    name=self._mean_network_name,
                    input_shape=input_shape,
                    output_dim=2 * output_dim,
                    conv_filters=conv_filters,
                    conv_filter_sizes=conv_filter_sizes,
                    conv_strides=conv_strides,
                    conv_pads=conv_pads,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity,
                    output_b_init=b)
            else:
                mean_network = ConvNetwork(
                    name=self._mean_network_name,
                    input_shape=input_shape,
                    output_dim=output_dim,
                    conv_filters=conv_filters,
                    conv_filter_sizes=conv_filter_sizes,
                    conv_strides=conv_strides,
                    conv_pads=conv_pads,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity)

        # Derive l_mean outside the branch above so that it is also
        # defined when the caller supplies mean_network (previously this
        # path raised UnboundLocalError).
        if std_share_network:
            l_mean = layers.SliceLayer(
                mean_network.output_layer,
                slice(output_dim),
                name='mean_slice',
            )
        else:
            l_mean = mean_network.output_layer

        if adaptive_std:
            l_log_std = ConvNetwork(
                name=self._std_network_name,
                input_shape=input_shape,
                output_dim=output_dim,
                conv_filters=std_conv_filters,
                conv_filter_sizes=std_conv_filter_sizes,
                conv_strides=std_conv_strides,
                conv_pads=std_conv_pads,
                hidden_sizes=std_hidden_sizes,
                hidden_nonlinearity=std_hidden_nonlinearity,
                output_nonlinearity=std_output_nonlinearity,
                output_b_init=tf.constant_initializer(np.log(init_std)),
            ).output_layer
        elif std_share_network:
            l_log_std = layers.SliceLayer(
                mean_network.output_layer,
                slice(output_dim, 2 * output_dim),
                name='log_std_slice',
            )
        else:
            l_log_std = layers.ParamLayer(
                mean_network.input_layer,
                num_units=output_dim,
                param=tf.constant_initializer(np.log(init_std)),
                trainable=learn_std,
                name=self._std_network_name,
            )

        LayersPowered.__init__(self, [l_mean, l_log_std])

        xs_var = mean_network.input_layer.input_var
        ys_var = tf.compat.v1.placeholder(
            dtype=tf.float32, name='ys', shape=(None, output_dim))
        # Named 'old_means' (it used to reuse the name 'ys').
        old_means_var = tf.compat.v1.placeholder(
            dtype=tf.float32, name='old_means', shape=(None, output_dim))
        old_log_stds_var = tf.compat.v1.placeholder(
            dtype=tf.float32,
            name='old_log_stds',
            shape=(None, output_dim))

        # Normalization statistics, initialised to the identity transform.
        x_mean_var = tf.Variable(
            np.zeros((1, np.prod(input_shape)), dtype=np.float32),
            name='x_mean',
        )
        x_std_var = tf.Variable(
            np.ones((1, np.prod(input_shape)), dtype=np.float32),
            name='x_std',
        )
        y_mean_var = tf.Variable(
            np.zeros((1, output_dim), dtype=np.float32),
            name='y_mean',
        )
        y_std_var = tf.Variable(
            np.ones((1, output_dim), dtype=np.float32),
            name='y_std',
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var
        normalized_ys_var = (ys_var - y_mean_var) / y_std_var

        with tf.name_scope(
                self._mean_network_name, values=[normalized_xs_var]):
            normalized_means_var = layers.get_output(
                l_mean, {mean_network.input_layer: normalized_xs_var})

        with tf.name_scope(
                self._std_network_name, values=[normalized_xs_var]):
            normalized_log_stds_var = layers.get_output(
                l_log_std, {mean_network.input_layer: normalized_xs_var})

        # Map the network outputs back to the un-normalized output space.
        means_var = normalized_means_var * y_std_var + y_mean_var
        log_stds_var = normalized_log_stds_var + tf.math.log(y_std_var)

        normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
        normalized_old_log_stds_var = (
            old_log_stds_var - tf.math.log(y_std_var))

        dist = self._dist = DiagonalGaussian(output_dim)

        normalized_dist_info_vars = dict(
            mean=normalized_means_var, log_std=normalized_log_stds_var)

        mean_kl = tf.reduce_mean(
            dist.kl_sym(
                dict(
                    mean=normalized_old_means_var,
                    log_std=normalized_old_log_stds_var),
                normalized_dist_info_vars,
            ))

        # Negative mean log-likelihood of the normalized targets.
        loss = -tf.reduce_mean(
            dist.log_likelihood_sym(normalized_ys_var,
                                    normalized_dist_info_vars))

        self._f_predict = tensor_utils.compile_function([xs_var],
                                                        means_var)
        self._f_pdists = tensor_utils.compile_function(
            [xs_var], [means_var, log_stds_var])
        self._l_mean = l_mean
        self._l_log_std = l_log_std

        # Separate name so the constructor kwargs above are not shadowed.
        update_opt_args = dict(
            loss=loss,
            target=self,
            network_outputs=[
                normalized_means_var, normalized_log_stds_var
            ],
        )

        if use_trust_region:
            update_opt_args['leq_constraint'] = (mean_kl, max_kl_step)
            update_opt_args['inputs'] = [
                xs_var, ys_var, old_means_var, old_log_stds_var
            ]
        else:
            update_opt_args['inputs'] = [xs_var, ys_var]

        self._optimizer.update_opt(**update_opt_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._normalize_outputs = normalize_outputs
        self._mean_network = mean_network
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
        self._y_mean_var = y_mean_var
        self._y_std_var = y_std_var