def __init__(self,
             optimizer=None,
             optimizer_args=None,
             observation_permutation=None,
             action_permutation=None,
             sym_loss_weight=0.0001,
             clip_param=0.2,
             adam_batchsize=128,
             adam_epochs=10,
             **kwargs):
    if optimizer is None:
        if optimizer_args is None:
            optimizer_args = dict()
        optimizer = LbfgsOptimizer(**optimizer_args)
    super(PPO_Clip_Sym, self).__init__(optimizer=optimizer, **kwargs)
    self.observation_permutation = observation_permutation
    self.action_permutation = action_permutation
    self.sym_loss_weight = sym_loss_weight
    self.clip_param = clip_param
    self.adam_batchsize = adam_batchsize
    self.adam_epochs = adam_epochs
    self.obs_perm_mat = np.zeros(
        (len(observation_permutation), len(observation_permutation)))
    self.act_per_mat = np.zeros(
        (len(action_permutation), len(action_permutation)))
    for i, perm in enumerate(self.observation_permutation):
        self.obs_perm_mat[i][int(np.abs(perm))] = np.sign(perm)
    for i, perm in enumerate(self.action_permutation):
        self.act_per_mat[i][int(np.abs(perm))] = np.sign(perm)
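# A minimal sketch (not part of the class above) of how the signed permutation
# matrices built in this constructor mirror a vector: row i picks entry
# abs(perm[i]) of the original vector and multiplies it by sign(perm[i]). The
# permutation spec, dimensions, and the matmul convention used here are
# illustrative assumptions. Note that np.sign(0) is 0, so an entry of exactly 0
# would zero out its row; a spec in this style therefore needs a nonzero
# placeholder (e.g. 0.0001) to mean "index 0, positive sign".
import numpy as np

observation_permutation = [0.0001, -1, 3, 2]  # hypothetical 4-D observation

perm_mat = np.zeros((4, 4))
for i, perm in enumerate(observation_permutation):
    perm_mat[i][int(np.abs(perm))] = np.sign(perm)

obs = np.array([0.5, 0.2, -1.0, 3.0])
mirrored_obs = perm_mat.dot(obs)  # -> [0.5, -0.2, 3.0, -1.0]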
def __init__(self,
             optimizer=None,
             optimizer_args=None,
             positive_adv=None,
             **kwargs):
    Serializable.quick_init(self, locals())
    if optimizer is None:
        if optimizer_args is None:
            optimizer_args = dict()
        optimizer = LbfgsOptimizer(**optimizer_args)
    super(ERWR, self).__init__(
        optimizer=optimizer,
        positive_adv=True if positive_adv is None else positive_adv,
        **kwargs)
def __init__(
        self,
        input_shape,
        output_dim,
        mean_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        learn_std=True,
        init_std=1.0,
        adaptive_std=False,
        std_share_network=False,
        std_hidden_sizes=(32, 32),
        std_nonlinearity=None,
        normalize_inputs=True,
        normalize_outputs=True,
        name=None,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use a trust region constraint.
    :param step_size: KL divergence constraint for each iteration.
    :param learn_std: Whether to learn the standard deviations. Only effective if adaptive_std
        is False. If adaptive_std is True, this parameter is ignored, and the weights for the
        std network are always learned.
    :param adaptive_std: Whether to make the std a function of the states.
    :param std_share_network: Whether to use the same network as the mean.
    :param std_hidden_sizes: Number of hidden units of each layer of the std network. Only used
        if `std_share_network` is False. It defaults to the same architecture as the mean.
    :param std_nonlinearity: Non-linearity used for each layer of the std network. Only used if
        `std_share_network` is False. It defaults to the same non-linearity as the mean.
    """
    Serializable.quick_init(self, locals())

    if optimizer is None:
        if use_trust_region:
            optimizer = PenaltyLbfgsOptimizer()
        else:
            optimizer = LbfgsOptimizer()

    self._optimizer = optimizer

    if mean_network is None:
        mean_network = MLP(
            input_shape=input_shape,
            output_dim=output_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=None,
        )

    l_mean = mean_network.output_layer

    if adaptive_std:
        l_log_std = MLP(
            input_shape=input_shape,
            input_var=mean_network.input_layer.input_var,
            output_dim=output_dim,
            hidden_sizes=std_hidden_sizes,
            hidden_nonlinearity=std_nonlinearity,
            output_nonlinearity=None,
        ).output_layer
    else:
        l_log_std = ParamLayer(
            mean_network.input_layer,
            num_units=output_dim,
            param=lasagne.init.Constant(np.log(init_std)),
            name="output_log_std",
            trainable=learn_std,
        )

    LasagnePowered.__init__(self, [l_mean, l_log_std])

    xs_var = mean_network.input_layer.input_var
    ys_var = TT.matrix("ys")
    old_means_var = TT.matrix("old_means")
    old_log_stds_var = TT.matrix("old_log_stds")

    x_mean_var = theano.shared(
        np.zeros((1,) + input_shape),
        name="x_mean",
        broadcastable=(True,) + (False,) * len(input_shape))
    x_std_var = theano.shared(
        np.ones((1,) + input_shape),
        name="x_std",
        broadcastable=(True,) + (False,) * len(input_shape))
    y_mean_var = theano.shared(
        np.zeros((1, output_dim)), name="y_mean", broadcastable=(True, False))
    y_std_var = theano.shared(
        np.ones((1, output_dim)), name="y_std", broadcastable=(True, False))

    normalized_xs_var = (xs_var - x_mean_var) / x_std_var
    normalized_ys_var = (ys_var - y_mean_var) / y_std_var

    normalized_means_var = L.get_output(
        l_mean, {mean_network.input_layer: normalized_xs_var})
    normalized_log_stds_var = L.get_output(
        l_log_std, {mean_network.input_layer: normalized_xs_var})

    means_var = normalized_means_var * y_std_var + y_mean_var
    log_stds_var = normalized_log_stds_var + TT.log(y_std_var)

    normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
    normalized_old_log_stds_var = old_log_stds_var - TT.log(y_std_var)

    dist = self._dist = DiagonalGaussian(output_dim)

    normalized_dist_info_vars = dict(
        mean=normalized_means_var, log_std=normalized_log_stds_var)

    mean_kl = TT.mean(
        dist.kl_sym(
            dict(mean=normalized_old_means_var,
                 log_std=normalized_old_log_stds_var),
            normalized_dist_info_vars,
        ))

    loss = -TT.mean(
        dist.log_likelihood_sym(normalized_ys_var, normalized_dist_info_vars))

    self._f_predict = compile_function([xs_var], means_var)
    self._f_pdists = compile_function([xs_var], [means_var, log_stds_var])
    self._l_mean = l_mean
    self._l_log_std = l_log_std

    optimizer_args = dict(
        loss=loss,
        target=self,
        network_outputs=[normalized_means_var, normalized_log_stds_var],
    )

    if use_trust_region:
        optimizer_args["leq_constraint"] = (mean_kl, step_size)
        optimizer_args["inputs"] = [
            xs_var, ys_var, old_means_var, old_log_stds_var
        ]
    else:
        optimizer_args["inputs"] = [xs_var, ys_var]

    self._optimizer.update_opt(**optimizer_args)

    self._use_trust_region = use_trust_region
    self._name = name
    self._normalize_inputs = normalize_inputs
    self._normalize_outputs = normalize_outputs
    self._x_mean_var = x_mean_var
    self._x_std_var = x_std_var
    self._y_mean_var = y_mean_var
    self._y_std_var = y_std_var
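# A minimal, illustrative numpy sketch (made-up data, not part of the class
# above) of the whitening and un-whitening done symbolically in the
# constructor: the network operates on normalized xs/ys, and the stored y
# statistics map its outputs back to the original scale via
# means = normalized_means * y_std + y_mean and
# log_stds = normalized_log_stds + log(y_std). The "network outputs" below are
# stand-ins, not a real MLP.
import numpy as np

xs = np.random.randn(100, 3) * 5.0 + 2.0   # hypothetical inputs
ys = np.random.randn(100, 2) * 0.3 - 1.0   # hypothetical regression targets

x_mean, x_std = xs.mean(axis=0, keepdims=True), xs.std(axis=0, keepdims=True) + 1e-8
y_mean, y_std = ys.mean(axis=0, keepdims=True), ys.std(axis=0, keepdims=True) + 1e-8

normalized_xs = (xs - x_mean) / x_std       # what the mean network actually sees
normalized_means = np.zeros((100, 2))       # stand-in for the mean head's output
normalized_log_stds = np.zeros((100, 2))    # stand-in for the log-std head's output

means = normalized_means * y_std + y_mean          # back on the original scale
log_stds = normalized_log_stds + np.log(y_std)     # std rescaled in log-space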
def __init__(
        self,
        input_shape,
        output_dim,
        predict_all=False,  # CF
        prob_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        name=None,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param predict_all: use the prediction made at every step (not only the last step).
    :param hidden_sizes: Number of hidden units of each layer of the mean network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use a trust region constraint.
    :param step_size: KL divergence constraint for each iteration.
    """
    Serializable.quick_init(self, locals())

    if optimizer is None:
        if use_trust_region:
            optimizer = PenaltyLbfgsOptimizer()
        else:
            optimizer = LbfgsOptimizer()

    self.output_dim = output_dim
    self._optimizer = optimizer

    if prob_network is None:
        prob_network = GRUNetwork(
            input_shape=input_shape,
            output_dim=output_dim,
            hidden_dim=hidden_sizes[0],  # 32 by default
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    l_prob = prob_network.output_layer

    LasagnePowered.__init__(self, [l_prob])

    xs_var = prob_network.input_layer.input_var
    ys_var = TT.itensor3("ys")
    old_prob_var = TT.tensor3("old_prob")

    # shape (1, 1, *input_shape): the leading axes broadcast over trajectories and time
    x_mean_var = theano.shared(
        np.zeros((1, 1) + input_shape),
        name="x_mean",
        broadcastable=(True, True) + (False,) * len(input_shape))
    x_std_var = theano.shared(
        np.ones((1, 1) + input_shape),
        name="x_std",
        broadcastable=(True, True) + (False,) * len(input_shape))

    normalized_xs_var = (xs_var - x_mean_var) / x_std_var

    prob_var_all = L.get_output(
        l_prob, {prob_network.input_layer: normalized_xs_var})

    if predict_all:
        prob_var = prob_var_all
    else:
        # take only the last time-step but keep the 3D shape
        prob_var_last = TT.reshape(
            prob_var_all[:, -1, :],
            (TT.shape(prob_var_all)[0], 1, TT.shape(prob_var_all)[2]))
        # pad along the time dimension to obtain the same shape as before
        padded_prob_var = TT.tile(prob_var_last,
                                  (1, TT.shape(prob_var_all)[1], 1))
        # give it the standard name
        prob_var = padded_prob_var

    old_info_vars = dict(prob=old_prob_var)
    info_vars = dict(prob=prob_var)

    dist = self._dist = Categorical(output_dim)

    mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))

    loss = -TT.mean(dist.log_likelihood_sym(ys_var, info_vars))

    predicted_flat = special.to_onehot_sym(
        TT.flatten(TT.argmax(prob_var, axis=-1)), output_dim)
    predicted = TT.reshape(predicted_flat, TT.shape(prob_var))

    self._f_predict = ext.compile_function([xs_var], predicted)
    self._f_prob = ext.compile_function([xs_var], prob_var)
    self._prob_network = prob_network
    self._l_prob = l_prob

    optimizer_args = dict(
        loss=loss,
        target=self,
        network_outputs=[prob_var],
    )

    if use_trust_region:
        optimizer_args["leq_constraint"] = (mean_kl, step_size)
        optimizer_args["inputs"] = [xs_var, ys_var, old_prob_var]
    else:
        optimizer_args["inputs"] = [xs_var, ys_var]

    self._optimizer.update_opt(**optimizer_args)

    self._use_trust_region = use_trust_region
    self._name = name
    self._normalize_inputs = normalize_inputs
    self._x_mean_var = x_mean_var
    self._x_std_var = x_std_var
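# Illustrative numpy sketch (made-up shapes, not part of the class above) of the
# reshape-and-tile trick used when predict_all is False: keep only the last
# time-step's class probabilities but tile them along the time axis so the
# output keeps the same (trajectory, time, dim) shape as the per-step predictions.
import numpy as np

prob_all = np.random.dirichlet(np.ones(3), size=(4, 6))      # (4 trajs, 6 steps, 3 classes)

prob_last = prob_all[:, -1, :].reshape(prob_all.shape[0], 1, prob_all.shape[2])
padded_prob = np.tile(prob_last, (1, prob_all.shape[1], 1))  # back to shape (4, 6, 3)

assert np.allclose(padded_prob[:, 0, :], prob_all[:, -1, :])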
def __init__(
        self,
        input_shape,
        output_dim,
        prob_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        name=None,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use a trust region constraint.
    :param step_size: KL divergence constraint for each iteration.
    """
    Serializable.quick_init(self, locals())

    if optimizer is None:
        if use_trust_region:
            optimizer = PenaltyLbfgsOptimizer()
        else:
            optimizer = LbfgsOptimizer()

    self.output_dim = output_dim
    self._optimizer = optimizer

    if prob_network is None:
        prob_network = MLP(
            input_shape=input_shape,
            output_dim=output_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    l_prob = prob_network.output_layer

    LasagnePowered.__init__(self, [l_prob])

    xs_var = prob_network.input_layer.input_var
    ys_var = TT.imatrix("ys")
    old_prob_var = TT.matrix("old_prob")

    x_mean_var = theano.shared(
        np.zeros((1,) + input_shape),
        name="x_mean",
        broadcastable=(True,) + (False,) * len(input_shape))
    x_std_var = theano.shared(
        np.ones((1,) + input_shape),
        name="x_std",
        broadcastable=(True,) + (False,) * len(input_shape))

    normalized_xs_var = (xs_var - x_mean_var) / x_std_var

    prob_var = L.get_output(l_prob, {prob_network.input_layer: normalized_xs_var})

    old_info_vars = dict(prob=old_prob_var)
    info_vars = dict(prob=prob_var)

    dist = self._dist = Categorical(output_dim)

    mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))

    loss = -TT.mean(dist.log_likelihood_sym(ys_var, info_vars))

    predicted = special.to_onehot_sym(TT.argmax(prob_var, axis=1), output_dim)

    self._f_predict = ext.compile_function([xs_var], predicted)
    self._f_prob = ext.compile_function([xs_var], prob_var)
    self._prob_network = prob_network
    self._l_prob = l_prob

    optimizer_args = dict(
        loss=loss,
        target=self,
        network_outputs=[prob_var],
    )

    if use_trust_region:
        optimizer_args["leq_constraint"] = (mean_kl, step_size)
        optimizer_args["inputs"] = [xs_var, ys_var, old_prob_var]
    else:
        optimizer_args["inputs"] = [xs_var, ys_var]

    self._optimizer.update_opt(**optimizer_args)

    self._use_trust_region = use_trust_region
    self._name = name
    self._normalize_inputs = normalize_inputs
    self._x_mean_var = x_mean_var
    self._x_std_var = x_std_var
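# Illustrative numpy equivalent (made-up values, not part of the class above) of
# the prediction path in this regressor: argmax over the softmax probabilities,
# then one one-hot row per sample, which is what special.to_onehot_sym produces
# symbolically.
import numpy as np

probs = np.array([[0.1, 0.7, 0.2],
                  [0.5, 0.3, 0.2]])           # softmax outputs: 2 samples, 3 classes

labels = probs.argmax(axis=1)                 # [1, 0]
predicted = np.eye(probs.shape[1])[labels]    # [[0, 1, 0], [1, 0, 0]]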
    raise ValueError('No values provided for param %s' % param_name)
else:
    try:
        param_values = [[float(y) for y in x.split(',')]
                        for x in args.param_val.split(',,')]
    except ValueError:
        param_values = [[y for y in x.split(',')]
                        for x in args.param_val.split(',,')]

############################################################################
## POSTPROCESSING OF PARAMETERS
if args.logdir[-1] != '/':
    args.logdir += '/'

if params['hide_baseline_net_params']['optimizer'] == 'LbfgsOptimizer':
    params['hide_baseline_net_params']['optimizer'] = LbfgsOptimizer(
        max_opt_itr=params['hide_baseline_net_params']['max_opt_itr'])
    params['hide_baseline_net_params'].pop('max_opt_itr', None)
else:
    raise ValueError('Unknown optimizer: %s' %
                     params['hide_baseline_net_params']['optimizer'])

if params['seek_baseline_net_params']['optimizer'] == 'LbfgsOptimizer':
    params['seek_baseline_net_params']['optimizer'] = LbfgsOptimizer(
        max_opt_itr=params['seek_baseline_net_params']['max_opt_itr'])
    params['seek_baseline_net_params'].pop('max_opt_itr', None)
else:
    raise ValueError('Unknown optimizer: %s' %
                     params['seek_baseline_net_params']['optimizer'])

## All possible combinations of hyperparameters
param_values.append(seeds)
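# Illustrative sketch (hypothetical command-line value) of the --param_val
# parsing above: ',,' separates groups of values for different hyperparameters
# and ',' separates values inside a group; everything is parsed as floats when
# all entries are numeric, otherwise the whole thing falls back to strings.
param_val = '0.01,0.1,,tanh,relu'

try:
    param_values = [[float(y) for y in x.split(',')]
                    for x in param_val.split(',,')]
except ValueError:
    param_values = [[y for y in x.split(',')]
                    for x in param_val.split(',,')]

# -> [['0.01', '0.1'], ['tanh', 'relu']]  (all strings: the fallback is all-or-nothing)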
def __init__(
        self,
        input_shape,
        output_dim,
        predict_all=False,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        name=None,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param predict_all: use the prediction made at every step about the latent variables
        (not only the last step).
    :param hidden_sizes: Number of hidden units of each layer of the mean network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use a trust region constraint.
    :param step_size: KL divergence constraint for each iteration.
    """
    Serializable.quick_init(self, locals())

    if optimizer is None:
        if use_trust_region:
            optimizer = PenaltyLbfgsOptimizer()
        else:
            optimizer = LbfgsOptimizer()

    self.output_dim = output_dim
    self._optimizer = optimizer

    p_network = GRUNetwork(
        input_shape=input_shape,
        output_dim=output_dim,
        hidden_dim=hidden_sizes[0],
        hidden_nonlinearity=hidden_nonlinearity,
        output_nonlinearity=NL.sigmoid,
    )

    # this is every intermediate latent state, but only the last one is needed here
    l_p = p_network.output_layer

    LasagnePowered.__init__(self, [l_p])

    xs_var = p_network.input_layer.input_var
    ys_var = TT.itensor3("ys")  # 3D: (traj, time, lat_dim)
    old_p_var = TT.tensor3("old_p")

    x_mean_var = theano.shared(
        np.zeros((1, 1) + input_shape),
        name="x_mean",
        broadcastable=(True, True) + (False,) * len(input_shape))
    x_std_var = theano.shared(
        np.ones((1, 1) + input_shape),
        name="x_std",
        broadcastable=(True, True) + (False,) * len(input_shape))

    normalized_xs_var = (xs_var - x_mean_var) / x_std_var

    # per-step p_var, from which only the last time-step is kept and padded along all time-steps
    p_var_all = L.get_output(l_p, {p_network.input_layer: normalized_xs_var})

    # take only the last time-step but keep the 3D shape
    p_var_last = TT.reshape(
        p_var_all[:, -1, :],
        (TT.shape(p_var_all)[0], 1, TT.shape(p_var_all)[2]))
    # pad along the time dimension to obtain the same shape as before
    padded_p = TT.tile(p_var_last, (1, TT.shape(p_var_all)[1], 1))

    # give it the standard name
    if predict_all:
        p_var = p_var_all
    else:
        p_var = padded_p

    old_info_vars = dict(p=old_p_var)
    # posterior of the latent at every step, wrt obs-act. Same along batch if recurrent
    info_vars = dict(p=p_var)

    dist = self._dist = Bernoulli(output_dim)

    mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))

    # the regressor just minimizes the negative log-likelihood of the data ys
    loss = -TT.mean(dist.log_likelihood_sym(ys_var, info_vars))

    predicted = p_var >= 0.5

    self._f_predict = ext.compile_function([xs_var], predicted)
    # for consistency with gauss_mlp_regressor this should be ._f_pdists
    self._f_p = ext.compile_function([xs_var], p_var)
    self._l_p = l_p

    optimizer_args = dict(
        loss=loss,
        target=self,
        network_outputs=[p_var],
    )

    if use_trust_region:
        optimizer_args["leq_constraint"] = (mean_kl, step_size)
        optimizer_args["inputs"] = [xs_var, ys_var, old_p_var]
    else:
        optimizer_args["inputs"] = [xs_var, ys_var]

    self._optimizer.update_opt(**optimizer_args)

    self._use_trust_region = use_trust_region
    self._name = name
    self._normalize_inputs = normalize_inputs
    self._x_mean_var = x_mean_var
    self._x_std_var = x_std_var
def __init__(
        self,
        input_shape,
        output_dim,
        predict_all=True,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        name=None,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use a trust region constraint.
    :param step_size: KL divergence constraint for each iteration.
    """
    Serializable.quick_init(self, locals())

    if optimizer is None:
        if use_trust_region:
            optimizer = PenaltyLbfgsOptimizer()
        else:
            optimizer = LbfgsOptimizer()

    self.output_dim = output_dim
    self._optimizer = optimizer

    p_network = MLP(
        input_shape=input_shape,
        output_dim=output_dim,
        hidden_sizes=hidden_sizes,
        hidden_nonlinearity=hidden_nonlinearity,
        output_nonlinearity=NL.sigmoid,
    )

    l_p = p_network.output_layer

    LasagnePowered.__init__(self, [l_p])

    xs_var = p_network.input_layer.input_var
    ys_var = TT.imatrix("ys")
    old_p_var = TT.matrix("old_p")

    x_mean_var = theano.shared(
        np.zeros((1,) + input_shape),
        name="x_mean",
        broadcastable=(True,) + (False,) * len(input_shape))
    x_std_var = theano.shared(
        np.ones((1,) + input_shape),
        name="x_std",
        broadcastable=(True,) + (False,) * len(input_shape))

    normalized_xs_var = (xs_var - x_mean_var) / x_std_var

    p_var = L.get_output(l_p, {p_network.input_layer: normalized_xs_var})

    old_info_vars = dict(p=old_p_var)
    # posterior of the latent at every step, wrt obs-act. Same along batch if recurrent
    info_vars = dict(p=p_var)

    dist = self._dist = Bernoulli(output_dim)

    mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))
    # if not using TR, still log KL
    self._mean_kl = ext.compile_function([xs_var, old_p_var], mean_kl)

    # regressor just wants to min -loglik of data ys
    loss = -TT.mean(dist.log_likelihood_sym(ys_var, info_vars))

    # this gives 0 or 1, depending what is closer to the p_var
    predicted = p_var >= 0.5

    self._f_predict = ext.compile_function([xs_var], predicted)
    # for consistency with gauss_mlp_reg this should be ._f_pdists
    self._f_p = ext.compile_function([xs_var], p_var)
    self._l_p = l_p

    optimizer_args = dict(
        loss=loss,
        target=self,
        network_outputs=[p_var],
    )

    if use_trust_region:
        optimizer_args["leq_constraint"] = (mean_kl, step_size)
        optimizer_args["inputs"] = [xs_var, ys_var, old_p_var]
    else:
        optimizer_args["inputs"] = [xs_var, ys_var]

    self._optimizer.update_opt(**optimizer_args)

    self._use_trust_region = use_trust_region
    self._name = name
    self._normalize_inputs = normalize_inputs
    self._x_mean_var = x_mean_var
    self._x_std_var = x_std_var
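# Illustrative numpy sketch (made-up values, not part of the class above) of the
# Bernoulli regressor's prediction and loss: predictions threshold each latent's
# posterior probability at 0.5, and the loss is the negative Bernoulli
# log-likelihood, mirroring -TT.mean(dist.log_likelihood_sym(...)); the
# sum-over-dimensions-then-mean aggregation below is an assumption about how the
# distribution sums its per-dimension terms.
import numpy as np

p = np.array([[0.9, 0.2],
              [0.4, 0.6],
              [0.5, 0.1]])            # sigmoid outputs: 3 samples, 2 binary latents
ys = np.array([[1, 0],
               [0, 1],
               [1, 0]])               # observed binary latents

predicted = (p >= 0.5).astype(int)    # [[1, 0], [0, 1], [1, 0]]

loglik = ys * np.log(p) + (1 - ys) * np.log(1 - p)
loss = -loglik.sum(axis=-1).mean()    # assumed aggregation: sum over dims, mean over batch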