def __init__(self, alive_coeff=1, ctrl_cost_coeff=0.01, *args, **kwargs):
    self.alive_coeff = alive_coeff
    self.ctrl_cost_coeff = ctrl_cost_coeff
    super(HopperEnv, self).__init__(*args, **kwargs)
    Serializable.quick_init(self, locals())
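# Hedged illustration: how alive_coeff and ctrl_cost_coeff typically enter a
# Hopper-style step reward. The forward_reward term and this helper's
# signature are assumptions for the sketch, not HopperEnv's actual step().
import numpy as np

def hopper_reward_sketch(forward_reward, action, alive_coeff=1.0,
                         ctrl_cost_coeff=0.01):
    # quadratic penalty on actuation, plus a bonus for every step spent upright
    ctrl_cost = ctrl_cost_coeff * np.square(action).sum()
    return forward_reward + alive_coeff - ctrl_cost

# e.g. hopper_reward_sketch(0.5, np.array([0.1, -0.2, 0.05]))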
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # Always call the Serializable constructor last
    Serializable.quick_init(self, locals())
def __init__(self,
             input_shape,
             extra_input_shape,
             output_dim,
             hidden_sizes,
             conv_filters,
             conv_filter_sizes,
             conv_strides,
             conv_pads,
             name=None,
             extra_hidden_sizes=None,
             hidden_w_init=ly.XavierUniformInitializer(),
             hidden_b_init=tf.zeros_initializer(),
             output_w_init=ly.XavierUniformInitializer(),
             output_b_init=tf.zeros_initializer(),
             hidden_nonlinearity=tf.nn.relu,
             output_nonlinearity=None,
             input_var=None,
             input_layer=None):
    Serializable.quick_init(self, locals())

    if extra_hidden_sizes is None:
        extra_hidden_sizes = []

    with tf.compat.v1.variable_scope(name, 'ConvMergeNetwork'):
        input_flat_dim = np.prod(input_shape)
        extra_input_flat_dim = np.prod(extra_input_shape)
        total_input_flat_dim = input_flat_dim + extra_input_flat_dim

        if input_layer is None:
            l_in = ly.InputLayer(shape=(None, total_input_flat_dim),
                                 input_var=input_var,
                                 name='input')
        else:
            l_in = input_layer

        l_conv_in = ly.reshape(ly.SliceLayer(l_in,
                                             indices=slice(input_flat_dim),
                                             name='conv_slice'),
                               ([0], ) + input_shape,
                               name='conv_reshaped')
        l_extra_in = ly.reshape(ly.SliceLayer(l_in,
                                              indices=slice(
                                                  input_flat_dim, None),
                                              name='extra_slice'),
                                ([0], ) + extra_input_shape,
                                name='extra_reshaped')

        l_conv_hid = l_conv_in
        for idx, conv_filter, filter_size, stride, pad in zip(
                range(len(conv_filters)),
                conv_filters,
                conv_filter_sizes,
                conv_strides,
                conv_pads,
        ):
            l_conv_hid = ly.Conv2DLayer(
                l_conv_hid,
                num_filters=conv_filter,
                filter_size=filter_size,
                stride=(stride, stride),
                pad=pad,
                nonlinearity=hidden_nonlinearity,
                name='conv_hidden_%d' % idx,
            )

        l_extra_hid = l_extra_in
        for idx, hidden_size in enumerate(extra_hidden_sizes):
            l_extra_hid = ly.DenseLayer(
                l_extra_hid,
                num_units=hidden_size,
                nonlinearity=hidden_nonlinearity,
                name='extra_hidden_%d' % idx,
                w=hidden_w_init,
                b=hidden_b_init,
            )

        l_joint_hid = ly.concat(
            [ly.flatten(l_conv_hid, name='conv_hidden_flat'), l_extra_hid],
            name='joint_hidden')

        for idx, hidden_size in enumerate(hidden_sizes):
            l_joint_hid = ly.DenseLayer(
                l_joint_hid,
                num_units=hidden_size,
                nonlinearity=hidden_nonlinearity,
                name='joint_hidden_%d' % idx,
                w=hidden_w_init,
                b=hidden_b_init,
            )
        l_out = ly.DenseLayer(
            l_joint_hid,
            num_units=output_dim,
            nonlinearity=output_nonlinearity,
            name='output',
            w=output_w_init,
            b=output_b_init,
        )
        self._l_in = l_in
        self._l_out = l_out

        LayersPowered.__init__(self, [l_out], input_layers=[l_in])
def __init__(
        self,
        n_bins=20,
        sensor_range=10.,
        sensor_span=math.pi,
        maze_id=0,
        length=1,
        maze_height=0.5,
        maze_size_scaling=2,
        # a coefficient of 0 gives no reward to the maze from the wrapped env
        coef_inner_rew=0.,
        goal_rew=1.,  # reward obtained when reaching the goal
        *args,
        **kwargs):
    self._n_bins = n_bins
    self._sensor_range = sensor_range
    self._sensor_span = sensor_span
    self._maze_id = maze_id
    self.length = length
    self.coef_inner_rew = coef_inner_rew
    self.goal_rew = goal_rew

    model_cls = self.__class__.MODEL_CLASS
    if not model_cls:
        raise NotImplementedError("MODEL_CLASS unspecified!")
    xml_path = osp.join(MODEL_DIR, model_cls.FILE)
    tree = ET.parse(xml_path)
    worldbody = tree.find(".//worldbody")

    self.MAZE_HEIGHT = height = maze_height
    self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling
    self.MAZE_STRUCTURE = structure = construct_maze(maze_id=self._maze_id,
                                                     length=self.length)

    torso_x, torso_y = self._find_robot()
    self._init_torso_x = torso_x
    self._init_torso_y = torso_y

    for i in range(len(structure)):
        for j in range(len(structure[0])):
            if str(structure[i][j]) == '1':
                # offset all coordinates so that the robot starts at the origin
                ET.SubElement(
                    worldbody,
                    "geom",
                    name="block_%d_%d" % (i, j),
                    pos="%f %f %f" % (j * size_scaling - torso_x,
                                      i * size_scaling - torso_y,
                                      height / 2 * size_scaling),
                    size="%f %f %f" % (0.5 * size_scaling,
                                       0.5 * size_scaling,
                                       height / 2 * size_scaling),
                    type="box",
                    material="",
                    contype="1",
                    conaffinity="1",
                    rgba="0.4 0.4 0.4 1")

    torso = tree.find(".//body[@name='torso']")
    geoms = torso.findall(".//geom")
    for geom in geoms:
        if 'name' not in geom.attrib:
            raise Exception("Every geom of the torso must have a name "
                            "defined")

    if self.__class__.MAZE_MAKE_CONTACTS:
        contact = ET.SubElement(tree.find("."), "contact")
        for i in range(len(structure)):
            for j in range(len(structure[0])):
                if str(structure[i][j]) == '1':
                    for geom in geoms:
                        ET.SubElement(contact,
                                      "pair",
                                      geom1=geom.attrib["name"],
                                      geom2="block_%d_%d" % (i, j))

    # Write the modified robot specification to a temporary file, so the
    # original model XML stays untouched.
    _, file_path = tempfile.mkstemp(suffix=".xml", text=True)
    tree.write(file_path)

    self._goal_range = self._find_goal_range()
    self._cached_segments = None

    # instantiate the wrapped env from the patched robot specification
    inner_env = model_cls(*args, file_path=file_path, **kwargs)
    super().__init__(inner_env)

    # Redefine the observation space
    shp = self.get_current_obs().shape
    ub = BIG * np.ones(shp)
    self.observation_space = gym.spaces.Box(-ub, ub, dtype=np.float32)

    # Always call the Serializable constructor last
    Serializable.quick_init(self, locals())
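# Hedged, stdlib-only sketch of the XML-patching pattern used above: parse a
# model file, inject a <geom> block, and write the result to a temporary file
# so the original model stays untouched. The XML content here is invented for
# the example.
import tempfile
import xml.etree.ElementTree as ET

xml_src = "<mujoco><worldbody/></mujoco>"
tree = ET.ElementTree(ET.fromstring(xml_src))
worldbody = tree.find(".//worldbody")
ET.SubElement(worldbody, "geom", name="block_0_0", type="box",
              pos="0 0 0.5", size="1 1 0.5", rgba="0.4 0.4 0.4 1")
_, file_path = tempfile.mkstemp(suffix=".xml", text=True)
tree.write(file_path)  # hand file_path to the inner env instead of the original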
def __init__(self, env_spec, name="CategoricalLSTMPolicy", hidden_dim=32, feature_network=None, prob_network=None, state_include_action=True, hidden_nonlinearity=tf.tanh, forget_bias=1.0, use_peepholes=False, lstm_layer_cls=L.LSTMLayer): """ :param env_spec: A spec for the env. :param hidden_dim: dimension of hidden layer :param hidden_nonlinearity: nonlinearity used for each hidden layer :return: """ assert isinstance(env_spec.action_space, Discrete) self._prob_network_name = "prob_network" with tf.variable_scope(name, "CategoricalLSTMPolicy"): Serializable.quick_init(self, locals()) super(CategoricalLSTMPolicy, self).__init__(env_spec) obs_dim = env_spec.observation_space.flat_dim action_dim = env_spec.action_space.flat_dim if state_include_action: input_dim = obs_dim + action_dim else: input_dim = obs_dim l_input = L.InputLayer(shape=(None, None, input_dim), name="input") if feature_network is None: feature_dim = input_dim l_flat_feature = None l_feature = l_input else: feature_dim = feature_network.output_layer.output_shape[-1] l_flat_feature = feature_network.output_layer l_feature = L.OpLayer( l_flat_feature, extras=[l_input], name="reshape_feature", op=lambda flat_feature, input: tf.reshape( flat_feature, tf.stack([ tf.shape(input)[0], tf.shape(input)[1], feature_dim ])), shape_op=lambda _, input_shape: (input_shape[ 0], input_shape[1], feature_dim)) if prob_network is None: prob_network = LSTMNetwork( input_shape=(feature_dim, ), input_layer=l_feature, output_dim=env_spec.action_space.n, hidden_dim=hidden_dim, hidden_nonlinearity=hidden_nonlinearity, output_nonlinearity=tf.nn.softmax, forget_bias=forget_bias, use_peepholes=use_peepholes, lstm_layer_cls=lstm_layer_cls, name=self._prob_network_name) self.prob_network = prob_network self.feature_network = feature_network self.l_input = l_input self.state_include_action = state_include_action flat_input_var = tf.placeholder( dtype=tf.float32, shape=(None, input_dim), name="flat_input") if feature_network is None: feature_var = flat_input_var else: with tf.name_scope("feature_network", values=[flat_input_var]): feature_var = L.get_output( l_flat_feature, {feature_network.input_layer: flat_input_var}) with tf.name_scope(self._prob_network_name, values=[feature_var]): out_prob_step, out_prob_hidden, out_step_cell = L.get_output( [ prob_network.step_output_layer, prob_network.step_hidden_layer, prob_network.step_cell_layer ], {prob_network.step_input_layer: feature_var}) self.f_step_prob = tensor_utils.compile_function([ flat_input_var, prob_network.step_prev_state_layer.input_var, ], [out_prob_step, out_prob_hidden, out_step_cell]) self.input_dim = input_dim self.action_dim = action_dim self.hidden_dim = hidden_dim self.name = name self.prev_actions = None self.prev_hiddens = None self.prev_cells = None self.dist = RecurrentCategorical(env_spec.action_space.n) out_layers = [prob_network.output_layer] if feature_network is not None: out_layers.append(feature_network.output_layer) LayersPowered.__init__(self, out_layers)
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        learn_std=True,
        init_std=1.0,
        adaptive_std=False,
        std_share_network=False,
        std_hidden_sizes=(32, 32),
        min_std=1e-6,
        std_hidden_nonlinearity=NL.tanh,
        hidden_nonlinearity=NL.tanh,
        output_nonlinearity=None,
        mean_network=None,
        std_network=None,
        dist_cls=DiagonalGaussian,
):
    """
    :param env_spec:
    :param hidden_sizes: sizes list for the fully-connected hidden layers
    :param learn_std: Is std trainable
    :param init_std: Initial std
    :param adaptive_std:
    :param std_share_network:
    :param std_hidden_sizes: sizes list for the fully-connected layers
     for std
    :param min_std: whether to make sure that the std is at least some
     threshold value, to avoid numerical issues
    :param std_hidden_nonlinearity:
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param output_nonlinearity: nonlinearity for the output layer
    :param mean_network: custom network for the output mean
    :param std_network: custom network for the output log std
    """
    assert isinstance(env_spec.action_space, Box)

    Serializable.quick_init(self, locals())

    obs_dim = env_spec.observation_space.flat_dim
    action_flat_dim = env_spec.action_space.flat_dim

    # create network
    if mean_network is None:
        mean_network = MLP(
            input_shape=(obs_dim, ),
            output_dim=action_flat_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=output_nonlinearity,
        )
    self._mean_network = mean_network

    l_mean = mean_network.output_layer
    obs_var = mean_network.input_layer.input_var

    if std_network is not None:
        l_log_std = std_network.output_layer
    else:
        if adaptive_std:
            std_network = MLP(
                input_shape=(obs_dim, ),
                input_layer=mean_network.input_layer,
                output_dim=action_flat_dim,
                hidden_sizes=std_hidden_sizes,
                hidden_nonlinearity=std_hidden_nonlinearity,
                output_nonlinearity=None,
            )
            l_log_std = std_network.output_layer
        else:
            l_log_std = ParamLayer(
                mean_network.input_layer,
                num_units=action_flat_dim,
                param=lasagne.init.Constant(np.log(init_std)),
                name="output_log_std",
                trainable=learn_std,
            )

    self.min_std = min_std

    mean_var, log_std_var = L.get_output([l_mean, l_log_std])

    if self.min_std is not None:
        log_std_var = TT.maximum(log_std_var, np.log(min_std))

    self._mean_var, self._log_std_var = mean_var, log_std_var

    self._l_mean = l_mean
    self._l_log_std = l_log_std

    self._dist = dist_cls(action_flat_dim)

    LasagnePowered.__init__(self, [l_mean, l_log_std])
    super(GaussianMLPPolicy, self).__init__(env_spec)

    self._f_dist = tensor_utils.compile_function(
        inputs=[obs_var],
        outputs=[mean_var, log_std_var],
    )
def __getstate__(self):
    d = Serializable.__getstate__(self)
    d["_obs_mean"] = self._obs_mean
    d["_obs_var"] = self._obs_var
    return d
def __init__(self, env_spec, name="GaussianMLPPolicy", hidden_sizes=(32, 32), learn_std=True, init_std=1.0, adaptive_std=False, std_share_network=False, std_hidden_sizes=(32, 32), min_std=1e-6, max_std=None, std_hidden_nonlinearity=tf.nn.tanh, hidden_nonlinearity=tf.nn.tanh, output_nonlinearity=None, mean_network=None, std_network=None, std_parameterization='exp'): """ :param env_spec: :param hidden_sizes: list of sizes for the fully-connected hidden layers :param learn_std: Is std trainable :param init_std: Initial std :param adaptive_std: :param std_share_network: :param std_hidden_sizes: list of sizes for the fully-connected layers for std :param min_std: whether to make sure that the std is at least some threshold value, to avoid numerical issues :param std_hidden_nonlinearity: :param hidden_nonlinearity: nonlinearity used for each hidden layer :param output_nonlinearity: nonlinearity for the output layer :param mean_network: custom network for the output mean :param std_network: custom network for the output log std :param std_parametrization: how the std should be parametrized. There are a few options: - exp: the logarithm of the std will be stored, and applied a exponential transformation - softplus: the std will be computed as log(1+exp(x)) :return: """ assert isinstance(env_spec.action_space, Box) StochasticPolicy.__init__(self, env_spec) Parameterized.__init__(self) Serializable.quick_init(self, locals()) if mean_network or std_network: raise NotImplementedError self.name = name self._variable_scope = tf.variable_scope(self.name, reuse=tf.AUTO_REUSE) self._name_scope = tf.name_scope(self.name) # TODO: eliminate self._dist = DiagonalGaussian(self.action_space.flat_dim) # Network parameters self._hidden_sizes = hidden_sizes self._learn_std = learn_std self._init_std = init_std self._adaptive_std = adaptive_std self._std_share_network = std_share_network self._std_hidden_sizes = std_hidden_sizes self._min_std = min_std self._max_std = max_std self._std_hidden_nonlinearity = std_hidden_nonlinearity self._hidden_nonlinearity = hidden_nonlinearity self._output_nonlinearity = output_nonlinearity self._mean_network = mean_network self._std_network = std_network self._std_parameterization = std_parameterization # Tranform std arguments to parameterized space self._init_std_param = None self._min_std_param = None self._max_std_param = None if self._std_parameterization == 'exp': self._init_std_param = np.log(init_std) if min_std: self._min_std_param = np.log(min_std) if max_std: self._max_std_param = np.log(max_std) elif self._std_parameterization == 'softplus': self._init_std_param = np.log(np.exp(init_std) - 1) if min_std: self._min_std_param = np.log(np.exp(min_std) - 1) if max_std: self._max_std_param = np.log(np.exp(max_std) - 1) else: raise NotImplementedError # Build default graph with self._name_scope: # inputs self._obs_input = self.observation_space.new_tensor_variable( name="obs_input", extra_dims=1) with tf.name_scope("default", values=[self._obs_input]): action_var, mean_var, std_param_var, dist = self._build_graph( self._obs_input) # outputs self._action = tf.identity(action_var, name="action") self._action_mean = tf.identity(mean_var, name="action_mean") self._action_std_param = tf.identity(std_param_var, "action_std_param") self._action_distribution = dist # compiled functions with tf.variable_scope("f_dist"): self.f_dist = tensor_utils.compile_function( inputs=[self._obs_input], outputs=[ self._action, self._action_mean, self._action_std_param ], )
def copy(self):
    copy = Serializable.clone(self)
    ptu.copy_model_params_from_to(self, copy)
    return copy
def __init__(self, *args, **kwargs):
    super(AntEnv, self).__init__(*args, **kwargs)
    Serializable.__init__(self, *args, **kwargs)
def __init__(self, max_opt_itr=20, callback=None):
    Serializable.quick_init(self, locals())
    self._max_opt_itr = max_opt_itr
    self._opt_fun = None
    self._target = None
    self._callback = callback
def __init__(self,
             env_spec,
             name='GaussianMLPPolicy',
             hidden_sizes=(32, 32),
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_hidden_sizes=(32, 32),
             min_std=1e-6,
             std_hidden_nonlinearity=tf.nn.tanh,
             hidden_nonlinearity=tf.nn.tanh,
             output_nonlinearity=None,
             mean_network=None,
             std_network=None,
             std_parametrization='exp'):
    """
    :param env_spec:
    :param hidden_sizes: list of sizes for the fully-connected hidden layers
    :param learn_std: Is std trainable
    :param init_std: Initial std
    :param adaptive_std:
    :param std_share_network:
    :param std_hidden_sizes: list of sizes for the fully-connected layers
     for std
    :param min_std: whether to make sure that the std is at least some
     threshold value, to avoid numerical issues
    :param std_hidden_nonlinearity:
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param output_nonlinearity: nonlinearity for the output layer
    :param mean_network: custom network for the output mean
    :param std_network: custom network for the output log std
    :param std_parametrization: how the std should be parametrized. There
     are a few options:
        - exp: the logarithm of the std is stored, and an exponential
          transformation is applied to recover the std
        - softplus: the std is computed as log(1 + exp(x))
    """
    assert isinstance(env_spec.action_space, akro.Box)

    Serializable.quick_init(self, locals())
    self.name = name
    self._mean_network_name = 'mean_network'
    self._std_network_name = 'std_network'

    with tf.variable_scope(name, 'GaussianMLPPolicy'):
        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim

        # create network
        if mean_network is None:
            if std_share_network:
                if std_parametrization == 'exp':
                    init_std_param = np.log(init_std)
                elif std_parametrization == 'softplus':
                    init_std_param = np.log(np.exp(init_std) - 1)
                else:
                    raise NotImplementedError
                b = np.concatenate((np.zeros(action_dim),
                                    np.full(action_dim, init_std_param)),
                                   axis=0)
                b = tf.constant_initializer(b)
                with tf.variable_scope(self._mean_network_name):
                    mean_network = MLP(
                        name='mlp',
                        input_shape=(obs_dim, ),
                        output_dim=2 * action_dim,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=output_nonlinearity,
                        output_b_init=b,
                    )
                    l_mean = L.SliceLayer(
                        mean_network.output_layer,
                        slice(action_dim),
                        name='mean_slice',
                    )
            else:
                mean_network = MLP(
                    name=self._mean_network_name,
                    input_shape=(obs_dim, ),
                    output_dim=action_dim,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity,
                )
                l_mean = mean_network.output_layer
        self._mean_network = mean_network

        obs_var = mean_network.input_layer.input_var

        if std_network is not None:
            l_std_param = std_network.output_layer
        else:
            if std_parametrization == 'exp':
                init_std_param = np.log(init_std)
            elif std_parametrization == 'softplus':
                init_std_param = np.log(np.exp(init_std) - 1)
            else:
                raise ValueError('Invalid argument for std_parametrization'
                                 ': {}'.format(std_parametrization))

            if adaptive_std:
                b = tf.constant_initializer(init_std_param)
                std_network = MLP(
                    name=self._std_network_name,
                    input_shape=(obs_dim, ),
                    input_layer=mean_network.input_layer,
                    output_dim=action_dim,
                    hidden_sizes=std_hidden_sizes,
                    hidden_nonlinearity=std_hidden_nonlinearity,
                    output_nonlinearity=None,
                    output_b_init=b,
                )
                l_std_param = std_network.output_layer
            elif std_share_network:
                with tf.variable_scope(self._std_network_name):
                    l_std_param = L.SliceLayer(
                        mean_network.output_layer,
                        slice(action_dim, 2 * action_dim),
                        name='std_slice',
                    )
            else:
                with tf.variable_scope(self._std_network_name):
                    l_std_param = L.ParamLayer(
                        mean_network.input_layer,
                        num_units=action_dim,
                        param=tf.constant_initializer(init_std_param),
                        name='output_std_param',
                        trainable=learn_std,
                    )

        self.std_parametrization = std_parametrization

        if std_parametrization == 'exp':
            min_std_param = np.log(min_std)
        elif std_parametrization == 'softplus':
            min_std_param = np.log(np.exp(min_std) - 1)
        else:
            raise NotImplementedError

        self.min_std_param = min_std_param

        self._l_mean = l_mean
        self._l_std_param = l_std_param

        self._dist = DiagonalGaussian(action_dim)

        LayersPowered.__init__(self, [l_mean, l_std_param])
        super(GaussianMLPPolicy, self).__init__(env_spec)

        dist_info_sym = self.dist_info_sym(
            mean_network.input_layer.input_var, dict())
        mean_var = tf.identity(dist_info_sym['mean'], name='mean')
        log_std_var = tf.identity(dist_info_sym['log_std'],
                                  name='standard_dev')

        self._f_dist = tensor_utils.compile_function(
            inputs=[obs_var],
            outputs=[mean_var, log_std_var],
        )
def __init__(self, *args, **kwargs):
    super(PointEnv, self).__init__(*args, **kwargs)
    Serializable.quick_init(self, locals())
def __init__(
        self,
        input_shape,
        output_dim,
        name='BernoulliMLPRegressor',
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.relu,
        optimizer=None,
        tr_optimizer=None,
        use_trust_region=True,
        max_kl_step=0.01,
        normalize_inputs=True,
        no_initial_trust_region=True,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean
     network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the
     mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use trust region constraint.
    :param max_kl_step: KL divergence constraint for each iteration
    """
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())

    with tf.compat.v1.variable_scope(name):
        if optimizer is None:
            optimizer = LbfgsOptimizer()
        if tr_optimizer is None:
            tr_optimizer = ConjugateGradientOptimizer()

        self.output_dim = output_dim
        self.optimizer = optimizer
        self.tr_optimizer = tr_optimizer

        p_network = MLP(input_shape=input_shape,
                        output_dim=output_dim,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=tf.nn.sigmoid,
                        name='p_network')

        l_p = p_network.output_layer

        LayersPowered.__init__(self, [l_p])

        xs_var = p_network.input_layer.input_var
        ys_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                          shape=(None, output_dim),
                                          name='ys')
        old_p_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                             shape=(None, output_dim),
                                             name='old_p')

        x_mean_var = tf.compat.v1.get_variable(
            name='x_mean',
            initializer=tf.zeros_initializer(),
            shape=(1, ) + input_shape)
        x_std_var = tf.compat.v1.get_variable(
            name='x_std',
            initializer=tf.ones_initializer(),
            shape=(1, ) + input_shape)

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        p_var = L.get_output(l_p,
                             {p_network.input_layer: normalized_xs_var})

        old_info_vars = dict(p=old_p_var)
        info_vars = dict(p=p_var)

        dist = self._dist = Bernoulli(output_dim)

        mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars))

        loss = -tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars))

        predicted = p_var >= 0.5

        self.f_predict = tensor_utils.compile_function([xs_var], predicted)
        self.f_p = tensor_utils.compile_function([xs_var], p_var)
        self.l_p = l_p

        self.optimizer.update_opt(loss=loss,
                                  target=self,
                                  network_outputs=[p_var],
                                  inputs=[xs_var, ys_var])
        self.tr_optimizer.update_opt(loss=loss,
                                     target=self,
                                     network_outputs=[p_var],
                                     inputs=[xs_var, ys_var, old_p_var],
                                     leq_constraint=(mean_kl, max_kl_step))

        self.use_trust_region = use_trust_region
        self.name = name

        self.normalize_inputs = normalize_inputs
        self.x_mean_var = x_mean_var
        self.x_std_var = x_std_var
        self.first_optimized = not no_initial_trust_region
def __init__(self, target=DEFAULT_TARGET, *args, **kwargs):
    self.target = target
    kwargs['file_path'] = mujoco_model_path(FILE)
    super(PR2ArmClockEnv, self).__init__(*args, **kwargs)
    Serializable.quick_init(self, locals())
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
    self.set_param_values(d["params"])
def __init__(self):
    Serializable.quick_init(self, locals())
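# A minimal, self-contained sketch of the idea behind Serializable.quick_init
# and Serializable.clone (an illustration, not the library's implementation):
# record the constructor arguments so an equivalent object can be rebuilt.
import copy
import inspect

class MiniSerializable:
    def quick_init(self, local_vars):
        # capture every constructor argument except `self`
        sig = inspect.signature(type(self).__init__)
        self._ctor_args = {name: local_vars[name]
                           for name in sig.parameters if name != 'self'}

    def clone(self):
        # rebuild a fresh instance from the recorded arguments
        return type(self)(**copy.deepcopy(self._ctor_args))

class ToyPolicy(MiniSerializable):
    def __init__(self, hidden_sizes=(32, 32)):
        self.hidden_sizes = hidden_sizes
        self.quick_init(locals())

clone = ToyPolicy((64, 64)).clone()
assert clone.hidden_sizes == (64, 64)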
def __init__(self,
             env_spec,
             hidden_sizes=(32, 32),
             hidden_nonlinearity=NL.rectify,
             hidden_w_init=lasagne.init.HeUniform(),
             hidden_b_init=lasagne.init.Constant(0.),
             action_merge_layer=-2,
             output_nonlinearity=None,
             output_w_init=lasagne.init.Uniform(-3e-3, 3e-3),
             output_b_init=lasagne.init.Uniform(-3e-3, 3e-3),
             bn=False):
    Serializable.quick_init(self, locals())

    l_obs = L.InputLayer(shape=(None,
                                env_spec.observation_space.flat_dim),
                         name="obs")
    l_action = L.InputLayer(shape=(None, env_spec.action_space.flat_dim),
                            name="actions")

    n_layers = len(hidden_sizes) + 1

    if n_layers > 1:
        action_merge_layer = \
            (action_merge_layer % n_layers + n_layers) % n_layers
    else:
        action_merge_layer = 1

    l_hidden = l_obs

    for idx, size in enumerate(hidden_sizes):
        if bn:
            l_hidden = batch_norm(l_hidden)

        if idx == action_merge_layer:
            l_hidden = L.ConcatLayer([l_hidden, l_action])

        l_hidden = L.DenseLayer(l_hidden,
                                num_units=size,
                                W=hidden_w_init,
                                b=hidden_b_init,
                                nonlinearity=hidden_nonlinearity,
                                name="h%d" % (idx + 1))

    if action_merge_layer == n_layers:
        l_hidden = L.ConcatLayer([l_hidden, l_action])

    l_output = L.DenseLayer(l_hidden,
                            num_units=1,
                            W=output_w_init,
                            b=output_b_init,
                            nonlinearity=output_nonlinearity,
                            name="output")

    output_var = L.get_output(l_output, deterministic=True).flatten()

    self._f_qval = tensor_utils.compile_function(
        [l_obs.input_var, l_action.input_var], output_var)
    self._output_layer = l_output
    self._obs_layer = l_obs
    self._action_layer = l_action
    self._output_nonlinearity = output_nonlinearity

    LasagnePowered.__init__(self, [l_output])
def __init__(
        self,
        input_shape,
        output_dim,
        name='CategoricalMLPRegressor',
        prob_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        optimizer=None,
        tr_optimizer=None,
        use_trust_region=True,
        max_kl_step=0.01,
        normalize_inputs=True,
        no_initial_trust_region=True,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean
     network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the
     mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use trust region constraint.
    :param max_kl_step: KL divergence constraint for each iteration
    """
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())

    with tf.variable_scope(name, 'CategoricalMLPRegressor'):
        if optimizer is None:
            optimizer = LbfgsOptimizer()
        if tr_optimizer is None:
            tr_optimizer = ConjugateGradientOptimizer()

        self.output_dim = output_dim
        self.optimizer = optimizer
        self.tr_optimizer = tr_optimizer

        self._prob_network_name = 'prob_network'
        if prob_network is None:
            prob_network = MLP(input_shape=input_shape,
                               output_dim=output_dim,
                               hidden_sizes=hidden_sizes,
                               hidden_nonlinearity=hidden_nonlinearity,
                               output_nonlinearity=tf.nn.softmax,
                               name=self._prob_network_name)

        l_prob = prob_network.output_layer

        LayersPowered.__init__(self, [l_prob])

        xs_var = prob_network.input_layer.input_var
        ys_var = tf.placeholder(dtype=tf.float32,
                                shape=[None, output_dim],
                                name='ys')
        old_prob_var = tf.placeholder(dtype=tf.float32,
                                      shape=[None, output_dim],
                                      name='old_prob')

        x_mean_var = tf.get_variable(name='x_mean',
                                     shape=(1, ) + input_shape,
                                     initializer=tf.constant_initializer(
                                         0., dtype=tf.float32))
        x_std_var = tf.get_variable(name='x_std',
                                    shape=(1, ) + input_shape,
                                    initializer=tf.constant_initializer(
                                        1., dtype=tf.float32))

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        with tf.name_scope(self._prob_network_name,
                           values=[normalized_xs_var]):
            prob_var = L.get_output(
                l_prob, {prob_network.input_layer: normalized_xs_var})

        old_info_vars = dict(prob=old_prob_var)
        info_vars = dict(prob=prob_var)

        dist = self._dist = Categorical(output_dim)

        mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars))

        loss = -tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars))

        predicted = tf.one_hot(tf.argmax(prob_var, axis=1),
                               depth=output_dim)

        self.prob_network = prob_network
        self.f_predict = tensor_utils.compile_function([xs_var], predicted)
        self.f_prob = tensor_utils.compile_function([xs_var], prob_var)
        self.l_prob = l_prob

        self.optimizer.update_opt(loss=loss,
                                  target=self,
                                  network_outputs=[prob_var],
                                  inputs=[xs_var, ys_var])
        self.tr_optimizer.update_opt(
            loss=loss,
            target=self,
            network_outputs=[prob_var],
            inputs=[xs_var, ys_var, old_prob_var],
            leq_constraint=(mean_kl, max_kl_step))

        self.use_trust_region = use_trust_region
        self.name = name

        self.normalize_inputs = normalize_inputs
        self.x_mean_var = x_mean_var
        self.x_std_var = x_std_var
        self.first_optimized = not no_initial_trust_region
def __getstate__(self): d = Serializable.__getstate__(self) d["params"] = self.get_param_values() return d
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
    self._obs_mean = d["_obs_mean"]
    self._obs_var = d["_obs_var"]
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
    global load_params
    if load_params:
        self.set_param_values(d["params"])
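# Hedged sketch of the __getstate__/__setstate__ contract used above:
# constructor state is restored first, then the learned parameter values. The
# toy class mimics the pattern with plain pickle; the real classes delegate to
# Serializable and get_param_values/set_param_values.
import pickle
import numpy as np

class ToyParameterized:
    def __init__(self, dim):
        self.dim = dim
        self.params = np.zeros(dim)

    def __getstate__(self):
        return {"dim": self.dim, "params": self.params.copy()}

    def __setstate__(self, d):
        self.__init__(d["dim"])     # rebuild the object first
        self.params = d["params"]   # then restore the learned parameters

obj = ToyParameterized(3)
obj.params += 1.0
restored = pickle.loads(pickle.dumps(obj))
assert np.allclose(restored.params, obj.params)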
def run_task(*_):
    # Configure TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config).as_default() as tf_session:
        ## Load data from itr_N.pkl
        with open(snapshot_file, 'rb') as file:
            saved_data = dill.load(file)

        ## Construct PathTrie and find missing skill description
        # This is basically ASA.decide_new_skill
        min_length = 2
        max_length = 4
        action_map = {i: ch
                      for i, ch in enumerate('ABCDEFGHIJKLM^>v<')
                      }  # for Gridworld 13reg
        min_f_score = 1
        max_results = 10
        aggregations = []  # sublist of ['mean', 'most_freq',
        # 'nearest_mean', 'medoid'] or 'all'

        paths = saved_data['paths']
        path_trie = PathTrie(saved_data['hrl_policy'].num_skills)
        for path in paths:
            actions = path['actions'].argmax(axis=1).tolist()
            observations = path['observations']
            path_trie.add_all_subpaths(actions,
                                       observations,
                                       min_length=min_length,
                                       max_length=max_length)
        logger.log('Searched {} rollouts'.format(len(paths)))

        frequent_paths = path_trie.items(
            action_map=action_map,
            min_count=10,  # len(paths) * 2
            min_f_score=min_f_score,
            max_results=max_results,
            aggregations=aggregations)
        logger.log(
            'Found {} frequent paths: [index, actions, count, f-score]'.
            format(len(frequent_paths)))
        for i, f_path in enumerate(frequent_paths):
            logger.log(' {:2}: {:{pad}}\t{}\t{:.3f}'.format(
                i,
                f_path['actions_text'],
                f_path['count'],
                f_path['f_score'],
                pad=max_length))

        top_subpath = frequent_paths[0]
        start_obss = top_subpath['start_observations']
        end_obss = top_subpath['end_observations']

        ## Prepare elements for training
        # Environment
        base_env = saved_data[
            'env'].env.env  # <NormalizedEnv<GridworldGathererEnv instance>>
        skill_learning_env = TfEnv(
            SkillLearningEnv(
                # base env that was wrapped in HierarchizedEnv
                # (not fully unwrapped - may be normalized!)
                env=base_env,
                start_obss=start_obss,
                end_obss=end_obss))

        # Skill policy
        hrl_policy = saved_data['hrl_policy']
        new_skill_policy, new_skill_id = hrl_policy.create_new_skill(
            end_obss=end_obss)

        # Baseline - clone the baseline specified in low_algo_kwargs, or
        # the top algo's baseline
        low_algo_kwargs = dict(saved_data['low_algo_kwargs'])
        baseline_to_clone = low_algo_kwargs.get('baseline',
                                                saved_data['baseline'])
        baseline = Serializable.clone(  # to create a blank baseline
            obj=baseline_to_clone,
            name='{}Skill{}'.format(
                type(baseline_to_clone).__name__, new_skill_id))
        low_algo_kwargs['baseline'] = baseline
        low_algo_cls = saved_data['low_algo_cls']

        # Set custom training params (should've been set in asa_basic_run)
        low_algo_kwargs['batch_size'] = 20000
        low_algo_kwargs[
            'max_path_length'] = 800  # maximum distance in map is 108
        low_algo_kwargs['n_itr'] = 300
        low_algo_kwargs['discount'] = 0.99

        # Algorithm
        algo = low_algo_cls(env=skill_learning_env,
                            policy=new_skill_policy,
                            **low_algo_kwargs)

        # Logger parameters
        logger_snapshot_dir_before = logger.get_snapshot_dir()
        logger_snapshot_mode_before = logger.get_snapshot_mode()
        logger_snapshot_gap_before = logger.get_snapshot_gap()
        # No need to change snapshot dir in this script, it is used in
        # ASA-algo.make_new_skill()
        # logger.set_snapshot_dir(os.path.join(
        #     logger_snapshot_dir_before,
        #     'skill{}'.format(new_skill_id)
        # ))
        logger.set_snapshot_mode('none')
        logger.set_tensorboard_step_key('Iteration')

        ## Train new skill
        with logger.prefix('Skill {} | '.format(new_skill_id)):
            algo.train(sess=tf_session)

        ## Save the new policy and its end_obss (we'll construct the skill
        ## stopping function from them manually in
        ## asa_resume_with_new_skill.py)
        out_file = os.path.join(logger.get_snapshot_dir(), 'final.pkl')
        with open(out_file, 'wb') as file:
            out_data = {'policy': new_skill_policy, 'subpath': top_subpath}
            dill.dump(out_data, file)

        # Restore logger parameters
        logger.set_snapshot_dir(logger_snapshot_dir_before)
        logger.set_snapshot_mode(logger_snapshot_mode_before)
        logger.set_snapshot_gap(logger_snapshot_gap_before)
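# Hedged companion sketch: reading final.pkl back, matching the out_data dict
# written above. The path is illustrative; asa_resume_with_new_skill.py is
# where the loaded pieces are actually consumed.
import dill

with open('final.pkl', 'rb') as f:
    saved = dill.load(f)
new_skill_policy = saved['policy']
end_obss = saved['subpath']['end_observations']  # for the skill stopping function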
def __init__(self,
             env_spec,
             conv_filters,
             conv_filter_sizes,
             conv_strides,
             conv_pads,
             hidden_sizes=(64, 64),
             pool_size=2,
             name="ContinuousConvPolicy",
             hidden_nonlinearity=tf.nn.relu,
             output_nonlinearity=tf.nn.tanh,
             input_include_goal=False,
             weight_normalization=False,
             pooling=False,
             bn=False):
    """
    Initialize the policy.

    Args:
        env_spec(): environment specification.
        hidden_sizes(list or tuple, optional): numbers of hidden units for
            all fully-connected hidden layers.
        name(str, optional): name of the policy.
        hidden_nonlinearity(optional): an activation shared by all fc
            layers.
        output_nonlinearity(optional): an activation used by the output
            layer.
        bn(bool, optional): whether to apply batch normalization to the
            layers.
    """
    assert isinstance(env_spec.action_space, Box)
    Serializable.quick_init(self, locals())
    super(ContinuousConvPolicy, self).__init__(env_spec)

    self.name = name
    self._env_spec = env_spec
    if input_include_goal:
        self._obs_dim = env_spec.observation_space.flat_dim_with_keys(
            ["observation", "desired_goal"])
    else:
        self._obs_dim = env_spec.observation_space.flat_dim
    self._action_dim = env_spec.action_space.flat_dim
    self._action_bound = env_spec.action_space.high
    self._conv_filters = conv_filters
    self._conv_filter_sizes = conv_filter_sizes
    self._conv_strides = conv_strides
    self._conv_pads = conv_pads
    self._hidden_sizes = hidden_sizes
    self._hidden_nonlinearity = hidden_nonlinearity
    self._output_nonlinearity = output_nonlinearity
    self._batch_norm = bn
    self._weight_normalization = weight_normalization
    self._pooling = pooling
    self._pool_size = pool_size
    self._policy_network_name = "policy_network"

    # Build the network and initialize as Parameterized
    self._f_prob_online, self._output_layer, self._obs_layer = self.build_net(  # noqa: E501
        name=self.name)
    LayersPowered.__init__(self, [self._output_layer])
def __init__(
        self,
        input_shape,
        output_dim,
        prob_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        name=None,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean
     network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the
     mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use trust region constraint.
    :param step_size: KL divergence constraint for each iteration
    """
    Serializable.quick_init(self, locals())

    if optimizer is None:
        if use_trust_region:
            optimizer = PenaltyLbfgsOptimizer()
        else:
            optimizer = LbfgsOptimizer()

    self.output_dim = output_dim
    self._optimizer = optimizer

    if prob_network is None:
        prob_network = MLP(
            input_shape=input_shape,
            output_dim=output_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    l_prob = prob_network.output_layer

    LasagnePowered.__init__(self, [l_prob])

    xs_var = prob_network.input_layer.input_var
    ys_var = TT.imatrix("ys")
    old_prob_var = TT.matrix("old_prob")

    x_mean_var = theano.shared(np.zeros((1, ) + input_shape),
                               name="x_mean",
                               broadcastable=(True, ) +
                               (False, ) * len(input_shape))
    x_std_var = theano.shared(np.ones((1, ) + input_shape),
                              name="x_std",
                              broadcastable=(True, ) +
                              (False, ) * len(input_shape))

    normalized_xs_var = (xs_var - x_mean_var) / x_std_var

    prob_var = L.get_output(l_prob,
                            {prob_network.input_layer: normalized_xs_var})

    old_info_vars = dict(prob=old_prob_var)
    info_vars = dict(prob=prob_var)

    dist = self._dist = Categorical(output_dim)

    mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))

    loss = -TT.mean(dist.log_likelihood_sym(ys_var, info_vars))

    predicted = special.to_onehot_sym(TT.argmax(prob_var, axis=1),
                                      output_dim)

    self._f_predict = ext.compile_function([xs_var], predicted)
    self._f_prob = ext.compile_function([xs_var], prob_var)
    self._prob_network = prob_network
    self._l_prob = l_prob

    optimizer_args = dict(
        loss=loss,
        target=self,
        network_outputs=[prob_var],
    )

    if use_trust_region:
        optimizer_args["leq_constraint"] = (mean_kl, step_size)
        optimizer_args["inputs"] = [xs_var, ys_var, old_prob_var]
    else:
        optimizer_args["inputs"] = [xs_var, ys_var]

    self._optimizer.update_opt(**optimizer_args)

    self._use_trust_region = use_trust_region
    self._name = name

    self._normalize_inputs = normalize_inputs
    self._x_mean_var = x_mean_var
    self._x_std_var = x_std_var
def __init__(self,
             input_shape,
             output_dim,
             conv_filters,
             conv_filter_sizes,
             conv_strides,
             conv_pads,
             hidden_sizes,
             hidden_nonlinearity=tf.nn.tanh,
             output_nonlinearity=None,
             name='GaussianConvRegressor',
             mean_network=None,
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_conv_filters=[],
             std_conv_filter_sizes=[],
             std_conv_strides=[],
             std_conv_pads=[],
             std_hidden_sizes=[],
             std_hidden_nonlinearity=None,
             std_output_nonlinearity=None,
             normalize_inputs=True,
             normalize_outputs=True,
             subsample_factor=1.,
             optimizer=None,
             optimizer_args=dict(),
             use_trust_region=True,
             max_kl_step=0.01):
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())
    self._mean_network_name = 'mean_network'
    self._std_network_name = 'std_network'

    with tf.variable_scope(name):
        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
            else:
                optimizer = LbfgsOptimizer(**optimizer_args)
        else:
            optimizer = optimizer(**optimizer_args)

        self._optimizer = optimizer
        self._subsample_factor = subsample_factor

        if mean_network is None:
            if std_share_network:
                b = np.concatenate([
                    np.zeros(output_dim),
                    np.full(output_dim, np.log(init_std))
                ], axis=0)  # yapf: disable
                b = tf.constant_initializer(b)
                mean_network = ConvNetwork(
                    name=self._mean_network_name,
                    input_shape=input_shape,
                    output_dim=2 * output_dim,
                    conv_filters=conv_filters,
                    conv_filter_sizes=conv_filter_sizes,
                    conv_strides=conv_strides,
                    conv_pads=conv_pads,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity,
                    output_b_init=b)
                l_mean = layers.SliceLayer(
                    mean_network.output_layer,
                    slice(output_dim),
                    name='mean_slice',
                )
            else:
                mean_network = ConvNetwork(
                    name=self._mean_network_name,
                    input_shape=input_shape,
                    output_dim=output_dim,
                    conv_filters=conv_filters,
                    conv_filter_sizes=conv_filter_sizes,
                    conv_strides=conv_strides,
                    conv_pads=conv_pads,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity)
                l_mean = mean_network.output_layer

        if adaptive_std:
            l_log_std = ConvNetwork(
                name=self._std_network_name,
                input_shape=input_shape,
                output_dim=output_dim,
                conv_filters=std_conv_filters,
                conv_filter_sizes=std_conv_filter_sizes,
                conv_strides=std_conv_strides,
                conv_pads=std_conv_pads,
                hidden_sizes=std_hidden_sizes,
                hidden_nonlinearity=std_hidden_nonlinearity,
                output_nonlinearity=std_output_nonlinearity,
                output_b_init=tf.constant_initializer(np.log(init_std)),
            ).output_layer
        elif std_share_network:
            l_log_std = layers.SliceLayer(
                mean_network.output_layer,
                slice(output_dim, 2 * output_dim),
                name='log_std_slice',
            )
        else:
            l_log_std = layers.ParamLayer(
                mean_network.input_layer,
                num_units=output_dim,
                param=tf.constant_initializer(np.log(init_std)),
                trainable=learn_std,
                name=self._std_network_name,
            )

        LayersPowered.__init__(self, [l_mean, l_log_std])

        xs_var = mean_network.input_layer.input_var
        ys_var = tf.placeholder(dtype=tf.float32,
                                name='ys',
                                shape=(None, output_dim))
        old_means_var = tf.placeholder(dtype=tf.float32,
                                       name='old_means',
                                       shape=(None, output_dim))
        old_log_stds_var = tf.placeholder(dtype=tf.float32,
                                          name='old_log_stds',
                                          shape=(None, output_dim))

        x_mean_var = tf.Variable(
            np.zeros((1, np.prod(input_shape)), dtype=np.float32),
            name='x_mean',
        )
        x_std_var = tf.Variable(
            np.ones((1, np.prod(input_shape)), dtype=np.float32),
            name='x_std',
        )
        y_mean_var = tf.Variable(
            np.zeros((1, output_dim), dtype=np.float32),
            name='y_mean',
        )
        y_std_var = tf.Variable(
            np.ones((1, output_dim), dtype=np.float32),
            name='y_std',
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var
        normalized_ys_var = (ys_var - y_mean_var) / y_std_var

        with tf.name_scope(self._mean_network_name,
                           values=[normalized_xs_var]):
            normalized_means_var = layers.get_output(
                l_mean, {mean_network.input_layer: normalized_xs_var})
        with tf.name_scope(self._std_network_name,
                           values=[normalized_xs_var]):
            normalized_log_stds_var = layers.get_output(
                l_log_std, {mean_network.input_layer: normalized_xs_var})

        means_var = normalized_means_var * y_std_var + y_mean_var
        log_stds_var = normalized_log_stds_var + tf.log(y_std_var)

        normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
        normalized_old_log_stds_var = old_log_stds_var - tf.log(y_std_var)

        dist = self._dist = DiagonalGaussian(output_dim)

        normalized_dist_info_vars = dict(mean=normalized_means_var,
                                         log_std=normalized_log_stds_var)

        mean_kl = tf.reduce_mean(
            dist.kl_sym(
                dict(mean=normalized_old_means_var,
                     log_std=normalized_old_log_stds_var),
                normalized_dist_info_vars,
            ))

        loss = -tf.reduce_mean(
            dist.log_likelihood_sym(normalized_ys_var,
                                    normalized_dist_info_vars))

        self._f_predict = tensor_utils.compile_function([xs_var],
                                                        means_var)
        self._f_pdists = tensor_utils.compile_function(
            [xs_var], [means_var, log_stds_var])
        self._l_mean = l_mean
        self._l_log_std = l_log_std

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[
                normalized_means_var, normalized_log_stds_var
            ],
        )

        if use_trust_region:
            optimizer_args['leq_constraint'] = (mean_kl, max_kl_step)
            optimizer_args['inputs'] = [
                xs_var, ys_var, old_means_var, old_log_stds_var
            ]
        else:
            optimizer_args['inputs'] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._normalize_outputs = normalize_outputs

        self._mean_network = mean_network
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
        self._y_mean_var = y_mean_var
        self._y_std_var = y_std_var
def __init__( self, env_spec, name="GaussianLSTMPolicy", hidden_dim=32, feature_network=None, state_include_action=True, hidden_nonlinearity=tf.tanh, learn_std=True, init_std=1.0, output_nonlinearity=None, lstm_layer_cls=L.LSTMLayer, use_peepholes=False, std_share_network=False, ): """ :param env_spec: A spec for the env. :param hidden_dim: dimension of hidden layer :param hidden_nonlinearity: nonlinearity used for each hidden layer :return: """ assert isinstance(env_spec.action_space, Box) self._mean_network_name = "mean_network" self._std_network_name = "std_network" with tf.variable_scope(name, "GaussianLSTMPolicy"): Serializable.quick_init(self, locals()) super(GaussianLSTMPolicy, self).__init__(env_spec) obs_dim = env_spec.observation_space.flat_dim action_dim = env_spec.action_space.flat_dim if state_include_action: input_dim = obs_dim + action_dim else: input_dim = obs_dim l_input = L.InputLayer(shape=(None, None, input_dim), name="input") if feature_network is None: feature_dim = input_dim l_flat_feature = None l_feature = l_input else: feature_dim = feature_network.output_layer.output_shape[-1] l_flat_feature = feature_network.output_layer l_feature = L.OpLayer( l_flat_feature, extras=[l_input], name="reshape_feature", op=lambda flat_feature, input: tf.reshape( flat_feature, tf.stack([ tf.shape(input)[0], tf.shape(input)[1], feature_dim ])), shape_op=lambda _, input_shape: (input_shape[ 0], input_shape[1], feature_dim)) if std_share_network: mean_network = LSTMNetwork( input_shape=(feature_dim, ), input_layer=l_feature, output_dim=2 * action_dim, hidden_dim=hidden_dim, hidden_nonlinearity=hidden_nonlinearity, output_nonlinearity=output_nonlinearity, lstm_layer_cls=lstm_layer_cls, name="lstm_mean_network", use_peepholes=use_peepholes, ) l_mean = L.SliceLayer( mean_network.output_layer, slice(action_dim), name="mean_slice", ) l_step_mean = L.SliceLayer( mean_network.step_output_layer, slice(action_dim), name="step_mean_slice", ) l_log_std = L.SliceLayer( mean_network.output_layer, slice(action_dim, 2 * action_dim), name="log_std_slice", ) l_step_log_std = L.SliceLayer( mean_network.step_output_layer, slice(action_dim, 2 * action_dim), name="step_log_std_slice", ) else: mean_network = LSTMNetwork( input_shape=(feature_dim, ), input_layer=l_feature, output_dim=action_dim, hidden_dim=hidden_dim, hidden_nonlinearity=hidden_nonlinearity, output_nonlinearity=output_nonlinearity, lstm_layer_cls=lstm_layer_cls, name="lstm_mean_network", use_peepholes=use_peepholes, ) l_mean = mean_network.output_layer l_step_mean = mean_network.step_output_layer l_log_std = L.ParamLayer( mean_network.input_layer, num_units=action_dim, param=tf.constant_initializer(np.log(init_std)), name="output_log_std", trainable=learn_std, ) l_step_log_std = L.ParamLayer( mean_network.step_input_layer, num_units=action_dim, param=l_log_std.param, name="step_output_log_std", trainable=learn_std, ) self.mean_network = mean_network self.feature_network = feature_network self.l_input = l_input self.state_include_action = state_include_action self.name = name flat_input_var = tf.placeholder( dtype=tf.float32, shape=(None, input_dim), name="flat_input") if feature_network is None: feature_var = flat_input_var else: feature_var = L.get_output( l_flat_feature, {feature_network.input_layer: flat_input_var}) with tf.name_scope(self._mean_network_name, values=[feature_var]): (out_step_mean, out_step_hidden, out_mean_cell) = L.get_output( [ l_step_mean, mean_network.step_hidden_layer, mean_network.step_cell_layer ], 
{mean_network.step_input_layer: feature_var}) out_step_mean = tf.identity(out_step_mean, "step_mean") out_step_hidden = tf.identity(out_step_hidden, "step_hidden") out_mean_cell = tf.identity(out_mean_cell, "mean_cell") with tf.name_scope(self._std_network_name, values=[feature_var]): out_step_log_std = L.get_output( l_step_log_std, {mean_network.step_input_layer: feature_var}) out_step_log_std = tf.identity(out_step_log_std, "step_log_std") self.f_step_mean_std = tensor_utils.compile_function([ flat_input_var, mean_network.step_prev_state_layer.input_var, ], [ out_step_mean, out_step_log_std, out_step_hidden, out_mean_cell ]) self.l_mean = l_mean self.l_log_std = l_log_std self.input_dim = input_dim self.action_dim = action_dim self.hidden_dim = hidden_dim self.prev_actions = None self.prev_hiddens = None self.prev_cells = None self.dist = RecurrentDiagonalGaussian(action_dim) out_layers = [l_mean, l_log_std] if feature_network is not None: out_layers.append(feature_network.output_layer) LayersPowered.__init__(self, out_layers)
def __init__(
        self,
        name,
        input_shape,
        output_dim,
        hidden_sizes,
        conv_filters,
        conv_filter_sizes,
        conv_strides,
        conv_pads,
        hidden_nonlinearity=NL.rectify,
        mean_network=None,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        subsample_factor=1.0,
        batchsize=None,
        learn_std=True,
        init_std=1.0,
        adaptive_std=False,
        std_share_network=False,
        std_conv_filters=[],
        std_conv_filter_sizes=[],
        std_conv_strides=[],
        std_conv_pads=[],
        std_hidden_sizes=(32, 32),
        std_nonlinearity=None,
        normalize_inputs=True,
        normalize_outputs=True,
):
    """
    :param input_shape: usually for images of the form
     (width, height, channel)
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean
     network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the
     mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use trust region constraint.
    :param step_size: KL divergence constraint for each iteration
    :param learn_std: Whether to learn the standard deviations. Only
     effective if adaptive_std is False. If adaptive_std is True, this
     parameter is ignored, and the weights for the std network are always
     learned.
    :param adaptive_std: Whether to make the std a function of the states.
    :param std_share_network: Whether to use the same network as the mean.
    :param std_hidden_sizes: Number of hidden units of each layer of the
     std network. Only used if `std_share_network` is False. It defaults to
     the same architecture as the mean.
    :param std_nonlinearity: Non-linearity used for each layer of the std
     network. Only used if `std_share_network` is False. It defaults to the
     same non-linearity as the mean.
    """
    Serializable.quick_init(self, locals())

    if optimizer is None:
        if use_trust_region:
            optimizer = PenaltyLbfgsOptimizer("optimizer")
        else:
            optimizer = LbfgsOptimizer("optimizer")

    self._optimizer = optimizer

    self.input_shape = input_shape
    if mean_network is None:
        mean_network = ConvNetwork(
            name="mean_network",
            input_shape=input_shape,
            output_dim=output_dim,
            conv_filters=conv_filters,
            conv_filter_sizes=conv_filter_sizes,
            conv_strides=conv_strides,
            conv_pads=conv_pads,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=None,
        )

    l_mean = mean_network.output_layer

    if adaptive_std:
        l_log_std = ConvNetwork(
            name="log_std_network",
            input_shape=input_shape,
            input_var=mean_network.input_layer.input_var,
            output_dim=output_dim,
            conv_filters=std_conv_filters,
            conv_filter_sizes=std_conv_filter_sizes,
            conv_strides=std_conv_strides,
            conv_pads=std_conv_pads,
            hidden_sizes=std_hidden_sizes,
            hidden_nonlinearity=std_nonlinearity,
            output_nonlinearity=None,
        ).output_layer
    else:
        l_log_std = ParamLayer(
            mean_network.input_layer,
            num_units=output_dim,
            param=lasagne.init.Constant(np.log(init_std)),
            name="output_log_std",
            trainable=learn_std,
        )

    LasagnePowered.__init__(self, [l_mean, l_log_std])

    xs_var = mean_network.input_layer.input_var
    ys_var = TT.matrix("ys")
    old_means_var = TT.matrix("old_means")
    old_log_stds_var = TT.matrix("old_log_stds")

    x_mean_var = theano.shared(
        np.zeros((1, np.prod(input_shape)), dtype=theano.config.floatX),
        name="x_mean",
        broadcastable=(True, False),
    )
    x_std_var = theano.shared(
        np.ones((1, np.prod(input_shape)), dtype=theano.config.floatX),
        name="x_std",
        broadcastable=(True, False),
    )
    y_mean_var = theano.shared(
        np.zeros((1, output_dim), dtype=theano.config.floatX),
        name="y_mean",
        broadcastable=(True, False),
    )
    y_std_var = theano.shared(
        np.ones((1, output_dim), dtype=theano.config.floatX),
        name="y_std",
        broadcastable=(True, False),
    )

    normalized_xs_var = (xs_var - x_mean_var) / x_std_var
    normalized_ys_var = (ys_var - y_mean_var) / y_std_var

    normalized_means_var = L.get_output(
        l_mean, {mean_network.input_layer: normalized_xs_var})
    normalized_log_stds_var = L.get_output(
        l_log_std, {mean_network.input_layer: normalized_xs_var})

    means_var = normalized_means_var * y_std_var + y_mean_var
    log_stds_var = normalized_log_stds_var + TT.log(y_std_var)

    normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
    normalized_old_log_stds_var = old_log_stds_var - TT.log(y_std_var)

    dist = self._dist = DiagonalGaussian(output_dim)

    normalized_dist_info_vars = dict(mean=normalized_means_var,
                                     log_std=normalized_log_stds_var)

    mean_kl = TT.mean(
        dist.kl_sym(
            dict(mean=normalized_old_means_var,
                 log_std=normalized_old_log_stds_var),
            normalized_dist_info_vars,
        ))

    loss = -TT.mean(
        dist.log_likelihood_sym(normalized_ys_var,
                                normalized_dist_info_vars))

    self._f_predict = compile_function([xs_var], means_var)
    self._f_pdists = compile_function([xs_var], [means_var, log_stds_var])
    self._l_mean = l_mean
    self._l_log_std = l_log_std

    optimizer_args = dict(
        loss=loss,
        target=self,
        network_outputs=[normalized_means_var, normalized_log_stds_var],
    )

    if use_trust_region:
        optimizer_args["leq_constraint"] = (mean_kl, step_size)
        optimizer_args["inputs"] = [
            xs_var, ys_var, old_means_var, old_log_stds_var
        ]
    else:
        optimizer_args["inputs"] = [xs_var, ys_var]

    self._optimizer.update_opt(**optimizer_args)

    self._use_trust_region = use_trust_region
    self._name = name

    self._normalize_inputs = normalize_inputs
    self._normalize_outputs = normalize_outputs

    self._mean_network = mean_network
    self._x_mean_var = x_mean_var
    self._x_std_var = x_std_var
    self._y_mean_var = y_mean_var
    self._y_std_var = y_std_var
    self._subsample_factor = subsample_factor
    self._batchsize = batchsize
def __init__(self,
             input_shape,
             output_dim,
             conv_filters,
             conv_filter_sizes,
             conv_strides,
             conv_pads,
             hidden_sizes,
             hidden_nonlinearity,
             output_nonlinearity,
             name=None,
             hidden_w_init=ly.XavierUniformInitializer(),
             hidden_b_init=tf.zeros_initializer(),
             output_w_init=ly.XavierUniformInitializer(),
             output_b_init=tf.zeros_initializer(),
             input_var=None,
             input_layer=None,
             batch_normalization=False,
             weight_normalization=False):
    """
    A network composed of several convolution layers followed by some fc
    layers.

    input_shape: (width, height, channel). HOWEVER, network inputs are
        assumed flattened. This network will first unflatten the inputs
        and then apply the standard convolutions and so on.
    conv_filters: a list of numbers of convolution kernels
    conv_filter_sizes: a list of sizes (int) of the convolution kernels
    conv_strides: a list of strides (int) of the conv kernels
    conv_pads: a list of pad formats (either 'SAME' or 'VALID')
    hidden_nonlinearity: a nonlinearity from tf.nn, shared by all conv and
        fc layers
    hidden_sizes: a list of numbers of hidden units for all fc layers
    """
    Serializable.quick_init(self, locals())

    with tf.compat.v1.variable_scope(name, 'ConvNetwork'):
        if input_layer is not None:
            l_in = input_layer
            l_hid = l_in
        elif len(input_shape) == 3:
            l_in = ly.InputLayer(shape=(None, np.prod(input_shape)),
                                 input_var=input_var,
                                 name='input')
            l_hid = ly.reshape(l_in, ([0], ) + input_shape,
                               name='reshape_input')
        elif len(input_shape) == 2:
            l_in = ly.InputLayer(shape=(None, np.prod(input_shape)),
                                 input_var=input_var,
                                 name='input')
            input_shape = (1, ) + input_shape
            l_hid = ly.reshape(l_in, ([0], ) + input_shape,
                               name='reshape_input')
        else:
            l_in = ly.InputLayer(shape=(None, ) + input_shape,
                                 input_var=input_var,
                                 name='input')
            l_hid = l_in

        if batch_normalization:
            l_hid = ly.batch_norm(l_hid)
        for idx, conv_filter, filter_size, stride, pad in zip(
                range(len(conv_filters)),
                conv_filters,
                conv_filter_sizes,
                conv_strides,
                conv_pads,
        ):
            l_hid = ly.Conv2DLayer(
                l_hid,
                num_filters=conv_filter,
                filter_size=filter_size,
                stride=(stride, stride),
                pad=pad,
                nonlinearity=hidden_nonlinearity,
                name='conv_hidden_%d' % idx,
                weight_normalization=weight_normalization,
            )
            if batch_normalization:
                l_hid = ly.batch_norm(l_hid)

        if output_nonlinearity == ly.spatial_expected_softmax:
            assert not hidden_sizes
            assert output_dim == conv_filters[-1] * 2
            l_hid.nonlinearity = tf.identity
            l_out = ly.SpatialExpectedSoftmaxLayer(l_hid)
        else:
            l_hid = ly.flatten(l_hid, name='conv_flatten')
            for idx, hidden_size in enumerate(hidden_sizes):
                l_hid = ly.DenseLayer(
                    l_hid,
                    num_units=hidden_size,
                    nonlinearity=hidden_nonlinearity,
                    name='hidden_%d' % idx,
                    w=hidden_w_init,
                    b=hidden_b_init,
                    weight_normalization=weight_normalization,
                )
                if batch_normalization:
                    l_hid = ly.batch_norm(l_hid)
            l_out = ly.DenseLayer(
                l_hid,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                name='output',
                w=output_w_init,
                b=output_b_init,
                weight_normalization=weight_normalization,
            )
            if batch_normalization:
                l_out = ly.batch_norm(l_out)
        self._l_in = l_in
        self._l_out = l_out

        LayersPowered.__init__(self, l_out)
def __init__(self, name):
    Serializable.quick_init(self, locals())
    with tf.variable_scope(name):
        self.w = tf.get_variable('w', [10, 10])
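# Hedged usage sketch (TF 1.x graph mode, matching the API above): scoping each
# instance under its own name keeps the 'w' variables distinct. `ParamHolder`
# is an illustrative stand-in for the class this constructor belongs to.
import tensorflow as tf

class ParamHolder:
    def __init__(self, name):
        with tf.variable_scope(name):
            self.w = tf.get_variable('w', [10, 10])

a = ParamHolder('module_a')  # creates 'module_a/w:0'
b = ParamHolder('module_b')  # creates 'module_b/w:0'
assert a.w.name != b.w.name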