def __init__(self, mdp_cls, mdp_args):
    """Wrap an MDP class so a fresh, noise-perturbed instance can be generated.

    :param mdp_cls: class of the underlying MDP to instantiate
    :param mdp_args: keyword arguments used when constructing the MDP
    """
    Serializable.quick_init(self, locals())
    self.mdp_cls = mdp_cls
    # Copy so later mutation of the caller's dict cannot leak into this env.
    self.mdp_args = dict(mdp_args)
    # Force noisy templates so each generated MDP is randomized.
    self.mdp_args["template_args"] = dict(noise=True)
    super(IdentificationEnv, self).__init__(self.gen_mdp())
def __init__(self, name, max_opt_itr=20, callback=None):
    """Store optimizer configuration; the optimization target is bound later.

    :param name: scope name for this optimizer
    :param max_opt_itr: maximum number of optimization iterations
    :param callback: optional callable invoked during optimization
    """
    Serializable.quick_init(self, locals())
    # Compiled functions and the target are populated lazily elsewhere.
    self._opt_fun = None
    self._target = None
    self._name = name
    self._callback = callback
    self._max_opt_itr = max_opt_itr
def __init__(self, env_name, record_video=True, video_schedule=None, log_dir=None,
             record_log=True, force_reset=False):
    """Wrap an OpenAI Gym environment, optionally attaching the Gym monitor.

    :param env_name: registered Gym environment id (e.g. "CartPole-v0")
    :param record_video: whether to record videos via the monitor
    :param video_schedule: schedule deciding which episodes to record;
        defaults to Gym's capped cubic schedule when videos are enabled
    :param log_dir: directory for monitor output; falls back to
        "<snapshot_dir>/gym_log" when a snapshot dir is configured
    :param record_log: whether to write monitor log files at all
    :param force_reset: stored flag; presumably forces reset() mid-episode —
        confirm against this class's reset()
    """
    if log_dir is None:
        if logger.get_snapshot_dir() is None:
            logger.log("Warning: skipping Gym environment monitoring since snapshot_dir not configured.")
        else:
            log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
    Serializable.quick_init(self, locals())

    env = gym.envs.make(env_name)
    self.env = env
    self.env_id = env.spec.id

    # Silence the monitor's chatty INFO-level output.
    monitor_manager.logger.setLevel(logging.WARNING)

    # Recording video without recording logs is unsupported by the monitor.
    assert not (not record_log and record_video)

    if log_dir is None or record_log is False:
        self.monitoring = False
    else:
        if not record_video:
            video_schedule = NoVideoSchedule()
        else:
            if video_schedule is None:
                video_schedule = CappedCubicVideoSchedule()
        self.env = gym.wrappers.Monitor(self.env, log_dir, video_callable=video_schedule, force=True)
        self.monitoring = True

    self._observation_space = convert_gym_space(env.observation_space)
    self._action_space = convert_gym_space(env.action_space)
    # NOTE(review): spec.timestep_limit is deprecated in newer Gym releases
    # (replaced by spec.max_episode_steps) — confirm the pinned gym version.
    self._horizon = env.spec.timestep_limit
    self._log_dir = log_dir
    self._force_reset = force_reset
def __init__(self, regressors):
    """Combine several regressors into a single composite regressor.

    :param regressors: List of individual regressors
    """
    Serializable.quick_init(self, locals())
    self.regressors = regressors
    # Cache each component's output dimensionality, in order.
    dims = []
    for regressor in regressors:
        dims.append(regressor.output_dim)
    self.output_dims = dims
def __init__(self, update_method=lasagne.updates.adam, learning_rate=1e-3,
             max_epochs=1000, tolerance=1e-6, batch_size=32, callback=None,
             verbose=False, **kwargs):
    """Configure a first-order (gradient-based) optimizer.

    :param update_method: lasagne-style update rule (default: adam)
    :param learning_rate: step size bound into the update method
    :param max_epochs: maximum number of passes over the data
    :param tolerance: stopping tolerance
    :param batch_size: None or an integer. If None the whole dataset will
        be used.
    :param callback: optional callable invoked during optimization
    :param verbose: whether to print progress
    """
    Serializable.quick_init(self, locals())
    self._opt_fun = None
    self._target = None
    self._callback = callback
    # Bind the learning rate now; remaining arguments are supplied later.
    self._update_method = partial(update_method, learning_rate=learning_rate)
    self._max_epochs = max_epochs
    self._tolerance = tolerance
    self._batch_size = batch_size
    self._verbose = verbose
def __init__(self, env, policy, n_itr=500, max_path_length=500, discount=0.99,
             sigma0=1., batch_size=None, plot=False, **kwargs):
    """
    :param n_itr: Number of iterations.
    :param max_path_length: Maximum length of a single rollout.
    :param batch_size: # of samples from trajs from param distribution, when
     this is set, n_samples is ignored
    :param discount: Discount.
    :param plot: Plot evaluation run after each iteration.
    :param sigma0: Initial std for param dist
    """
    Serializable.quick_init(self, locals())
    self.env = env
    self.policy = policy
    self.n_itr = n_itr
    self.max_path_length = max_path_length
    self.batch_size = batch_size
    self.discount = discount
    self.sigma0 = sigma0
    self.plot = plot
def __init__(self, env, policy, baseline, optimizer=None, optimizer_args=None,
             use_maml=True, **kwargs):
    """MAML variant of vanilla policy gradient.

    :param env: environment
    :param policy: policy to optimize
    :param baseline: baseline for variance reduction
    :param optimizer: optimizer instance; defaults to a FirstOrderOptimizer
        doing a single full-batch epoch
    :param optimizer_args: overrides for the default optimizer's kwargs
    :param use_maml: whether to run the MAML meta-update
    """
    Serializable.quick_init(self, locals())
    if optimizer is None:
        # Defaults: one epoch over the whole batch; caller args override.
        merged_args = dict(dict(batch_size=None, max_epochs=1),
                           **(optimizer_args or dict()))
        optimizer = FirstOrderOptimizer(**merged_args)
    self.optimizer = optimizer
    self.opt_info = None
    self.use_maml = use_maml
    super(MAMLVPG, self).__init__(env=env, policy=policy, baseline=baseline,
                                  use_maml=use_maml, **kwargs)
def __init__(self, goal_vel=None, *args, **kwargs):
    """Half-cheetah environment whose goal velocity/direction is a task parameter.

    :param goal_vel: target velocity; when None, reset() is expected to
        sample one (confirm against this class's reset())
    """
    # Must exist before super().__init__, whose setup may already read it.
    self.goal_vel = goal_vel
    super(HalfCheetahEnvRandDirec, self).__init__(*args, **kwargs)
    Serializable.__init__(self, *args, **kwargs)
    # The original code re-assigned self.goal_vel three times; those stores
    # were dead because the final reset() below fully determines the goal.
    self.reset(reset_args=goal_vel)
def __init__(self, epsilon=0.5,
             L2_reg_dual=0.,  # 1e-5,
             L2_reg_loss=0., max_opt_itr=50,
             optimizer=scipy.optimize.fmin_l_bfgs_b, **kwargs):
    """
    :param epsilon: Max KL divergence between new policy and old policy.
    :param L2_reg_dual: Dual regularization
    :param L2_reg_loss: Loss regularization
    :param max_opt_itr: Maximum number of batch optimization iterations.
    :param optimizer: Module path to the optimizer. It must support the same
     interface as scipy.optimize.fmin_l_bfgs_b.
    """
    Serializable.quick_init(self, locals())
    super(REPS, self).__init__(**kwargs)
    self.opt_info = None
    self.optimizer = optimizer
    self.max_opt_itr = max_opt_itr
    self.L2_reg_loss = L2_reg_loss
    self.L2_reg_dual = L2_reg_dual
    self.epsilon = epsilon
def __init__(self, desc='two-state', map_id=None):
    """Environment defined by one of several candidate map descriptions.

    :param desc: key into MAPS, or an explicit sequence of map choices
    :param map_id: optional index selecting a fixed map
    """
    self._map_id = map_id
    Serializable.quick_init(self, locals())
    # A string names a preset; anything else is used as the choices directly.
    self.desc_choices = MAPS[desc] if isinstance(desc, str) else desc
    self.reset()
def __setstate__(self, d):
    """Restore pickled state.

    Re-initializes the TF variables backing this object's parameters in the
    current default session, then loads the saved values.

    :param d: state dict; d["params"] holds the flattened parameter values
    """
    Serializable.__setstate__(self, d)
    # Module-level switch: lets callers unpickle the object structure without
    # restoring network weights.
    global load_params
    if load_params:
        # Requires an active default TF session.
        tf.get_default_session().run(
            tf.variables_initializer(self.get_params(all_params=True)))
        self.set_param_values(d["params"], all_params=True)
def __init__(self, name, max_opt_itr=20, initial_penalty=1.0, min_penalty=1e-2,
             max_penalty=1e6, increase_penalty_factor=2,
             decrease_penalty_factor=0.5, max_penalty_itr=10,
             adapt_penalty=True):
    """Configuration for a penalized optimizer with an adaptable penalty.

    :param name: scope name for this optimizer
    :param max_opt_itr: maximum number of optimization iterations
    :param initial_penalty: starting penalty coefficient
    :param min_penalty: lower clamp for the penalty
    :param max_penalty: upper clamp for the penalty
    :param increase_penalty_factor: factor by which the penalty is increased
    :param decrease_penalty_factor: factor by which the penalty is decreased
    :param max_penalty_itr: maximum number of penalty-adjustment iterations
    :param adapt_penalty: whether to adapt the penalty at all
    """
    Serializable.quick_init(self, locals())
    # Bound lazily when the optimization problem is set up.
    self._opt_fun = None
    self._target = None
    self._max_constraint_val = None
    self._constraint_name = None
    self._name = name
    self._max_opt_itr = max_opt_itr
    # Live penalty starts at the configured initial value.
    self._penalty = initial_penalty
    self._initial_penalty = initial_penalty
    self._min_penalty = min_penalty
    self._max_penalty = max_penalty
    self._increase_penalty_factor = increase_penalty_factor
    self._decrease_penalty_factor = decrease_penalty_factor
    self._max_penalty_itr = max_penalty_itr
    self._adapt_penalty = adapt_penalty
def __init__(self, env, obs_noise=1e-1):
    """Proxy environment that perturbs observations with noise.

    :param env: environment to wrap
    :param obs_noise: scale of the observation noise
    """
    super(NoisyObservationEnv, self).__init__(env)
    Serializable.quick_init(self, locals())
    self.obs_noise = obs_noise
def __init__(self, observation_space, action_space):
    """Hold an environment's observation and action spaces.

    :type observation_space: Space
    :type action_space: Space
    """
    Serializable.quick_init(self, locals())
    self._action_space = action_space
    self._observation_space = observation_space
def __init__(self, name, output_dim, hidden_sizes, hidden_nonlinearity,
             output_nonlinearity,
             hidden_W_init=L.XavierUniformInitializer(),
             hidden_b_init=tf.zeros_initializer,
             output_W_init=L.XavierUniformInitializer(),
             output_b_init=tf.zeros_initializer,
             input_var=None, input_layer=None, input_shape=None,
             batch_normalization=False, weight_normalization=False,
             ):
    """Build a fully-connected MLP from the project's TF layer wrappers.

    :param name: variable scope under which all layers are created
    :param output_dim: number of output units
    :param hidden_sizes: iterable of hidden-layer widths
    :param hidden_nonlinearity: nonlinearity applied to every hidden layer
    :param output_nonlinearity: nonlinearity applied to the output layer
    :param hidden_W_init: weight initializer for hidden layers
    :param hidden_b_init: bias initializer for hidden layers
        (NOTE(review): tf.zeros_initializer is passed uninstantiated — the
        class, not an instance; confirm L.DenseLayer accepts this)
    :param output_W_init: weight initializer for the output layer
    :param output_b_init: bias initializer for the output layer
    :param input_var: optional placeholder fed to a newly created input layer
    :param input_layer: existing layer to build on; when given, input_shape
        and input_var are ignored
    :param input_shape: shape of one sample (no batch dim); required when
        input_layer is None
    :param batch_normalization: insert batch-norm after input/hidden/output
    :param weight_normalization: apply weight normalization to dense layers
    """
    Serializable.quick_init(self, locals())

    with tf.variable_scope(name):
        if input_layer is None:
            l_in = L.InputLayer(shape=(None, ) + input_shape, input_var=input_var, name="input")
        else:
            l_in = input_layer
        self._layers = [l_in]
        l_hid = l_in
        if batch_normalization:
            l_hid = L.batch_norm(l_hid)
        for idx, hidden_size in enumerate(hidden_sizes):
            l_hid = L.DenseLayer(l_hid, num_units=hidden_size,
                                 nonlinearity=hidden_nonlinearity,
                                 name="hidden_%d" % idx,
                                 W=hidden_W_init, b=hidden_b_init,
                                 weight_normalization=weight_normalization)
            if batch_normalization:
                l_hid = L.batch_norm(l_hid)
            self._layers.append(l_hid)
        l_out = L.DenseLayer(l_hid, num_units=output_dim,
                             nonlinearity=output_nonlinearity,
                             name="output",
                             W=output_W_init, b=output_b_init,
                             weight_normalization=weight_normalization)
        if batch_normalization:
            l_out = L.batch_norm(l_out)
        self._layers.append(l_out)
        self._l_in = l_in
        self._l_out = l_out
        # self._input_var = l_in.input_var
        # Symbolic forward pass through the whole network.
        self._output = L.get_output(l_out)

        LayersPowered.__init__(self, l_out)
def __init__(self, output_dim, hidden_sizes, hidden_nonlinearity,
             output_nonlinearity,
             hidden_W_init=LI.GlorotUniform(), hidden_b_init=LI.Constant(0.),
             output_W_init=LI.GlorotUniform(), output_b_init=LI.Constant(0.),
             name=None, input_var=None, input_layer=None, input_shape=None,
             batch_norm=False):
    """Build a fully-connected MLP from Lasagne layers.

    :param output_dim: number of output units
    :param hidden_sizes: iterable of hidden-layer widths
    :param hidden_nonlinearity: nonlinearity applied to every hidden layer
    :param output_nonlinearity: nonlinearity applied to the output layer
    :param hidden_W_init: weight initializer for hidden layers
    :param hidden_b_init: bias initializer for hidden layers
    :param output_W_init: weight initializer for the output layer
    :param output_b_init: bias initializer for the output layer
    :param name: optional prefix for layer names
    :param input_var: optional Theano variable fed to a new input layer
    :param input_layer: existing layer to build on; when given, input_shape
        and input_var are ignored
    :param input_shape: shape of one sample (no batch dim); required when
        input_layer is None
    :param batch_norm: insert batch-norm after each hidden layer
    """
    Serializable.quick_init(self, locals())
    if name is None:
        prefix = ""
    else:
        prefix = name + "_"
    if input_layer is None:
        l_in = L.InputLayer(shape=(None, ) + input_shape, input_var=input_var)
    else:
        l_in = input_layer
    self._layers = [l_in]
    l_hid = l_in
    for idx, hidden_size in enumerate(hidden_sizes):
        l_hid = L.DenseLayer(
            l_hid,
            num_units=hidden_size,
            nonlinearity=hidden_nonlinearity,
            name="%shidden_%d" % (prefix, idx),
            W=hidden_W_init,
            b=hidden_b_init,
        )
        if batch_norm:
            l_hid = L.batch_norm(l_hid)
        self._layers.append(l_hid)
    l_out = L.DenseLayer(
        l_hid,
        num_units=output_dim,
        nonlinearity=output_nonlinearity,
        name="%soutput" % (prefix, ),
        W=output_W_init,
        b=output_b_init,
    )
    self._layers.append(l_out)
    self._l_in = l_in
    self._l_out = l_out
    # self._input_var = l_in.input_var
    # Symbolic forward pass through the whole network.
    self._output = L.get_output(l_out)
    LasagnePowered.__init__(self, [l_out])
def __init__(self, *args, **kwargs):
    """Cartpole swing-up task (Box2D, Mako-templated model).

    Loads the cartpole model, sets position limits, and caches body handles.
    """
    super(CartpoleSwingupEnv, self).__init__(
        self.model_path("cartpole.xml.mako"),
        *args, **kwargs
    )
    # Cart position limits; presumably used by the reward/termination logic —
    # confirm against step().
    self.max_cart_pos = 3
    self.max_reward_cart_pos = 3
    self.cart = find_body(self.world, "cart")
    self.pole = find_body(self.world, "pole")
    Serializable.__init__(self, *args, **kwargs)
def __init__(self, env_spec, mu=0, theta=0.15, sigma=0.3, **kwargs):
    """Mean-reverting (OU-style) exploration noise for continuous actions.

    :param env_spec: spec whose action space must be a one-dimensional Box
    :param mu: long-run mean of the noise process
    :param theta: mean-reversion rate
    :param sigma: noise scale
    """
    assert isinstance(env_spec.action_space, Box)
    assert len(env_spec.action_space.shape) == 1
    Serializable.quick_init(self, locals())
    self.mu = mu
    self.theta = theta
    self.sigma = sigma
    self.action_space = env_spec.action_space
    # Start the process at its mean; reset() re-initializes the state.
    self.state = self.mu * np.ones(self.action_space.flat_dim)
    self.reset()
def __init__(self, env, action_delay=3):
    """Proxy environment that delays each action by a fixed number of steps.

    :param env: environment to wrap
    :param action_delay: number of steps to delay actions; must be positive
    """
    assert action_delay > 0, "Should not use this env transformer"
    super(DelayedActionEnv, self).__init__(env)
    Serializable.quick_init(self, locals())
    self.action_delay = action_delay
    # Pending-action queue; created lazily (e.g. on reset).
    self._queued_actions = None
def __init__(self, optimizer=None, optimizer_args=None, **kwargs):
    """Penalized policy optimization algorithm.

    :param optimizer: optimizer instance; defaults to PenaltyLbfgsOptimizer
    :param optimizer_args: kwargs for the default optimizer
    """
    Serializable.quick_init(self, locals())
    if optimizer is None:
        optimizer = PenaltyLbfgsOptimizer(**(optimizer_args or dict()))
    super(PPO, self).__init__(optimizer=optimizer, **kwargs)
def __init__(self, env, n_steps=4, axis=0):
    """Proxy environment keeping a rolling buffer of recent observations.

    NOTE(review): the stacking behavior is implemented elsewhere — confirm
    against this class's step()/reset().

    :param env: environment to wrap
    :param n_steps: number of steps kept in the buffer
    :param axis: axis along which observations are combined
    """
    super().__init__(env)
    Serializable.quick_init(self, locals())
    self.axis = axis
    self.n_steps = n_steps
    # Created lazily on first use.
    self.buffer = None
def __init__(self, *args, **kwargs):
    """Cartpole balancing task (Box2D, Mako-templated model).

    Limit constants are set before super().__init__ so they exist when the
    base constructor runs.
    """
    # Episode limits; presumably enforced in the reward/termination logic —
    # confirm against step().
    self.max_pole_angle = .2
    self.max_cart_pos = 2.4
    self.max_cart_speed = 4.
    self.max_pole_speed = 4.
    # NOTE(review): looks like the half-width of the initial-state sampling
    # range — confirm against reset().
    self.reset_range = 0.05
    super(CartpoleEnv, self).__init__(self.model_path("cartpole.xml.mako"), *args, **kwargs)
    self.cart = find_body(self.world, "cart")
    self.pole = find_body(self.world, "pole")
    Serializable.__init__(self, *args, **kwargs)
def __init__(self, env_spec, max_sigma=1.0, min_sigma=0.1, decay_period=1000000):
    """Gaussian exploration noise with a sigma decayed over time.

    NOTE(review): the decay itself happens elsewhere — confirm against the
    action-selection method.

    :param env_spec: spec whose action space must be a one-dimensional Box
    :param max_sigma: initial noise standard deviation
    :param min_sigma: final noise standard deviation
    :param decay_period: number of steps over which sigma is decayed
    """
    assert isinstance(env_spec.action_space, Box)
    assert len(env_spec.action_space.shape) == 1
    Serializable.quick_init(self, locals())
    self._action_space = env_spec.action_space
    self._decay_period = decay_period
    self._min_sigma = min_sigma
    self._max_sigma = max_sigma
def __init__(self, height_bonus=1., goal_cart_pos=0.6, *args, **kwargs):
    """Mountain-car task (Box2D, Mako-templated model).

    :param height_bonus: reward bonus coefficient tied to height
        (see step() for exact usage)
    :param goal_cart_pos: cart position treated as the goal
    """
    super(MountainCarEnv, self).__init__(
        self.model_path("mountain_car.xml.mako"),
        *args, **kwargs
    )
    self.max_cart_pos = 2
    self.height_bonus = height_bonus
    self.goal_cart_pos = goal_cart_pos
    self.cart = find_body(self.world, "cart")
    Serializable.quick_init(self, locals())
def __init__(self, vel_deviation_cost_coeff=1e-2, alive_bonus=0.2,
             ctrl_cost_coeff=1e-3, impact_cost_coeff=1e-5, *args, **kwargs):
    """Simple humanoid locomotion task.

    Reward coefficients are stored before super().__init__ so they already
    exist if the base constructor touches the reward logic.

    :param vel_deviation_cost_coeff: penalty coefficient for velocity deviation
    :param alive_bonus: per-step bonus for staying alive
    :param ctrl_cost_coeff: penalty coefficient for control effort
    :param impact_cost_coeff: penalty coefficient for contact/impact forces
    """
    self.alive_bonus = alive_bonus
    self.vel_deviation_cost_coeff = vel_deviation_cost_coeff
    self.impact_cost_coeff = impact_cost_coeff
    self.ctrl_cost_coeff = ctrl_cost_coeff
    super(SimpleHumanoidEnv, self).__init__(*args, **kwargs)
    Serializable.quick_init(self, locals())
def __init__(self, name, env_spec, conv_filters, conv_filter_sizes,
             conv_strides, conv_pads, hidden_sizes=[],
             hidden_nonlinearity=NL.rectify,
             output_nonlinearity=NL.softmax,
             prob_network=None):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param output_nonlinearity: nonlinearity for the output (probability)
     layer; defaults to softmax
    :param prob_network: manually specified network for this policy, other
     network params are ignored
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    self._env_spec = env_spec
    if prob_network is None:
        prob_network = ConvNetwork(
            input_shape=env_spec.observation_space.shape,
            output_dim=env_spec.action_space.n,
            conv_filters=conv_filters,
            conv_filter_sizes=conv_filter_sizes,
            conv_strides=conv_strides,
            conv_pads=conv_pads,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            # BUG FIX: previously hard-coded to NL.softmax, which silently
            # ignored the output_nonlinearity argument. The default is still
            # softmax, so existing callers are unaffected.
            output_nonlinearity=output_nonlinearity,
            name="prob_network",
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    # Compiled forward pass: observations -> action probabilities.
    self._f_prob = ext.compile_function(
        [prob_network.input_layer.input_var],
        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalConvPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
def __init__(self, env_spec, hidden_sizes=(32, 32),
             hidden_nonlinearity=NL.rectify,
             hidden_W_init=LI.HeUniform(), hidden_b_init=LI.Constant(0.),
             output_nonlinearity=NL.tanh,
             output_W_init=LI.Uniform(-3e-3, 3e-3),
             output_b_init=LI.Uniform(-3e-3, 3e-3),
             bn=False):
    """Deterministic MLP policy mapping flat observations to actions.

    :param env_spec: spec providing flat observation/action dimensions
    :param hidden_sizes: widths of the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity for each hidden layer
    :param hidden_W_init: weight initializer for hidden layers
    :param hidden_b_init: bias initializer for hidden layers
    :param output_nonlinearity: output nonlinearity (tanh keeps actions bounded)
    :param output_W_init: weight initializer for the output layer
    :param output_b_init: bias initializer for the output layer
    :param bn: insert batch normalization after the input and hidden layers
    """
    Serializable.quick_init(self, locals())

    l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim))

    l_hidden = l_obs
    if bn:
        l_hidden = batch_norm(l_hidden)

    for idx, size in enumerate(hidden_sizes):
        l_hidden = L.DenseLayer(
            l_hidden,
            num_units=size,
            W=hidden_W_init,
            b=hidden_b_init,
            nonlinearity=hidden_nonlinearity,
            name="h%d" % idx
        )
        if bn:
            l_hidden = batch_norm(l_hidden)

    l_output = L.DenseLayer(
        l_hidden,
        num_units=env_spec.action_space.flat_dim,
        W=output_W_init,
        b=output_b_init,
        nonlinearity=output_nonlinearity,
        name="output"
    )

    # Note the deterministic=True argument. It makes sure that when getting
    # actions from single observations, we do not update params in the
    # batch normalization layers
    action_var = L.get_output(l_output, deterministic=True)
    self._output_layer = l_output

    # Compiled forward pass: observations -> actions.
    self._f_actions = ext.compile_function([l_obs.input_var], action_var)

    super(DeterministicMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [l_output])
def __init__(self, desc='4x4'):
    """Grid-world environment.

    :param desc: key into MAPS, or an explicit map description; the
        characters '.', 'o', 'x' are normalized to 'F', 'H', 'W' respectively
    """
    Serializable.quick_init(self, locals())
    if isinstance(desc, str):
        desc = MAPS[desc]
    desc = np.array(list(map(list, desc)))
    # Normalize alternate map notations to the canonical cell codes.
    for alias, canonical in (('.', 'F'), ('o', 'H'), ('x', 'W')):
        desc[desc == alias] = canonical
    self.desc = desc
    self.n_row, self.n_col = desc.shape
    # Exactly one start cell 'S' is expected; states are row-major indices.
    (start_x, ), (start_y, ) = np.nonzero(desc == 'S')
    self.start_state = start_x * self.n_col + start_y
    self.state = None
    self.domain_fig = None
def __init__(self, optimizer=None, optimizer_args=None, positive_adv=None, **kwargs):
    """Episodic reward-weighted regression variant.

    :param optimizer: optimizer instance; defaults to LbfgsOptimizer
    :param optimizer_args: kwargs for the default optimizer
    :param positive_adv: whether to force advantages to be positive;
        defaults to True for this algorithm
    """
    Serializable.quick_init(self, locals())
    if optimizer is None:
        optimizer = LbfgsOptimizer(**(optimizer_args or dict()))
    if positive_adv is None:
        positive_adv = True
    super(ERWR, self).__init__(
        optimizer=optimizer,
        positive_adv=positive_adv,
        **kwargs)
def __init__(self, *args, **kwargs):
    """Double pendulum task (Box2D, Mako-templated model).

    Optionally randomizes the link length when template noise is requested,
    then renders the model template with that length.
    """
    # make sure mdp-level step is 100ms long
    kwargs["frame_skip"] = kwargs.get("frame_skip", 2)
    if kwargs.get("template_args", {}).get("noise", False):
        # Uniform random link length in [0.5, 1.5).
        self.link_len = (np.random.rand() - 0.5) + 1
    else:
        self.link_len = 1
    # Ensure template_args exists, then pass the chosen length to the template.
    kwargs["template_args"] = kwargs.get("template_args", {})
    kwargs["template_args"]["link_len"] = self.link_len
    super(DoublePendulumEnv, self).__init__(
        self.model_path("double_pendulum.xml.mako"),
        *args, **kwargs)
    self.link1 = find_body(self.world, "link1")
    self.link2 = find_body(self.world, "link2")
    Serializable.__init__(self, *args, **kwargs)