def __init__(self, mdp_cls, mdp_args):
     Serializable.quick_init(self, locals())
     self.mdp_cls = mdp_cls
     self.mdp_args = dict(mdp_args)
     self.mdp_args["template_args"] = dict(noise=True)
     mdp = self.gen_mdp()
     super(IdentificationEnv, self).__init__(mdp)
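
Every example below begins with Serializable.quick_init(self, locals()). A minimal sketch of what this pattern accomplishes, assuming (as in rllab) that the captured constructor arguments are stored so the object can be pickled and rebuilt; MiniSerializable, ToyEnv, and _reconstruct are illustrative names, not rllab's actual implementation:

import inspect
import pickle

def _reconstruct(cls, kwargs):
    return cls(**kwargs)

class MiniSerializable(object):
    def quick_init(self, locals_):
        # Capture the caller's constructor arguments by name,
        # skipping *args/**kwargs for brevity.
        params = inspect.signature(type(self).__init__).parameters
        self._ctor_kwargs = {
            name: locals_[name]
            for name, p in params.items()
            if name != "self"
            and p.kind in (p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY)
        }

    def __reduce__(self):
        # Pickle support: rebuild by re-invoking the constructor with
        # the captured arguments instead of serializing internal state.
        return (_reconstruct, (type(self), self._ctor_kwargs))

class ToyEnv(MiniSerializable):
    def __init__(self, size, noise=0.1):
        MiniSerializable.quick_init(self, locals())
        self.size, self.noise = size, noise

clone = pickle.loads(pickle.dumps(ToyEnv(4, noise=0.2)))
assert (clone.size, clone.noise) == (4, 0.2)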
Example #2
 def __init__(self, regressors):
     """
     :param regressors: List of individual regressors
     """
     Serializable.quick_init(self, locals())
     self.regressors = regressors
     self.output_dims = [x.output_dim for x in regressors]
Example #3
 def __init__(self, desc='two-state', map_id=None):
     self._map_id = map_id
     Serializable.quick_init(self, locals())
     if isinstance(desc, str):
         desc = MAPS[desc]
     self.desc_choices = desc
     self.reset()
Example #4
 def __init__(self, name, max_opt_itr=20, callback=None):
     Serializable.quick_init(self, locals())
     self._name = name
     self._max_opt_itr = max_opt_itr
     self._opt_fun = None
     self._target = None
     self._callback = callback
Example #5
    def __init__(
            self,
            name,
            max_opt_itr=20,
            initial_penalty=1.0,
            min_penalty=1e-2,
            max_penalty=1e6,
            increase_penalty_factor=2,
            decrease_penalty_factor=0.5,
            max_penalty_itr=10,
            adapt_penalty=True):
        Serializable.quick_init(self, locals())
        self._name = name
        self._max_opt_itr = max_opt_itr
        self._penalty = initial_penalty
        self._initial_penalty = initial_penalty
        self._min_penalty = min_penalty
        self._max_penalty = max_penalty
        self._increase_penalty_factor = increase_penalty_factor
        self._decrease_penalty_factor = decrease_penalty_factor
        self._max_penalty_itr = max_penalty_itr
        self._adapt_penalty = adapt_penalty

        self._opt_fun = None
        self._target = None
        self._max_constraint_val = None
        self._constraint_name = None
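
The increase/decrease factors and bounds above suggest a multiplicative penalty search: scale the penalty up while the constraint is violated, back down once it holds, and clip to [min_penalty, max_penalty]. A hedged sketch of one adaptation step (an illustrative helper, not the optimizer's actual code):

def adapt_penalty(penalty, constraint_val, max_constraint_val,
                  increase_factor=2.0, decrease_factor=0.5,
                  min_penalty=1e-2, max_penalty=1e6):
    # Grow the penalty while the constraint is violated, shrink it once
    # satisfied, and keep the result inside the configured bounds.
    scale = increase_factor if constraint_val > max_constraint_val else decrease_factor
    return min(max(penalty * scale, min_penalty), max_penalty)

Example #6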
 def __init__(self,
              env,
              policy,
              baseline,
              optimizer=None,
              optimizer_args=None,
              use_maml=True,
              **kwargs):
     Serializable.quick_init(self, locals())
     if optimizer is None:
         default_args = dict(
             batch_size=None,
             max_epochs=1,
         )
         if optimizer_args is None:
             optimizer_args = default_args
         else:
             optimizer_args = dict(default_args, **optimizer_args)
         optimizer = FirstOrderOptimizer(**optimizer_args)
     self.optimizer = optimizer
     self.opt_info = None
     self.use_maml = use_maml
     super(MAMLVPG, self).__init__(env=env,
                                   policy=policy,
                                   baseline=baseline,
                                   use_maml=use_maml,
                                   **kwargs)
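
Note the dict(default_args, **optimizer_args) idiom above: it merges the two dictionaries, with user-supplied entries overriding the defaults. For instance:

default_args = dict(batch_size=None, max_epochs=1)
merged = dict(default_args, **dict(max_epochs=5))
# User-supplied max_epochs wins; untouched defaults survive.
assert merged == {"batch_size": None, "max_epochs": 5}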
Example #7
 def __init__(self,
              env,
              policy,
              n_itr=500,
              max_path_length=500,
              discount=0.99,
              sigma0=1.,
              batch_size=None,
              plot=False,
              **kwargs):
     """
     :param n_itr: Number of iterations.
     :param max_path_length: Maximum length of a single rollout.
     :param batch_size: Number of samples drawn from trajectories under the parameter
     distribution; when set, n_samples is ignored.
     :param discount: Discount.
     :param plot: Plot evaluation run after each iteration.
     :param sigma0: Initial standard deviation of the parameter distribution.
     :return:
     """
     Serializable.quick_init(self, locals())
     self.env = env
     self.policy = policy
     self.plot = plot
     self.sigma0 = sigma0
     self.discount = discount
     self.max_path_length = max_path_length
     self.n_itr = n_itr
     self.batch_size = batch_size
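
sigma0 seeds the standard deviation of the search distribution over policy parameters. A minimal sketch of the sampling step this sets up, assuming a CEM-style search; the names here are illustrative:

import numpy as np

def sample_candidates(cur_mean, sigma0, n_samples, rng=np.random):
    # cur_mean is a 1-D array of flattened policy parameters; draw
    # candidates from an isotropic Gaussian centered on it with
    # standard deviation sigma0.
    noise = rng.randn(n_samples, cur_mean.size)
    return cur_mean + sigma0 * noise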
Example #8
    def __init__(self,
                 update_method=lasagne.updates.adam,
                 learning_rate=1e-3,
                 max_epochs=1000,
                 tolerance=1e-6,
                 batch_size=32,
                 callback=None,
                 verbose=False,
                 **kwargs):
        """

        :param update_method: Lasagne update method (defaults to lasagne.updates.adam).
        :param learning_rate: Learning rate passed to update_method.
        :param max_epochs: Maximum number of passes over the dataset.
        :param tolerance: Early-stopping threshold on the change in loss between epochs.
        :param batch_size: None or an integer. If None the whole dataset will be used.
        :param callback: Optional callable invoked during optimization.
        :param kwargs:
        :return:
        """
        Serializable.quick_init(self, locals())
        self._opt_fun = None
        self._target = None
        self._callback = callback
        update_method = partial(update_method, learning_rate=learning_rate)
        self._update_method = update_method
        self._max_epochs = max_epochs
        self._tolerance = tolerance
        self._batch_size = batch_size
        self._verbose = verbose
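
partial(update_method, learning_rate=...) bakes the learning rate into the Lasagne update method, while max_epochs and tolerance bound the training loop. A sketch of the epoch loop these arguments imply (assumed structure; f_train and batches stand in for the compiled training function and the minibatched dataset):

def optimize_sketch(f_train, batches, max_epochs=1000, tolerance=1e-6):
    # Run up to max_epochs passes over the data, stopping early once the
    # mean loss changes by less than tolerance between epochs.
    last_loss = float("inf")
    for _ in range(max_epochs):
        losses = [f_train(batch) for batch in batches]
        mean_loss = sum(losses) / len(losses)
        converged = abs(last_loss - mean_loss) < tolerance
        last_loss = mean_loss
        if converged:
            break
    return last_loss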
Example #9
    def __init__(
            self,
            epsilon=0.5,
            L2_reg_dual=0.,  # 1e-5,
            L2_reg_loss=0.,
            max_opt_itr=50,
            optimizer=scipy.optimize.fmin_l_bfgs_b,
            **kwargs):
        """

        :param epsilon: Max KL divergence between new policy and old policy.
        :param L2_reg_dual: L2 regularization coefficient for the dual optimization.
        :param L2_reg_loss: L2 regularization coefficient for the loss.
        :param max_opt_itr: Maximum number of batch optimization iterations.
        :param optimizer: Optimizer function; it must support the same interface as
        scipy.optimize.fmin_l_bfgs_b.
        :return:
        """
        Serializable.quick_init(self, locals())
        super(REPS, self).__init__(**kwargs)
        self.epsilon = epsilon
        self.L2_reg_dual = L2_reg_dual
        self.L2_reg_loss = L2_reg_loss
        self.max_opt_itr = max_opt_itr
        self.optimizer = optimizer
        self.opt_info = None
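
The default optimizer is scipy.optimize.fmin_l_bfgs_b, which takes a function returning (value, gradient) plus an initial point and returns the minimizer. A toy objective standing in for the REPS dual shows the required interface:

import numpy as np
import scipy.optimize

def toy_dual(x):
    # Returns (value, gradient), which fmin_l_bfgs_b expects when no
    # separate gradient function is supplied.
    value = float(np.sum((x - 1.0) ** 2))
    grad = 2.0 * (x - 1.0)
    return value, grad

x_opt, f_opt, info = scipy.optimize.fmin_l_bfgs_b(toy_dual, x0=np.zeros(3), maxiter=50)

Example #10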
    def __init__(self, env_name, record_video=True, video_schedule=None, log_dir=None, record_log=True,
                 force_reset=False):
        if log_dir is None:
            if logger.get_snapshot_dir() is None:
                logger.log("Warning: skipping Gym environment monitoring since snapshot_dir not configured.")
            else:
                log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
        Serializable.quick_init(self, locals())

        env = gym.envs.make(env_name)
        self.env = env
        self.env_id = env.spec.id

        monitor_manager.logger.setLevel(logging.WARNING)

        # Recording video requires log recording to be enabled as well.
        assert not (not record_log and record_video)

        if log_dir is None or record_log is False:
            self.monitoring = False
        else:
            if not record_video:
                video_schedule = NoVideoSchedule()
            else:
                if video_schedule is None:
                    video_schedule = CappedCubicVideoSchedule()
            self.env = gym.wrappers.Monitor(self.env, log_dir, video_callable=video_schedule, force=True)
            self.monitoring = True

        self._observation_space = convert_gym_space(env.observation_space)
        self._action_space = convert_gym_space(env.action_space)
        self._horizon = env.spec.timestep_limit
        self._log_dir = log_dir
        self._force_reset = force_reset
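
video_schedule must be a callable mapping an episode id to a bool, which is the interface gym.wrappers.Monitor's video_callable expects; NoVideoSchedule presumably always returns False, while CappedCubicVideoSchedule records increasingly rarely. A minimal custom schedule under that interface (illustrative, not part of the original code):

class EveryNEpisodes(object):
    # Records every n-th episode, starting with episode 0.
    def __init__(self, n):
        self.n = n

    def __call__(self, episode_id):
        return episode_id % self.n == 0

Example #11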
 def __init__(
     self,
     env,
     obs_noise=1e-1,
 ):
     super(NoisyObservationEnv, self).__init__(env)
     Serializable.quick_init(self, locals())
     self.obs_noise = obs_noise
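
The wrapper presumably perturbs each observation with Gaussian noise of scale obs_noise in step(). A hedged sketch of that step logic (illustrative; the real wrapper may differ in detail):

import numpy as np

def noisy_step(env, action, obs_noise):
    # Forward the action, then add isotropic Gaussian noise to the
    # returned observation before handing it to the agent.
    obs, reward, done, info = env.step(action)
    noisy_obs = obs + obs_noise * np.random.randn(*np.shape(obs))
    return noisy_obs, reward, done, info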
Example #12
 def __init__(self, observation_space, action_space):
     """
     :type observation_space: Space
     :type action_space: Space
     """
     Serializable.quick_init(self, locals())
     self._observation_space = observation_space
     self._action_space = action_space
Example #13
    def __init__(
        self,
        name,
        output_dim,
        hidden_sizes,
        hidden_nonlinearity,
        output_nonlinearity,
        hidden_W_init=L.XavierUniformInitializer(),
        hidden_b_init=tf.zeros_initializer,
        output_W_init=L.XavierUniformInitializer(),
        output_b_init=tf.zeros_initializer,
        input_var=None,
        input_layer=None,
        input_shape=None,
        batch_normalization=False,
        weight_normalization=False,
    ):

        Serializable.quick_init(self, locals())

        with tf.variable_scope(name):
            if input_layer is None:
                l_in = L.InputLayer(shape=(None, ) + input_shape,
                                    input_var=input_var,
                                    name="input")
            else:
                l_in = input_layer
            self._layers = [l_in]
            l_hid = l_in
            if batch_normalization:
                l_hid = L.batch_norm(l_hid)
            for idx, hidden_size in enumerate(hidden_sizes):
                l_hid = L.DenseLayer(l_hid,
                                     num_units=hidden_size,
                                     nonlinearity=hidden_nonlinearity,
                                     name="hidden_%d" % idx,
                                     W=hidden_W_init,
                                     b=hidden_b_init,
                                     weight_normalization=weight_normalization)
                if batch_normalization:
                    l_hid = L.batch_norm(l_hid)
                self._layers.append(l_hid)
            l_out = L.DenseLayer(l_hid,
                                 num_units=output_dim,
                                 nonlinearity=output_nonlinearity,
                                 name="output",
                                 W=output_W_init,
                                 b=output_b_init,
                                 weight_normalization=weight_normalization)
            if batch_normalization:
                l_out = L.batch_norm(l_out)
            self._layers.append(l_out)
            self._l_in = l_in
            self._l_out = l_out
            # self._input_var = l_in.input_var
            self._output = L.get_output(l_out)

            LayersPowered.__init__(self, l_out)
Example #14
    def __init__(self,
                 output_dim,
                 hidden_sizes,
                 hidden_nonlinearity,
                 output_nonlinearity,
                 hidden_W_init=LI.GlorotUniform(),
                 hidden_b_init=LI.Constant(0.),
                 output_W_init=LI.GlorotUniform(),
                 output_b_init=LI.Constant(0.),
                 name=None,
                 input_var=None,
                 input_layer=None,
                 input_shape=None,
                 batch_norm=False):

        Serializable.quick_init(self, locals())

        if name is None:
            prefix = ""
        else:
            prefix = name + "_"

        if input_layer is None:
            l_in = L.InputLayer(shape=(None, ) + input_shape,
                                input_var=input_var)
        else:
            l_in = input_layer
        self._layers = [l_in]
        l_hid = l_in
        for idx, hidden_size in enumerate(hidden_sizes):
            l_hid = L.DenseLayer(
                l_hid,
                num_units=hidden_size,
                nonlinearity=hidden_nonlinearity,
                name="%shidden_%d" % (prefix, idx),
                W=hidden_W_init,
                b=hidden_b_init,
            )
            if batch_norm:
                l_hid = L.batch_norm(l_hid)
            self._layers.append(l_hid)

        l_out = L.DenseLayer(
            l_hid,
            num_units=output_dim,
            nonlinearity=output_nonlinearity,
            name="%soutput" % (prefix, ),
            W=output_W_init,
            b=output_b_init,
        )
        self._layers.append(l_out)
        self._l_in = l_in
        self._l_out = l_out
        # self._input_var = l_in.input_var
        self._output = L.get_output(l_out)
        LasagnePowered.__init__(self, [l_out])
Example #15
 def __init__(
     self,
     env,
     action_delay=3,
 ):
     assert action_delay > 0, "action_delay must be positive; with no delay this wrapper is pointless"
     super(DelayedActionEnv, self).__init__(env)
     Serializable.quick_init(self, locals())
     self.action_delay = action_delay
     self._queued_actions = None
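
The delay is presumably implemented with a FIFO queue of pending actions, pre-filled on reset so the first action_delay steps execute a default action. A compact sketch of that mechanism (assumed behavior, illustrative names):

from collections import deque

class DelayQueueSketch(object):
    # Enqueue each new action and execute the one issued action_delay
    # steps earlier; construction pre-fills the queue with defaults.
    def __init__(self, env, action_delay, default_action):
        self.env = env
        self.queue = deque([default_action] * action_delay)

    def step(self, action):
        self.queue.append(action)
        return self.env.step(self.queue.popleft())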
Example #16
 def __init__(self, env_spec, mu=0, theta=0.15, sigma=0.3, **kwargs):
     assert isinstance(env_spec.action_space, Box)
     assert len(env_spec.action_space.shape) == 1
     Serializable.quick_init(self, locals())
     self.mu = mu
     self.theta = theta
     self.sigma = sigma
     self.action_space = env_spec.action_space
     self.state = np.ones(self.action_space.flat_dim) * self.mu
     self.reset()
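
mu, theta, and sigma parameterize an Ornstein-Uhlenbeck process: the state mean-reverts toward mu at rate theta, with sigma-scaled Gaussian noise. One step of the standard discretized update, a sketch of what the strategy's state evolution likely looks like:

import numpy as np

def ou_step(state, mu, theta, sigma, rng=np.random):
    # Pull the state toward mu, then add Gaussian noise; this is the
    # usual discretized Ornstein-Uhlenbeck update.
    return state + theta * (mu - state) + sigma * rng.randn(*state.shape)

Example #17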
 def __init__(
         self,
         optimizer=None,
         optimizer_args=None,
         **kwargs):
     Serializable.quick_init(self, locals())
     if optimizer is None:
         if optimizer_args is None:
             optimizer_args = dict()
         optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
     super(PPO, self).__init__(optimizer=optimizer, **kwargs)
Example #18
 def __init__(
     self,
     env,
     n_steps=4,
     axis=0,
 ):
     super().__init__(env)
     Serializable.quick_init(self, locals())
     self.n_steps = n_steps
     self.axis = axis
     self.buffer = None
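
The buffer presumably holds the last n_steps observations, concatenated along axis to form the stacked observation. A hedged sketch of the buffering step (illustrative helper):

import numpy as np

def stack_step(buffer_, obs, n_steps, axis=0):
    # Keep the most recent n_steps observations (repeating the first one
    # while the buffer is empty) and concatenate them along `axis`.
    if buffer_ is None:
        buffer_ = [obs] * n_steps
    else:
        buffer_ = buffer_[1:] + [obs]
    return buffer_, np.concatenate(buffer_, axis=axis)

Example #19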
 def __init__(self,
              env_spec,
              max_sigma=1.0,
              min_sigma=0.1,
              decay_period=1000000):
     assert isinstance(env_spec.action_space, Box)
     assert len(env_spec.action_space.shape) == 1
     Serializable.quick_init(self, locals())
     self._max_sigma = max_sigma
     self._min_sigma = min_sigma
     self._decay_period = decay_period
     self._action_space = env_spec.action_space
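
max_sigma, min_sigma, and decay_period suggest a linear anneal of the exploration noise over the first decay_period steps. A sketch of that schedule (assumed form):

def sigma_at(t, max_sigma=1.0, min_sigma=0.1, decay_period=1000000):
    # Linearly interpolate from max_sigma down to min_sigma, then hold.
    frac = min(1.0, float(t) / decay_period)
    return max_sigma - (max_sigma - min_sigma) * frac

Example #20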
 def __init__(self,
              height_bonus=1.,
              goal_cart_pos=0.6,
              *args, **kwargs):
     super(MountainCarEnv, self).__init__(
         self.model_path("mountain_car.xml.mako"),
         *args, **kwargs
     )
     self.max_cart_pos = 2
     self.goal_cart_pos = goal_cart_pos
     self.height_bonus = height_bonus
     self.cart = find_body(self.world, "cart")
     Serializable.quick_init(self, locals())
Example #21
 def __init__(self,
              vel_deviation_cost_coeff=1e-2,
              alive_bonus=0.2,
              ctrl_cost_coeff=1e-3,
              impact_cost_coeff=1e-5,
              *args,
              **kwargs):
     self.vel_deviation_cost_coeff = vel_deviation_cost_coeff
     self.alive_bonus = alive_bonus
     self.ctrl_cost_coeff = ctrl_cost_coeff
     self.impact_cost_coeff = impact_cost_coeff
     super(SimpleHumanoidEnv, self).__init__(*args, **kwargs)
     Serializable.quick_init(self, locals())
Example #22
    def __init__(
        self,
        name,
        env_spec,
        conv_filters,
        conv_filter_sizes,
        conv_strides,
        conv_pads,
        hidden_sizes=[],
        hidden_nonlinearity=NL.rectify,
        output_nonlinearity=NL.softmax,
        prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        self._env_spec = env_spec

        if prob_network is None:
            prob_network = ConvNetwork(
                input_shape=env_spec.observation_space.shape,
                output_dim=env_spec.action_space.n,
                conv_filters=conv_filters,
                conv_filter_sizes=conv_filter_sizes,
                conv_strides=conv_strides,
                conv_pads=conv_pads,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=output_nonlinearity,
                name="prob_network",
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function(
            [prob_network.input_layer.input_var],
            L.get_output(prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalConvPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])
Example #23
    def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.rectify,
            hidden_W_init=LI.HeUniform(),
            hidden_b_init=LI.Constant(0.),
            output_nonlinearity=NL.tanh,
            output_W_init=LI.Uniform(-3e-3, 3e-3),
            output_b_init=LI.Uniform(-3e-3, 3e-3),
            bn=False):
        Serializable.quick_init(self, locals())

        l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim))

        l_hidden = l_obs
        if bn:
            l_hidden = batch_norm(l_hidden)

        for idx, size in enumerate(hidden_sizes):
            l_hidden = L.DenseLayer(
                l_hidden,
                num_units=size,
                W=hidden_W_init,
                b=hidden_b_init,
                nonlinearity=hidden_nonlinearity,
                name="h%d" % idx
            )
            if bn:
                l_hidden = batch_norm(l_hidden)

        l_output = L.DenseLayer(
            l_hidden,
            num_units=env_spec.action_space.flat_dim,
            W=output_W_init,
            b=output_b_init,
            nonlinearity=output_nonlinearity,
            name="output"
        )

        # Note the deterministic=True argument. It makes sure that when getting
        # actions from single observations, we do not update params in the
        # batch normalization layers

        action_var = L.get_output(l_output, deterministic=True)
        self._output_layer = l_output

        self._f_actions = ext.compile_function([l_obs.input_var], action_var)

        super(DeterministicMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [l_output])
Example #24
 def __init__(self, desc='4x4'):
     Serializable.quick_init(self, locals())
     if isinstance(desc, str):
         desc = MAPS[desc]
     desc = np.array(list(map(list, desc)))
     desc[desc == '.'] = 'F'
     desc[desc == 'o'] = 'H'
     desc[desc == 'x'] = 'W'
     self.desc = desc
     self.n_row, self.n_col = desc.shape
     (start_x, ), (start_y, ) = np.nonzero(desc == 'S')
     self.start_state = start_x * self.n_col + start_y
     self.state = None
     self.domain_fig = None
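
The constructor normalizes map characters ('.' to F for free, 'o' to H for hole, 'x' to W for wall) and flattens the start cell into a single integer state. A worked instance on a tiny hypothetical map:

import numpy as np

desc = np.array(list(map(list, ["S.", ".o"])))  # hypothetical 2x2 map
desc[desc == '.'] = 'F'
desc[desc == 'o'] = 'H'
(start_x,), (start_y,) = np.nonzero(desc == 'S')
start_state = start_x * desc.shape[1] + start_y
assert start_state == 0  # 'S' sits at row 0, column 0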
Example #25
 def __init__(self,
              optimizer=None,
              optimizer_args=None,
              positive_adv=None,
              **kwargs):
     Serializable.quick_init(self, locals())
     if optimizer is None:
         if optimizer_args is None:
             optimizer_args = dict()
         optimizer = LbfgsOptimizer(**optimizer_args)
     super(ERWR, self).__init__(
         optimizer=optimizer,
         positive_adv=True if positive_adv is None else positive_adv,
         **kwargs)
Example #26
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        regressor_args=None,
    ):
        Serializable.quick_init(self, locals())
        super(GaussianConvBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianConvRegressor(
            input_shape=env_spec.observation_space.shape,
            output_dim=1,
            name="vf",
            **regressor_args)
Example #27
    def __init__(
            self,
            name,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.tanh,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)
        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim

        with tf.variable_scope(name):
            if prob_network is None:
                prob_network = self.create_MLP(
                    input_shape=(obs_dim,),
                    output_dim=env_spec.action_space.n,
                    hidden_sizes=hidden_sizes,
                    name="prob_network",
                )
            self._l_obs, self._l_prob = self.forward_MLP(
                'prob_network', prob_network,
                n_hidden=len(hidden_sizes), input_shape=(obs_dim,),
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=tf.nn.softmax, reuse=None)

            # Forward pass for a caller-supplied input tensor; reuses the
            # same softmax output head as the network built above.
            self._forward_out = lambda x, is_train: self.forward_MLP(
                'prob_network', prob_network,
                n_hidden=len(hidden_sizes),
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=tf.nn.softmax,
                input_tensor=x, is_training=is_train)[1]

            self._f_prob = tensor_utils.compile_function(
                [self._l_obs],
                L.get_output(self._l_prob)
            )

            self._dist = Categorical(env_spec.action_space.n)
Example #28
    def __init__(self,
                 tf_optimizer_cls=None,
                 tf_optimizer_args=None,
                 step_size=1e-3,
                 max_epochs=1000,
                 tolerance=1e-6,
                 batch_size=32,
                 callback=None,
                 verbose=False,
                 init_learning_rate=None,
                 **kwargs):
        """

        :param tf_optimizer_cls: TensorFlow optimizer class (defaults to tf.train.AdamOptimizer).
        :param tf_optimizer_args: Constructor arguments for the optimizer (defaults to dict(learning_rate=step_size)).
        :param step_size: Learning rate used when tf_optimizer_args is not given.
        :param max_epochs: Maximum number of passes over the dataset.
        :param tolerance: Early-stopping threshold on the change in loss between epochs.
        :param batch_size: None or an integer. If None the whole dataset will be used.
        :param callback: Optional callable invoked during optimization.
        :param kwargs:
        :return:
        """
        Serializable.quick_init(self, locals())
        self._opt_fun = None
        self._target = None
        self._callback = callback
        if tf_optimizer_cls is None:
            tf_optimizer_cls = tf.train.AdamOptimizer
        if tf_optimizer_args is None:
            tf_optimizer_args = dict(learning_rate=step_size)
        self.learning_rate = tf_optimizer_args['learning_rate']
        self._tf_optimizer = tf_optimizer_cls(**tf_optimizer_args)
        self._init_tf_optimizer = None
        if init_learning_rate is not None:
            init_tf_optimizer_args = dict(learning_rate=init_learning_rate)
            self._init_tf_optimizer = tf_optimizer_cls(
                **init_tf_optimizer_args)
        self._max_epochs = max_epochs
        self._tolerance = tolerance
        self._batch_size = batch_size
        self._verbose = verbose
        self._input_vars = None
        self._train_op = None
        self._init_train_op = None
Example #29
 def __init__(
     self,
     env,
     scale_reward=1.,
     normalize_obs=False,
     normalize_reward=False,
     obs_alpha=0.001,
     reward_alpha=0.001,
 ):
     ProxyEnv.__init__(self, env)
     Serializable.quick_init(self, locals())
     self._scale_reward = scale_reward
     self._normalize_obs = normalize_obs
     self._normalize_reward = normalize_reward
     self._obs_alpha = obs_alpha
     self._obs_mean = np.zeros(env.observation_space.flat_dim)
     self._obs_var = np.ones(env.observation_space.flat_dim)
     self._reward_alpha = reward_alpha
     self._reward_mean = 0.
     self._reward_var = 1.
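
obs_alpha and reward_alpha are the rates of exponential moving estimates of the observation and reward statistics used for normalization. A sketch of the observation update; this is the common running-estimate form, and the wrapper's exact details may differ:

import numpy as np

def update_obs_estimate(obs, mean, var, alpha=0.001):
    # Blend the new observation into the running mean, then update the
    # running variance around the new mean.
    mean = (1 - alpha) * mean + alpha * obs
    var = (1 - alpha) * var + alpha * np.square(obs - mean)
    return mean, var

# A normalized observation would then be (obs - mean) / (np.sqrt(var) + 1e-8).

Example #30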
    def __init__(
        self,
        name,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        with tf.variable_scope(name):
            if prob_network is None:
                prob_network = MLP(
                    input_shape=(env_spec.observation_space.flat_dim, ),
                    output_dim=env_spec.action_space.n,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=tf.nn.softmax,
                    name="prob_network",
                )

            self._l_prob = prob_network.output_layer
            self._l_obs = prob_network.input_layer
            self._f_prob = tensor_utils.compile_function(
                [prob_network.input_layer.input_var],
                L.get_output(prob_network.output_layer))

            self._dist = Categorical(env_spec.action_space.n)

            super(CategoricalMLPPolicy, self).__init__(env_spec)
            LayersPowered.__init__(self, [prob_network.output_layer])
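
The compiled _f_prob maps a batch of flat observations to per-action probabilities; sampling from its output yields actions. A hedged usage sketch (sample_actions is an illustrative helper, not part of the policy):

import numpy as np

def sample_actions(f_prob, observations, rng=np.random):
    # f_prob: (N, obs_dim) -> (N, n_actions) action probabilities.
    probs = f_prob(observations)
    return np.array([rng.choice(len(p), p=p) for p in probs])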