Example #1
    def __init__(self,
                 environment,
                 policy,
                 max_it=1000,
                 num_workers=1,
                 rate=0.1,
                 discount=0.1,
                 log_file=False):
        """Initialize A3C.

        Parameters
        ----------
        environment:
            Environment we want to optimize the policy on.
        policy:
            The policy we want to optimize. It needs to be defined by a
            TensorFlow neural network and expose certain attributes.
        max_it: int
            Maximum number of iterations.
        num_workers: int
            Number of workers.
        rate: float
            Update rate passed to the optimizer.
        discount: float
            Discount for the computation of the discounted reward.
        log_file: string
            Relative path to the directory where a summary event file
            should be generated. If `False` (the default), no TensorFlow
            logs will be stored.
        """
        add_dependency(tf, 'TensorFlow')

        if policy.is_set_up:
            raise ValueError('Policy should not be set up.')

        super(A3C, self).__init__(environment, policy, max_it)

        self.num_workers = num_workers
        self.rate = rate
        self.discount = discount

        self.done = False

        self.log_file = log_file

        self.policy = policy

        # init networks
        with tf.device("/cpu:0"):
            with tf.variable_scope('global'):
                self.p_net = _PolicyNet(self.policy, rate)
                self.v_net = _ValueNet(self.policy, rate)

        self.workers = []
        self.threads = []

        self.global_counter = 0

        self.sess = None
        self.coord = None
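
A minimal construction sketch for the snippet above. Everything except
`A3C` itself is an assumption: `GymWrap` and `NeuralNetwork` stand in for
the unnamed wrapper classes of the later examples, and the hyperparameter
values are illustrative only.

import gym

# Hypothetical usage; `GymWrap` and `NeuralNetwork` are assumed names for
# the classes whose __init__ methods appear in Examples #2 and #3.
env = GymWrap(gym.make('CartPole-v1'), horizon=200)

# The policy must not be set up yet; A3C builds the graph itself
# (see the is_set_up check above).
policy = NeuralNetwork([4, 16, 2], do_setup=False)

agent = A3C(env, policy,
            max_it=5000,     # maximum number of iterations
            num_workers=4,   # parallel worker threads
            rate=1e-3,       # optimizer update rate
            discount=0.99)   # discount for the discounted reward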
Example #2
    def __init__(self, env, horizon=100, render=False):
        """Initialize attributes.

        Parameters
        ----------
        env : gym environment
            Instance of the gym environment that should be optimized on.
        horizon : integer
            Horizon for rollout.
        render : boolean
            Default: False. If True, the simulation will be rendered
            during rollouts on this instance.
        """
        add_dependency(gym, 'Gym')

        EnvironmentBase.__init__(self, env.observation_space, env.action_space,
                                 horizon)
        self.environment = env.unwrapped
        self.render = render
        self.done = False

        self.environment.reset()
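
A short usage sketch for this wrapper. The class name `GymWrap` is an
assumption, since the snippet only shows the __init__ body.

import gym

# Wrap a raw gym environment (class name assumed for illustration).
raw_env = gym.make('CartPole-v1')
env = GymWrap(raw_env, horizon=100, render=False)

# The wrapper stores the unwrapped environment and resets it on creation.
assert env.environment is raw_env.unwrapped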
Example #3
    def __init__(self,
                 layers,
                 weights=None,
                 init_weights=None,
                 activation=None,
                 dtype='float',
                 scope='global',
                 do_setup=False):
        """Initialize Neural Network wrapper."""
        add_dependency(tf, 'TensorFlow')

        if len(layers) < 2:
            raise ValueError('At least two layers are needed.')

        # determine state and action space
        state_space = RdSpace((layers[0], ))
        action_space = RdSpace((layers[-1], ))

        # store arguments convenient for copy operation
        self.args = [layers]
        self.kwargs = {
            'weights': weights,
            'init_weights': init_weights,
            'activation': activation,
            'dtype': dtype
        }

        self.state_space = state_space
        self.action_space = action_space

        self.dtype = dtype
        self.layers = layers
        self.scope = scope

        self.is_set_up = False

        if init_weights is None:
            self.init_weights = default_init_weights
        else:
            self.init_weights = init_weights

        # Activation function: one activation per hidden layer.
        if activation is None:
            activation = (len(layers) - 2) * [tf.sigmoid]
        elif isinstance(activation, list):
            if len(activation) != len(layers) - 2:
                raise ValueError('Activation list has wrong size.')
        else:
            # Broadcast a single callable to all hidden layers.
            activation = (len(layers) - 2) * [activation]

        self.activation = activation

        # Symbols
        self.X = tf.placeholder(dtype, shape=[None, layers[0]], name='X')
        self.a = tf.placeholder(dtype, shape=[None, layers[-1]], name='a')

        if do_setup:
            with tf.variable_scope(self.scope):
                self.setup()
        else:
            # Make sure all fields exist
            self.W_action = None
            self.W_var = None
            self.a_pred = None
            self.var = None
            self.h = None

        self.sess = None
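
A construction sketch for the wrapper above, assuming the class is named
`NeuralNetwork` and TensorFlow 1.x (the code uses `tf.placeholder`). It
illustrates the activation convention: `layers` lists all layer widths
including input and output, so there are `len(layers) - 2` hidden-layer
activations.

import tensorflow as tf

# 4 inputs, one hidden layer of 10 units, 2 outputs; a single callable
# is broadcast to all len(layers) - 2 = 1 hidden layers.
net = NeuralNetwork([4, 10, 2], activation=tf.tanh, do_setup=True)

# One activation per hidden layer, given explicitly as a list.
net2 = NeuralNetwork([4, 10, 10, 2],
                     activation=[tf.tanh, tf.nn.relu],
                     scope='worker_0')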
Example #4
    def __init__(self,
                 environment, policy, max_it, avg_reward, window,
                 kernel, likelihood, fmin, bounds, beta=3.0, threshold=0,
                 scaling='auto', swarm_size=20, info=None):
        """Initialize Attributes.

        Parameters
        ----------
        environment :
            Environment to be optimized.
        policy :
            Policy to be optimized.
        max_it : integer
            Maximum number of iterations before we abort.
        avg_reward : integer
            Average reward at which the optimization is considered
            finished.
        window : integer
            Window size for computing the average reward.
        kernel : GPy kernel or list of GPy kernels
            Kernel used to initialize the Gaussian process. If this is a
            list, multiple Gaussian processes will be initialized. The size
            of this argument has to agree with the size of the likelihood.
        likelihood : GPy likelihood or list of GPy likelihoods
            Likelihood used to initialize the Gaussian processes. If this
            is a list, multiple Gaussian processes will be initialized. The
            size of this argument has to agree with the size of the kernel.
        fmin : float or list of floats
            Safety threshold for the function value. If multiple safety
            constraints are used, this can also be a list of floats. The
            first entry is always the one for the function values and can
            be set to None if no such constraint is wanted.
        bounds : pair of floats or list of pairs of floats
            If a list is given, then each pair represents the lower/upper bound
            in each dimension. Otherwise, we assume the same bounds for all
            dimensions. This is mostly important for plotting or to restrict
            particles to a certain domain.
        beta : float or callable
            A constant or a function of the time step that scales the
            confidence interval of the acquisition function.
        threshold : float or list of floats
            The algorithm will not try to expand any points that are below this
            threshold. This makes the algorithm stop expanding points
            eventually. If a list, this represents the stopping criterion for
            all the gps. This ignores the scaling factor.
        scaling : list of floats or "auto"
            A list used to scale the GP uncertainties to compensate for
            different input sizes. This should be set to the maximal variance
            of each kernel. You should probably set this to "auto" unless your
            kernel is non-stationary.
        swarm_size : int
            The number of particles in each of the optimization swarms
        info :
            Dummy argument that can hold anything usable to identify the
            configuration.
        """
        add_dependency(safeopt, 'SafeOpt')
        add_dependency(GPy, 'GPy')

        # store the `SafeOpt` arguments.
        gp_opt_par = {
            'fmin': fmin,
            'bounds': bounds,
            'beta': beta,
            'threshold': threshold,
            'scaling': scaling,
            'swarm_size': swarm_size
        }

        # store the kernel arguments
        if not isinstance(kernel, list):
            kernel = [kernel]
        if not isinstance(likelihood, list):
            likelihood = [likelihood]
        assert len(likelihood) == len(kernel), (
            'kernel and likelihood need to have the same length (%d != %d)'
            % (len(likelihood), len(kernel)))

        gp_par = (kernel, likelihood)

        super(SafeOptSwarm, self).__init__(safeopt.SafeOptSwarm, gp_opt_par,
                                           gp_par, environment, policy, max_it,
                                           avg_reward, window)
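
A construction sketch for the snippet above, assuming `env` and `policy`
objects exist as in the earlier examples. The GPy calls (`GPy.kern.RBF`,
`GPy.likelihoods.Gaussian`) are standard, but all numeric values are
illustrative only.

import GPy

# One GP over a 2-D parameter space. Kernel and likelihood are passed as
# bare objects here; lists of equal length would create multiple GPs.
kernel = GPy.kern.RBF(input_dim=2, variance=2.0, lengthscale=0.2)
likelihood = GPy.likelihoods.Gaussian(variance=0.05 ** 2)

# `env` and `policy` are assumed to be defined as in Examples #1-#3.
opt = SafeOptSwarm(env, policy, max_it=100, avg_reward=-20, window=3,
                   kernel=kernel, likelihood=likelihood,
                   fmin=[-100],                    # safety threshold
                   bounds=[(-1., 1.), (-1., 1.)],  # one pair per dimension
                   beta=3.0, swarm_size=20)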
Example #5
    def __init__(self,
                 environment, policy, max_it, avg_reward, window,
                 kernel, likelihood, parameter_set, fmin,
                 lipschitz=None, beta=3.0, num_contexts=0, threshold=0,
                 scaling='auto', info=None):
        """Initialize Attributes.

        Parameters
        ----------
        environment :
            Environment to be optimized.
        policy :
            Policy to be optimized.
        max_it : integer
            Maximum number of iterations before we abort.
        avg_reward : integer
            Average reward at which the optimization is considered
            finished.
        window : integer
            Window size for computing the average reward.
        kernel : GPy kernel or list of GPy kernels
            Kernel used to initialize the Gaussian process. If this is a
            list, multiple Gaussian processes will be initialized. The size
            of this argument has to agree with the size of the likelihood.
        likelihood : GPy likelihood or list of GPy likelihoods
            Likelihood used to initialize the Gaussian processes. If this
            is a list, multiple Gaussian processes will be initialized. The
            size of this argument has to agree with the size of the kernel.
        parameter_set : 2d-array
            List of candidate parameters, one combination per row.
        fmin : float or list of floats
            Safety threshold for the function value. If multiple safety
            constraints are used, this can also be a list of floats. The
            first entry is always the one for the function values and can
            be set to None if no such constraint is wanted.
        lipschitz : list of floats
            The Lipschitz constant of the system, if None the GP confidence
            intervals are used directly.
        beta : float or callable
            A constant or a function of the time step that scales the
            confidence interval of the acquisition function.
        num_contexts : integer
            Number of contexts considered during the optimization.
        threshold : float or list of floats
            The algorithm will not try to expand any points that are below this
            threshold. This makes the algorithm stop expanding points
            eventually. If a list, this represents the stopping criterion for
            all the gps. This ignores the scaling factor.
        scaling : list of floats or "auto"
            A list used to scale the GP uncertainties to compensate for
            different input sizes. This should be set to the maximal variance
            of each kernel. You should probably leave this to "auto" unless
            your kernel is non-stationary.
        info :
            Dummy argument that can hold anything usable to identify the
            configuration.
        """
        add_dependency(safeopt, 'SafeOpt')
        add_dependency(GPy, 'GPy')

        # store the `SafeOpt` arguments.
        gp_opt_par = {
            'parameter_set': parameter_set,
            'fmin': fmin,
            'lipschitz': lipschitz,
            'beta': beta,
            'num_contexts': num_contexts,
            'threshold': threshold,
            'scaling': scaling}

        # store the kernel arguments
        if not isinstance(kernel, list):
            kernel = [kernel]
        if not isinstance(likelihood, list):
            likelihood = [likelihood]
        assert len(likelihood) == len(kernel), (
            'kernel and likelihood need to have the same length (%d != %d)'
            % (len(likelihood), len(kernel)))

        gp_par = (kernel, likelihood)

        super(SafeOpt, self).__init__(safeopt.SafeOpt, gp_opt_par, gp_par,
                                      environment, policy, max_it, avg_reward,
                                      window)
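
The same pattern for the discrete variant: a sketch assuming `env` and
`policy` from the earlier examples. `safeopt.linearly_spaced_combinations`
is the helper the safeopt package provides for building the
`parameter_set` grid; the numeric values are illustrative only.

import GPy
import safeopt

# Discretize a 1-D parameter space; SafeOpt only evaluates candidates
# from this grid.
bounds = [(-1., 1.)]
parameter_set = safeopt.linearly_spaced_combinations(bounds, 1000)

kernel = GPy.kern.RBF(input_dim=1, variance=2.0, lengthscale=0.2)
likelihood = GPy.likelihoods.Gaussian(variance=0.05 ** 2)

# `env` and `policy` are assumed to be defined as in Examples #1-#3.
opt = SafeOpt(env, policy, max_it=100, avg_reward=-20, window=3,
              kernel=kernel, likelihood=likelihood,
              parameter_set=parameter_set,
              fmin=[-100],     # safety threshold on the function value
              lipschitz=None,  # use GP confidence intervals directly
              beta=3.0)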