Esempio n. 1
0
    def __init__(self, config: Config):
        """
        Read the stepsize hyperparameters from config and allocate the per-weight state.

        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        parameter_size          int             10                  size of the weight vector
        mu                      float           0.01                value of mu in table 1
        tau                     float           10000               value of tau in table 1
        init_stepsize           float           0.001               initial stepsize value
        increase_setting        str             'keep'              what happens to the stepsize information of the
                                                                    existing features when new features are added:
                                                                        keep: leave it as it was before the new
                                                                              feature was added
                                                                        reset: put it back to the initial settings
                                                                        max: use the larger of the initial and the
                                                                             current stepsize
        """
        self.parameter_size = check_attribute(config, 'parameter_size', 10, data_type=int)
        self.mu = check_attribute(config, 'mu', 0.01, data_type=float)
        self.tau = check_attribute(config, 'tau', 10000.0, data_type=float)
        self.init_stepsize = check_attribute(config, 'init_stepsize', 0.001, data_type=float)
        self.increase_setting = check_attribute(config, 'increase_setting', 'keep', data_type=str,
                                                choices=['keep', 'reset', 'max'])

        # every weight starts with the same stepsize; v and h start at zero
        num_weights = self.parameter_size
        self.stepsizes = np.full(num_weights, self.init_stepsize, dtype=np.float64)
        self.v = np.zeros(num_weights)
        self.h = np.zeros(num_weights)
Esempio n. 2
0
    def __init__(self, config: Config):
        """
        Read the hyperparameters from config and allocate the per-weight vectors.

        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        parameter_size          int             10                  size of the weight vector
        theta                   float           0.01                meta-stepsize
        tau                     float           10000               eta decay
        init_stepsize           float           0.001               initial stepsize value
        """
        self.parameter_size = check_attribute(config, 'parameter_size', 10, data_type=int)
        self.theta = check_attribute(config, 'theta', 0.01, data_type=float)
        self.tau = check_attribute(config, 'tau', 10000.0, data_type=float)
        self.init_stepsize = check_attribute(config, 'init_stepsize', 0.001, data_type=float)

        num_weights = self.parameter_size
        # beta holds the log of the stepsize, alpha the stepsize itself
        self.beta = np.full(num_weights, np.log(self.init_stepsize), dtype=np.float64)
        self.alpha = np.full(num_weights, self.init_stepsize, dtype=np.float64)
        # the remaining per-weight vectors all start at zero
        self.eta = np.zeros(num_weights, dtype=np.float64)
        self.h = np.zeros(num_weights, dtype=np.float64)
        self.z = np.zeros(num_weights, dtype=np.float64)
    def __init__(self, config: Config):
        """
        Set up the feature noise, the target weights, the feature matrices and the starting state.

        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        init_noise_var          float           0.1                 initial variance of the feature noise
        num_obs_features        int             4                   number of observable features for the function
                                                                    approximator
        """
        self.init_noise_var = check_attribute(config, attr_name='init_noise_var', default_value=0.1)
        self.num_obs_features = check_attribute(config, attr_name='num_obs_features', default_value=4)

        # one noise variance per observable feature, all starting at init_noise_var
        self.noise_var = self.init_noise_var + np.zeros(self.num_obs_features, dtype=np.float64)
        self.noise_mean = 0

        self.num_true_features = 4

        self.num_states = 13
        # the weights (0, -8, -16, -24) are divided by their Euclidean norm
        unnormalized_weights = np.array((0, -8, -16, -24), dtype=np.float64)
        self.optimal_weights_norm = np.sqrt(24.0**2 + 16**2 + 8**2)
        self.optimal_weights = unnormalized_weights / self.optimal_weights_norm
        feature_matrices = self._initialize_feature_matrices()
        self.state_features, self.observable_features = feature_matrices
        # steady state distribution computed using the partial sum of the Cesaro limit with 10 million iterations
        self.steady_state_distribution = np.array([
            0.1084, 0.0723, 0.0723, 0.0722, 0.0724,
            0.072, 0.0729, 0.0712, 0.0745, 0.0678,
            0.0813, 0.0542, 0.1084,
        ])

        start_state = 12
        self.current_state = start_state
        self.current_features = self.state_features[start_state]
Esempio n. 4
0
    def __init__(self, config: Config):
        """
        Read the stepsize settings from config and initialize the running statistics.

        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        parameter_size          int             10                  size of the weight vector
        alpha                   float           0.001               step size parameter
        rescale                 bool            False               if True, keeps a running average of the square sum
                                                                    of the features, which resets every time a new
                                                                    feature is added
        """
        self.parameter_size = check_attribute(config, 'parameter_size', 10, data_type=int)
        self.alpha = check_attribute(config, 'alpha', 0.001, data_type=float)
        self.rescale = check_attribute(config, 'rescale', False, data_type=bool)

        # the same stepsize alpha for every weight
        self.stepsizes = np.full(self.parameter_size, self.alpha, dtype=np.float64)
        # counter and accumulator both start at zero
        self.count = 0
        self.running_sum = 0
 def __init__(self, config: Config):
     """
     Read the feature counts from config and allocate a zeroed theta_hat vector.

     Parameters in config:
     Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
     num_obs_features        int             5                   number of initial true features
     max_num_features        int             20000               maximum number of features that can be added
     """
     self.num_obs_features = check_attribute(config, attr_name='num_obs_features', default_value=5)
     self.max_num_features = check_attribute(config, attr_name='max_num_features', default_value=20000)
     # theta_hat has max_num_features entries from the start
     capacity = self.max_num_features
     self.theta_hat = np.zeros(capacity, dtype=np.float64)
Esempio n. 6
0
    def __init__(self, config: Config):
        """
        Read the hyperparameters from config and allocate the per-weight beta and h vectors.

        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        parameter_size          int             10                  size of the weight vector
        init_beta               float           log(0.001)          initial value of beta
        theta                   float           0.1                 meta-stepsize parameter
        increase_setting        str             'keep'              what happens to the stepsize information of the
                                                                    existing features when new features are added:
                                                                        keep: leave it as it was before the new
                                                                              feature was added
                                                                        reset: put it back to the initial settings
                                                                        max: use the larger of the initial and the
                                                                             current stepsize
        """
        self.parameter_size = check_attribute(config, 'parameter_size', 10, data_type=int)
        self.init_beta = check_attribute(config, 'init_beta', np.log(0.001), data_type=float)
        self.theta = check_attribute(config, 'theta', 0.1, data_type=float)
        self.increase_setting = check_attribute(config, 'increase_setting', 'keep', data_type=str,
                                                choices=['keep', 'reset', 'max'])

        num_weights = self.parameter_size
        # every entry of beta starts at init_beta
        self.beta = np.full(num_weights, self.init_beta, dtype=np.float64)
        self.beta_max, self.beta_min = 100, -100
        self.h = np.zeros(num_weights)
    def __init__(self, config: Config):
        """
        Read the feature counts from config, draw a unit-norm theta and build the feature-type mask.

        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        num_true_features       int             20                  number of features used to compute the target
        num_obs_features        int             5                   number of observable features for the function
                                                                    approximator
        max_num_features        int             20000               maximum number of features that can be added
        """
        self.num_true_features = check_attribute(config, attr_name='num_true_features', default_value=20)
        self.num_obs_features = check_attribute(config, attr_name='num_obs_features', default_value=5)
        self.max_num_features = check_attribute(config, attr_name='max_num_features', default_value=20000)
        # observable <= true <= maximum number of features
        assert self.num_obs_features <= self.num_true_features <= self.max_num_features

        # uniform draw in [0, 1) for each true feature, scaled to unit Euclidean norm
        raw_theta = np.random.uniform(0, 1, size=self.num_true_features)
        self.theta = raw_theta / np.linalg.norm(raw_theta)

        # True if real feature, otherwise False; only the first num_obs_features entries start as True
        self.feature_type = np.zeros(self.max_num_features, dtype=bool)
        self.feature_type[:self.num_obs_features] = True
Esempio n. 8
0
    def __init__(self, config: Config):
        """
        Read the hyperparameters from config and allocate the per-weight stepsize, m, v and t vectors.

        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        parameter_size          int             10                  size of the weight vector
        init_alpha              float           0.001               initial value of the stepsize
        beta1                   float           0.9                 initial value of beta1
        beta2                   float           0.99                initial value of beta2
        eps                     float           1e-08               epsilon value to prevent division by zero
        increase_setting        str             'keep'              what happens to the stepsize information of the
                                                                    existing features when new features are added:
                                                                        keep: leave it as it was before the new
                                                                              feature was added
                                                                        reset: put it back to the initial settings
        """
        self.parameter_size = check_attribute(config, 'parameter_size', 10, data_type=int)
        self.init_alpha = check_attribute(config, 'init_alpha', 0.001, data_type=float)
        self.beta1 = check_attribute(config, 'beta1', 0.9, data_type=float)
        self.beta2 = check_attribute(config, 'beta2', 0.99, data_type=float)
        self.eps = check_attribute(config, 'eps', 1e-08, data_type=float)
        self.increase_setting = check_attribute(config, 'increase_setting', 'keep', data_type=str,
                                                choices=['keep', 'reset'])

        num_weights = self.parameter_size
        # every weight starts with stepsize init_alpha
        self.stepsize = np.full(num_weights, self.init_alpha, dtype=np.float64)
        # m, v and t are kept per weight and start at zero
        self.m = np.zeros(num_weights)
        self.v = np.zeros(num_weights)
        self.t = np.zeros(num_weights)
Esempio n. 9
0
    def __init__(self, config):
        """
        Store the config, draw the initial state and build the action and bounds tables.

        Parameters in config:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        norm_state                  bool            True            whether to normalize the state between -1 and 1

        :param config: must be an instance of Config (checked with an assert)
        """
        # the docstring has to be the first statement of the function, otherwise Python treats it as a plain
        # string expression and __init__.__doc__ stays empty
        assert isinstance(config, Config)
        self.config = config

        # environment related variables
        self.norm_state = check_attribute(config, 'norm_state', True)

        # internal state of the environment
        # np.random.random() is in [0, 1), so the initial position lies in [-0.6, -0.4)
        position = -0.6 + np.random.random() * 0.2
        velocity = 0.0
        self.current_state = np.array((position, velocity), dtype=np.float64)
        self.actions = np.array(
            [0, 1, 2], dtype=int)  # 0 = backward, 1 = coast, 2 = forward
        # NOTE(review): presumably the upper and lower bounds of (position, velocity), in the same order as
        # current_state -- confirm against the code that uses them
        self.high = np.array([0.5, 0.07], dtype=np.float64)
        self.low = np.array([-1.2, -0.07], dtype=np.float64)
        self.action_dictionary = {
            0: -1,  # accelerate backwards
            1: 0,  # coast
            2: 1,  # accelerate forwards
        }
Esempio n. 10
0
    def __init__(self, config=None):
        """
        Read the state-space and feature settings from config and set up the per-feature noise parameters.

        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        state_dims              int             2                   number of state dimensions
        state_lims              np.array        ((-1,1),(-1,1))     upper and lower limits of the value of each
                                                                    state dimension, the first dimension must be equal
                                                                    to state_dims
        initial_centers         np.ndarray      ((0.1,0),           the centers for the radial basis functions, stored
                                                (0,0.1),            as self.centers; the second dimension must be
                                                (-0.1,0),           equal to state_dims
                                                (0,-0.1))
        sigma                   float           0.5                 the width of each feature
        init_noise_mean         float           0.0                 mean of the noise of each feature
        init_noise_var          float           0.1                 variance of the noise of each feature

        NOTE(review): config defaults to None and is passed straight to check_attribute; whether that helper
        accepts None is not visible here -- confirm before relying on the default.
        """
        self.state_dims = check_attribute(config, 'state_dims', 2)
        default_lims = np.array(((-1, 1), (-1, 1)), dtype=np.float64)
        self.state_lims = check_attribute(config, 'state_lims', default_lims)
        # one row of limits per state dimension
        assert self.state_lims.shape[0] == self.state_dims
        # the attribute is looked up under the name 'initial_centers' (not 'init_centers')
        default_centers = np.array(((0.1, 0), (0, 0.1), (-0.1, 0), (0, -0.1)))
        self.centers = check_attribute(config, 'initial_centers', default_centers)
        # one coordinate per state dimension for every center
        assert self.centers.shape[1] == self.state_dims
        self.sigma = check_attribute(config, 'sigma', 0.5)
        self.init_noise_mean = check_attribute(config,
                                               'init_noise_mean',
                                               default_value=0.0)
        self.init_noise_var = check_attribute(config,
                                              'init_noise_var',
                                              default_value=0.1)

        # one feature per center; every feature starts with relevance 1 and the initial noise mean and variance
        self.num_features = self.centers.shape[0]
        self.relevant_features = np.ones(self.num_features, dtype=np.float64)
        self.noise_mean = np.zeros(self.num_features,
                                   dtype=np.float64) + self.init_noise_mean
        self.noise_var = np.zeros(self.num_features,
                                  dtype=np.float64) + self.init_noise_var
        # flag to indicate if irrelevant or noisy features have been added
        self.added_noisy_or_irrelevant_features = False