def __init__(self, config):
        """Set up per-action distributions, the baseline and advantage settings.

        Args:
            config: Configuration object. It must provide ``default(...)``,
                an ``actions`` iterable of ``(name, action)`` pairs, and the
                attributes ``baseline``, ``gae_rewards``, ``gae_lambda`` and
                ``normalize_rewards``.

        Raises:
            AssertionError: If a continuous action without an explicit
                distribution defines only one of ``min_value`` /
                ``max_value``.
        """
        # Presumably fills in unset entries from the class defaults -- confirm
        # against the configuration class.
        config.default(PolicyGradientModel.default_config)

        # One distribution object per action, keyed by action name.
        self.distribution = {}
        for name, action in config.actions:
            if 'distribution' in action:
                # Explicit specification: the whole action spec is handed on
                # as keyword arguments for the distribution factory.
                spec_kwargs = dict(action)
                self.distribution[name] = Distribution.from_config(
                    config=action.distribution, kwargs=spec_kwargs)
                continue

            if not action.continuous:
                # Discrete action.
                self.distribution[name] = Categorical(
                    shape=action.shape, num_actions=action.num_actions)
                continue

            # Continuous action: bounds must be given as a pair or not at all.
            # NOTE(review): these asserts are skipped when Python runs with -O.
            if action.min_value is None:
                assert action.max_value is None
                self.distribution[name] = Gaussian(shape=action.shape)
                continue

            assert action.max_value is not None
            self.distribution[name] = Beta(
                min_value=action.min_value,
                max_value=action.max_value,
                shape=action.shape)

        # Baseline is optional; None means no baseline object is created.
        self.baseline = (
            None if config.baseline is None
            else Baseline.from_config(config=config.baseline))

        # Advantage-estimation settings, copied verbatim from the config.
        self.gae_rewards = config.gae_rewards
        self.gae_lambda = config.gae_lambda
        self.normalize_rewards = config.normalize_rewards

        # The parent constructor runs last, after all attributes above exist.
        super(PolicyGradientModel, self).__init__(config)
Ejemplo n.º 2
0
    def __init__(self, config):
        """Set up per-action distributions, the baseline and advantage settings.

        Args:
            config: Configuration object. It must provide ``default(...)``,
                an ``actions`` iterable of ``(name, action)`` pairs, and the
                attributes ``baseline``, ``generalized_advantage_estimation``,
                ``normalize_advantage`` and, when generalized advantage
                estimation is enabled, ``gae_lambda``.
        """
        # Presumably fills in unset entries from the class defaults -- confirm
        # against the configuration class.
        config.default(PolicyGradientModel.default_config)

        # One distribution object per action, keyed by action name.
        self.distribution = {}
        for name, action in config.actions:
            if 'distribution' not in action:
                # No explicit specification: Gaussian for continuous actions
                # (even when bounds are present), Categorical otherwise.
                if action.continuous:
                    fallback = Gaussian()
                else:
                    fallback = Categorical(num_actions=action.num_actions)
                self.distribution[name] = fallback
                continue

            # Explicit specification: pass on only the arguments that apply
            # to this kind of action.
            if not action.continuous:
                extra = {'num_actions': action.num_actions}
            elif 'min_value' in action:
                extra = {
                    'min_value': action.min_value,
                    'max_value': action.max_value,
                }
            else:
                extra = {}
            self.distribution[name] = Distribution.from_config(
                config=action.distribution, kwargs=extra)

        # Baseline is optional; None means no baseline object is created.
        self.baseline = (
            None if config.baseline is None
            else Baseline.from_config(config=config.baseline))

        # The parent constructor runs before the advantage settings are set.
        super(PolicyGradientModel, self).__init__(config)

        # Advantage-estimation settings; gae_lambda is only defined on the
        # instance when generalized advantage estimation is switched on.
        self.generalized_advantage_estimation = (
            config.generalized_advantage_estimation)
        if self.generalized_advantage_estimation:
            self.gae_lambda = config.gae_lambda
        self.normalize_advantage = config.normalize_advantage