Esempio n. 1
0
    def create_distributions(self):
        """
        Creates one distribution object per action in self.actions_spec.

        If self.distributions_spec contains an entry for the action name, that entry
        is built via Distribution.from_spec, receiving a copy of the action spec plus
        'summary_labels' as kwargs. Otherwise the class is picked from the action type:
        'bool' -> Bernoulli, 'int' -> Categorical, 'float' -> Beta when the action
        spec contains 'min_value', else Gaussian. Actions of any other type receive
        no entry in the result.

        Returns: Dict mapping action names to distribution objects.
        """
        result = {}
        for action_name, action_spec in self.actions_spec.items():

            # An explicitly specified distribution overrides the type-based default.
            if self.distributions_spec is not None and action_name in self.distributions_spec:
                extra = dict(action_spec, summary_labels=self.summary_labels)
                result[action_name] = Distribution.from_spec(
                    spec=self.distributions_spec[action_name], kwargs=extra)
                continue

            action_type = action_spec['type']

            if action_type == 'bool':
                result[action_name] = Bernoulli(
                    shape=action_spec['shape'], summary_labels=self.summary_labels)

            elif action_type == 'int':
                result[action_name] = Categorical(
                    shape=action_spec['shape'],
                    num_actions=action_spec['num_actions'],
                    summary_labels=self.summary_labels)

            elif action_type == 'float' and 'min_value' in action_spec:
                # Bounded continuous action.
                result[action_name] = Beta(
                    shape=action_spec['shape'],
                    min_value=action_spec['min_value'],
                    max_value=action_spec['max_value'],
                    summary_labels=self.summary_labels)

            elif action_type == 'float':
                # Continuous action without a 'min_value' entry.
                result[action_name] = Gaussian(
                    shape=action_spec['shape'],
                    summary_labels=self.summary_labels)

        return result
    def __init__(self, config):
        """
        Sets up per-action distributions, the optional baseline and the advantage
        estimation settings, then calls the parent constructor.

        Args:
            config: Configuration object. PolicyGradientModel.default_config is applied
                to it first; afterwards `actions`, `baseline`, `gae_rewards`,
                `gae_lambda` and `normalize_rewards` are read from it.
        """
        config.default(PolicyGradientModel.default_config)

        # One distribution per action.
        self.distribution = {}
        for action_name, action_config in config.actions:
            if 'distribution' in action_config:
                # Explicit distribution config; the action's own settings become kwargs.
                extra = dict(action_config)
                chosen = Distribution.from_config(
                    config=action_config.distribution, kwargs=extra)
            elif not action_config.continuous:
                chosen = Categorical(
                    shape=action_config.shape, num_actions=action_config.num_actions)
            elif action_config.min_value is None:
                # Unbounded continuous action: neither bound may be given.
                assert action_config.max_value is None
                chosen = Gaussian(shape=action_config.shape)
            else:
                # Bounded continuous action: both bounds are required.
                assert action_config.max_value is not None
                chosen = Beta(min_value=action_config.min_value,
                              max_value=action_config.max_value,
                              shape=action_config.shape)
            self.distribution[action_name] = chosen

        # Baseline is optional.
        self.baseline = (
            None if config.baseline is None
            else Baseline.from_config(config=config.baseline)
        )

        # Advantage estimation settings.
        self.gae_rewards = config.gae_rewards
        self.gae_lambda = config.gae_lambda
        self.normalize_rewards = config.normalize_rewards

        super(PolicyGradientModel, self).__init__(config)
Esempio n. 3
0
    def __init__(self, config):
        """
        Sets up per-action distributions and the optional baseline, calls the parent
        constructor, and finally stores the advantage estimation settings.

        Args:
            config: Configuration object. PolicyGradientModel.default_config is applied
                to it first; afterwards `actions`, `baseline`,
                `generalized_advantage_estimation`, `gae_lambda` (only when generalized
                advantage estimation is enabled) and `normalize_advantage` are read.
        """
        config.default(PolicyGradientModel.default_config)

        # One distribution per action.
        self.distribution = {}
        for action_name, action_config in config.actions:
            if 'distribution' not in action_config:
                # Defaults. NOTE: a continuous action falls back to Gaussian here even
                # when it declares 'min_value'; there is no bounded default.
                if action_config.continuous:
                    self.distribution[action_name] = Gaussian()
                else:
                    self.distribution[action_name] = Categorical(
                        num_actions=action_config.num_actions)
                continue

            # Explicit distribution config: pass along only the relevant action settings.
            if not action_config.continuous:
                extra = {'num_actions': action_config.num_actions}
            elif 'min_value' in action_config:
                extra = {'min_value': action_config.min_value,
                         'max_value': action_config.max_value}
            else:
                extra = {}
            self.distribution[action_name] = Distribution.from_config(
                config=action_config.distribution, kwargs=extra)

        # Baseline is optional.
        self.baseline = (
            None if config.baseline is None
            else Baseline.from_config(config=config.baseline)
        )

        super(PolicyGradientModel, self).__init__(config)

        # Advantage estimation settings; gae_lambda is only set when GAE is enabled.
        use_gae = config.generalized_advantage_estimation
        self.generalized_advantage_estimation = use_gae
        if use_gae:
            self.gae_lambda = config.gae_lambda
        self.normalize_advantage = config.normalize_advantage
Esempio n. 4
0
    def __init__(self, states_spec, actions_spec, network_spec, config):
        """
        Builds the network and the per-action distributions inside the name scope
        `config.scope`, stores the entropy regularization setting, then calls the
        parent constructor.

        Args:
            states_spec: Forwarded unchanged to the parent constructor.
            actions_spec: Dict mapping action names to action spec dicts; each spec is
                read for 'type', 'shape' and, depending on type, 'num_actions' or
                'min_value'/'max_value'. Also forwarded to the parent constructor.
            network_spec: Spec handed to Network.from_spec; also forwarded to the
                parent constructor.
            config: Configuration object read for `scope`, `summary_labels`,
                `distributions` and `entropy_regularization`.
        """
        with tf.name_scope(name=config.scope):
            # Network
            self.network = Network.from_spec(
                spec=network_spec,
                kwargs={'summary_labels': config.summary_labels})

            # Distributions, each created within its own '<name>-distribution' scope.
            self.distributions = {}
            for action_name, action_spec in actions_spec.items():

                with tf.name_scope(name=(action_name + '-distribution')):

                    # An explicitly configured distribution overrides the default.
                    if config.distributions is not None and action_name in config.distributions:
                        extra = dict(action_spec, summary_labels=config.summary_labels)
                        self.distributions[action_name] = Distribution.from_spec(
                            spec=config.distributions[action_name], kwargs=extra)
                        continue

                    action_type = action_spec['type']

                    if action_type == 'bool':
                        self.distributions[action_name] = Bernoulli(
                            shape=action_spec['shape'],
                            summary_labels=config.summary_labels)

                    elif action_type == 'int':
                        self.distributions[action_name] = Categorical(
                            shape=action_spec['shape'],
                            num_actions=action_spec['num_actions'],
                            summary_labels=config.summary_labels)

                    elif action_type == 'float' and 'min_value' in action_spec:
                        # Bounded continuous action.
                        self.distributions[action_name] = Beta(
                            shape=action_spec['shape'],
                            min_value=action_spec['min_value'],
                            max_value=action_spec['max_value'],
                            summary_labels=config.summary_labels)

                    elif action_type == 'float':
                        # Continuous action without a 'min_value' entry.
                        self.distributions[action_name] = Gaussian(
                            shape=action_spec['shape'],
                            summary_labels=config.summary_labels)

        # Entropy regularization: either disabled (None) or a non-negative value.
        assert config.entropy_regularization is None or config.entropy_regularization >= 0.0
        self.entropy_regularization = config.entropy_regularization

        super(DistributionModel, self).__init__(
            states_spec=states_spec,
            actions_spec=actions_spec,
            network_spec=network_spec,
            config=config
        )
Esempio n. 5
0
    def initialize(self, custom_getter):
        """
        Runs the parent initialization, then creates the network, the per-action
        distributions and the KL divergence template.

        Args:
            custom_getter: Passed to the parent `initialize` and used as the
                `custom_getter_` of the 'kl-divergence' template.
        """
        super(DistributionModel, self).initialize(custom_getter)

        # Network
        self.network = Network.from_spec(
            spec=self.network_spec,
            kwargs={'summary_labels': self.summary_labels})

        # Distributions, each created within its own '<name>-distribution' scope.
        self.distributions = {}
        for action_name, action_spec in self.actions_spec.items():
            with tf.name_scope(name=(action_name + '-distribution')):

                # An explicitly specified distribution overrides the type-based default.
                if self.distributions_spec is not None and action_name in self.distributions_spec:
                    extra = dict(action_spec, summary_labels=self.summary_labels)
                    self.distributions[action_name] = Distribution.from_spec(
                        spec=self.distributions_spec[action_name], kwargs=extra)
                    continue

                action_type = action_spec['type']

                if action_type == 'bool':
                    self.distributions[action_name] = Bernoulli(
                        shape=action_spec['shape'],
                        summary_labels=self.summary_labels)

                elif action_type == 'int':
                    self.distributions[action_name] = Categorical(
                        shape=action_spec['shape'],
                        num_actions=action_spec['num_actions'],
                        summary_labels=self.summary_labels)

                elif action_type == 'float' and 'min_value' in action_spec:
                    # Bounded continuous action.
                    self.distributions[action_name] = Beta(
                        shape=action_spec['shape'],
                        min_value=action_spec['min_value'],
                        max_value=action_spec['max_value'],
                        summary_labels=self.summary_labels)

                elif action_type == 'float':
                    # Continuous action without a 'min_value' entry.
                    self.distributions[action_name] = Gaussian(
                        shape=action_spec['shape'],
                        summary_labels=self.summary_labels)

        # Register the network's internals with the model.
        self.internal_inputs.extend(self.network.internal_inputs())
        self.internal_inits.extend(self.network.internal_inits())

        # KL divergence function, wrapped as a template.
        self.fn_kl_divergence = tf.make_template(
            name_='kl-divergence',
            func_=self.tf_kl_divergence,
            custom_getter_=custom_getter
        )
Esempio n. 6
0
    def create_distributions(self):
        """
        Creates one distribution object per action, visiting the action names of
        self.actions_spec in sorted order.

        If self.distributions_spec contains an entry for the action name, that entry
        is built via Distribution.from_spec, receiving a copy of the action spec plus
        'scope' (the action name) and 'summary_labels' as kwargs. Otherwise the class
        is picked from the action type: 'bool' -> Bernoulli, 'int' -> Categorical,
        'float' -> Beta when the action spec contains 'min_value', else Gaussian.
        Actions of any other type receive no entry in the result.

        Returns: Dict mapping action names to distribution objects.
        """
        result = {}
        for action_name, action_spec in sorted(self.actions_spec.items(), key=lambda item: item[0]):

            # An explicitly specified distribution overrides the type-based default.
            if self.distributions_spec is not None and action_name in self.distributions_spec:
                extra = dict(
                    action_spec, scope=action_name, summary_labels=self.summary_labels)
                result[action_name] = Distribution.from_spec(
                    spec=self.distributions_spec[action_name], kwargs=extra)
                continue

            action_type = action_spec['type']

            if action_type == 'bool':
                result[action_name] = Bernoulli(
                    shape=action_spec['shape'],
                    scope=action_name,
                    summary_labels=self.summary_labels)

            elif action_type == 'int':
                result[action_name] = Categorical(
                    shape=action_spec['shape'],
                    num_actions=action_spec['num_actions'],
                    scope=action_name,
                    summary_labels=self.summary_labels)

            elif action_type == 'float' and 'min_value' in action_spec:
                # Bounded continuous action.
                result[action_name] = Beta(
                    shape=action_spec['shape'],
                    min_value=action_spec['min_value'],
                    max_value=action_spec['max_value'],
                    scope=action_name,
                    summary_labels=self.summary_labels)

            elif action_type == 'float':
                # Continuous action without a 'min_value' entry.
                result[action_name] = Gaussian(
                    shape=action_spec['shape'],
                    scope=action_name,
                    summary_labels=self.summary_labels)

        return result
Esempio n. 7
0
    def generate_distributions(actions_spec, distributions_spec,
                               summary_labels):
        """
        Creates one distribution object per action in actions_spec, each within its
        own '<name>-distribution' name scope.

        Args:
            actions_spec: Dict mapping action names to action spec dicts; each spec is
                read for 'type', 'shape' and, depending on type, 'num_actions' or
                'min_value'/'max_value'.
            distributions_spec: None, or a container of explicit distribution specs
                keyed by action name; an entry overrides the type-based default and
                is built via Distribution.from_spec.
            summary_labels: Passed on to every distribution that is created.

        Returns: Dict mapping action names to distribution objects. Actions whose type
            is none of 'bool', 'int', 'float' (and that have no explicit spec) receive
            no entry.
        """
        result = {}
        for action_name, action_spec in actions_spec.items():
            with tf.name_scope(name=(action_name + '-distribution')):

                # An explicitly specified distribution overrides the type-based default.
                if distributions_spec is not None and action_name in distributions_spec:
                    extra = dict(action_spec, summary_labels=summary_labels)
                    result[action_name] = Distribution.from_spec(
                        spec=distributions_spec[action_name], kwargs=extra)
                    continue

                action_type = action_spec['type']

                if action_type == 'bool':
                    result[action_name] = Bernoulli(
                        shape=action_spec['shape'], summary_labels=summary_labels)

                elif action_type == 'int':
                    result[action_name] = Categorical(
                        shape=action_spec['shape'],
                        num_actions=action_spec['num_actions'],
                        summary_labels=summary_labels)

                elif action_type == 'float' and 'min_value' in action_spec:
                    # Bounded continuous action.
                    result[action_name] = Beta(
                        shape=action_spec['shape'],
                        min_value=action_spec['min_value'],
                        max_value=action_spec['max_value'],
                        summary_labels=summary_labels)

                elif action_type == 'float':
                    # Continuous action without a 'min_value' entry.
                    result[action_name] = Gaussian(
                        shape=action_spec['shape'],
                        summary_labels=summary_labels)

        return result