def create_distributions(self):
    """
    Creates and returns one distribution object per action in self.actions_spec.

    An entry for the action name in self.distributions_spec takes precedence and is
    built via Distribution.from_spec, with the action spec plus 'summary_labels' passed
    as kwargs. Otherwise the distribution is chosen from the action type:
    'bool' -> Bernoulli, 'int' -> Categorical, 'float' -> Beta if the action spec
    contains 'min_value', else Gaussian.

    Returns:
        Dict mapping action names to distribution objects.

    Raises:
        ValueError: If an action without explicit distribution spec has a type other
            than 'bool', 'int' or 'float'.
    """
    distributions = dict()
    for name, action in self.actions_spec.items():
        if self.distributions_spec is not None and name in self.distributions_spec:
            # Explicitly configured distribution: forward the full action spec.
            kwargs = dict(action)
            kwargs['summary_labels'] = self.summary_labels
            distributions[name] = Distribution.from_spec(
                spec=self.distributions_spec[name],
                kwargs=kwargs
            )

        elif action['type'] == 'bool':
            distributions[name] = Bernoulli(
                shape=action['shape'],
                summary_labels=self.summary_labels
            )

        elif action['type'] == 'int':
            distributions[name] = Categorical(
                shape=action['shape'],
                num_actions=action['num_actions'],
                summary_labels=self.summary_labels
            )

        elif action['type'] == 'float':
            if 'min_value' in action:
                # Bounded continuous action.
                distributions[name] = Beta(
                    shape=action['shape'],
                    min_value=action['min_value'],
                    max_value=action['max_value'],
                    summary_labels=self.summary_labels
                )
            else:
                distributions[name] = Gaussian(
                    shape=action['shape'],
                    summary_labels=self.summary_labels
                )

        else:
            # Previously such an action was silently left without a distribution.
            raise ValueError(
                "Unsupported type '{}' for action '{}': expected 'bool', 'int' or "
                "'float'.".format(action['type'], name)
            )

    return distributions
def __init__(self, config):
    """
    Sets up the per-action distributions, the optional baseline and the advantage
    estimation settings from the given configuration, then calls the parent constructor.

    Args:
        config: Configuration object; PolicyGradientModel.default_config is applied to
            it via config.default() before any value is read.
    """
    config.default(PolicyGradientModel.default_config)

    # distribution
    self.distribution = dict()
    for name, action in config.actions:
        if 'distribution' in action:
            # Explicitly configured distribution receives the whole action spec.
            extra_kwargs = dict(action)
            self.distribution[name] = Distribution.from_config(
                config=action.distribution,
                kwargs=extra_kwargs
            )
            continue

        if not action.continuous:
            self.distribution[name] = Categorical(
                shape=action.shape,
                num_actions=action.num_actions
            )
            continue

        # Continuous action: bounds must be given both or not at all.
        if action.min_value is None:
            assert action.max_value is None
            self.distribution[name] = Gaussian(shape=action.shape)
        else:
            assert action.max_value is not None
            self.distribution[name] = Beta(
                min_value=action.min_value,
                max_value=action.max_value,
                shape=action.shape
            )

    # baseline
    self.baseline = None if config.baseline is None else Baseline.from_config(config=config.baseline)

    # advantage estimation
    self.gae_rewards = config.gae_rewards
    self.gae_lambda = config.gae_lambda
    self.normalize_rewards = config.normalize_rewards

    super(PolicyGradientModel, self).__init__(config)
def __init__(self, config):
    """
    Sets up the per-action distributions and the optional baseline, calls the parent
    constructor, and then stores the advantage estimation settings.

    Args:
        config: Configuration object; PolicyGradientModel.default_config is applied to
            it via config.default() before any value is read.
    """
    config.default(PolicyGradientModel.default_config)

    # distribution
    self.distribution = dict()
    for name, action in config.actions:
        if 'distribution' not in action:
            # NOTE: without an explicit distribution, 'min_value'/'max_value' are not
            # used here; every continuous action gets a plain Gaussian.
            if action.continuous:
                self.distribution[name] = Gaussian()
            else:
                self.distribution[name] = Categorical(num_actions=action.num_actions)
            continue

        # Explicit distribution: pass only the arguments relevant to the action kind.
        if not action.continuous:
            dist_kwargs = {'num_actions': action.num_actions}
        elif 'min_value' in action:
            dist_kwargs = {'min_value': action.min_value, 'max_value': action.max_value}
        else:
            dist_kwargs = {}
        self.distribution[name] = Distribution.from_config(
            config=action.distribution,
            kwargs=dist_kwargs
        )

    # baseline
    self.baseline = None if config.baseline is None else Baseline.from_config(config=config.baseline)

    super(PolicyGradientModel, self).__init__(config)

    # advantage estimation
    use_gae = config.generalized_advantage_estimation
    self.generalized_advantage_estimation = use_gae
    if use_gae:
        # gae_lambda is only read from the config when GAE is enabled.
        self.gae_lambda = config.gae_lambda
    self.normalize_advantage = config.normalize_advantage
def __init__(self, states_spec, actions_spec, network_spec, config):
    """
    Creates the network and one distribution per action, stores the entropy
    regularization setting, and then calls the parent constructor.

    Args:
        states_spec: States specification, forwarded to the parent constructor.
        actions_spec: Dict mapping action names to action specs; each spec is read via
            the keys 'type', 'shape' and, depending on the type, 'num_actions',
            'min_value' and 'max_value'.
        network_spec: Network specification passed to Network.from_spec.
        config: Configuration object; scope, summary_labels, distributions and
            entropy_regularization are read from it.

    Raises:
        ValueError: If an action without explicit distribution spec has a type other
            than 'bool', 'int' or 'float'.
    """
    with tf.name_scope(name=config.scope):
        # Network
        self.network = Network.from_spec(
            spec=network_spec,
            kwargs=dict(summary_labels=config.summary_labels)
        )

        # Distributions
        self.distributions = dict()
        for name, action in actions_spec.items():
            with tf.name_scope(name=(name + '-distribution')):
                if config.distributions is not None and name in config.distributions:
                    # Explicitly configured distribution: forward the full action spec.
                    kwargs = dict(action)
                    kwargs['summary_labels'] = config.summary_labels
                    self.distributions[name] = Distribution.from_spec(
                        spec=config.distributions[name],
                        kwargs=kwargs
                    )

                elif action['type'] == 'bool':
                    self.distributions[name] = Bernoulli(
                        shape=action['shape'],
                        summary_labels=config.summary_labels
                    )

                elif action['type'] == 'int':
                    self.distributions[name] = Categorical(
                        shape=action['shape'],
                        num_actions=action['num_actions'],
                        summary_labels=config.summary_labels
                    )

                elif action['type'] == 'float':
                    if 'min_value' in action:
                        # Bounded continuous action.
                        self.distributions[name] = Beta(
                            shape=action['shape'],
                            min_value=action['min_value'],
                            max_value=action['max_value'],
                            summary_labels=config.summary_labels
                        )
                    else:
                        self.distributions[name] = Gaussian(
                            shape=action['shape'],
                            summary_labels=config.summary_labels
                        )

                else:
                    # Previously such an action was silently left without a distribution.
                    raise ValueError(
                        "Unsupported type '{}' for action '{}': expected 'bool', 'int' or "
                        "'float'.".format(action['type'], name)
                    )

    # Entropy regularization
    assert config.entropy_regularization is None or config.entropy_regularization >= 0.0
    self.entropy_regularization = config.entropy_regularization

    super(DistributionModel, self).__init__(
        states_spec=states_spec,
        actions_spec=actions_spec,
        network_spec=network_spec,
        config=config
    )
def initialize(self, custom_getter):
    """
    Runs the parent initialization, then creates the network, one distribution per
    action, registers the network's internal inputs/inits and builds the KL divergence
    template.

    Args:
        custom_getter: Passed to the parent's initialize() and used as custom getter of
            the 'kl-divergence' template.

    Raises:
        ValueError: If an action without explicit distribution spec has a type other
            than 'bool', 'int' or 'float'.
    """
    super(DistributionModel, self).initialize(custom_getter)

    # Network
    self.network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    # Distributions
    self.distributions = dict()
    for name, action in self.actions_spec.items():
        with tf.name_scope(name=(name + '-distribution')):
            if self.distributions_spec is not None and name in self.distributions_spec:
                # Explicitly configured distribution: forward the full action spec.
                kwargs = dict(action)
                kwargs['summary_labels'] = self.summary_labels
                self.distributions[name] = Distribution.from_spec(
                    spec=self.distributions_spec[name],
                    kwargs=kwargs
                )

            elif action['type'] == 'bool':
                self.distributions[name] = Bernoulli(
                    shape=action['shape'],
                    summary_labels=self.summary_labels
                )

            elif action['type'] == 'int':
                self.distributions[name] = Categorical(
                    shape=action['shape'],
                    num_actions=action['num_actions'],
                    summary_labels=self.summary_labels
                )

            elif action['type'] == 'float':
                if 'min_value' in action:
                    # Bounded continuous action.
                    self.distributions[name] = Beta(
                        shape=action['shape'],
                        min_value=action['min_value'],
                        max_value=action['max_value'],
                        summary_labels=self.summary_labels
                    )
                else:
                    self.distributions[name] = Gaussian(
                        shape=action['shape'],
                        summary_labels=self.summary_labels
                    )

            else:
                # Previously such an action was silently left without a distribution.
                raise ValueError(
                    "Unsupported type '{}' for action '{}': expected 'bool', 'int' or "
                    "'float'.".format(action['type'], name)
                )

    # Network internals
    self.internal_inputs.extend(self.network.internal_inputs())
    self.internal_inits.extend(self.network.internal_inits())

    # KL divergence function
    self.fn_kl_divergence = tf.make_template(
        name_='kl-divergence',
        func_=self.tf_kl_divergence,
        custom_getter_=custom_getter
    )
def create_distributions(self):
    """
    Creates and returns the Distribution objects based on self.distributions_spec.

    Actions are processed in sorted name order and each distribution receives the action
    name as its scope. An entry for the action name in self.distributions_spec takes
    precedence; otherwise the distribution is chosen from the action type:
    'bool' -> Bernoulli, 'int' -> Categorical, 'float' -> Beta if the action spec
    contains 'min_value', else Gaussian.

    Returns:
        Dict of distributions according to self.distributions_spec.

    Raises:
        ValueError: If an action without explicit distribution spec has a type other
            than 'bool', 'int' or 'float'.
    """
    distributions = dict()
    for name in sorted(self.actions_spec):
        action = self.actions_spec[name]

        if self.distributions_spec is not None and name in self.distributions_spec:
            # Explicitly configured distribution: forward the full action spec.
            kwargs = dict(action)
            kwargs['scope'] = name
            kwargs['summary_labels'] = self.summary_labels
            distributions[name] = Distribution.from_spec(
                spec=self.distributions_spec[name],
                kwargs=kwargs
            )

        elif action['type'] == 'bool':
            distributions[name] = Bernoulli(
                shape=action['shape'],
                scope=name,
                summary_labels=self.summary_labels
            )

        elif action['type'] == 'int':
            distributions[name] = Categorical(
                shape=action['shape'],
                num_actions=action['num_actions'],
                scope=name,
                summary_labels=self.summary_labels
            )

        elif action['type'] == 'float':
            if 'min_value' in action:
                # Bounded continuous action.
                distributions[name] = Beta(
                    shape=action['shape'],
                    min_value=action['min_value'],
                    max_value=action['max_value'],
                    scope=name,
                    summary_labels=self.summary_labels
                )
            else:
                distributions[name] = Gaussian(
                    shape=action['shape'],
                    scope=name,
                    summary_labels=self.summary_labels
                )

        else:
            # Previously such an action was silently left without a distribution.
            raise ValueError(
                "Unsupported type '{}' for action '{}': expected 'bool', 'int' or "
                "'float'.".format(action['type'], name)
            )

    return distributions
def generate_distributions(actions_spec, distributions_spec, summary_labels):
    """
    Creates and returns one distribution object per action, each built inside a
    '<name>-distribution' TensorFlow name scope.

    An entry for the action name in distributions_spec takes precedence and is built via
    Distribution.from_spec, with the action spec plus 'summary_labels' passed as kwargs.
    Otherwise the distribution is chosen from the action type: 'bool' -> Bernoulli,
    'int' -> Categorical, 'float' -> Beta if the action spec contains 'min_value',
    else Gaussian.

    Args:
        actions_spec: Dict mapping action names to action specs.
        distributions_spec: Optional dict mapping action names to distribution specs,
            or None.
        summary_labels: Forwarded to every created distribution.

    Returns:
        Dict mapping action names to distribution objects.

    Raises:
        ValueError: If an action without explicit distribution spec has a type other
            than 'bool', 'int' or 'float'.
    """
    distributions = dict()
    for name, action in actions_spec.items():
        has_custom_spec = distributions_spec is not None and name in distributions_spec

        # Validate before opening the name scope; previously an unsupported type was
        # silently skipped and the action ended up without a distribution.
        if not has_custom_spec and action['type'] not in ('bool', 'int', 'float'):
            raise ValueError(
                "Unsupported type '{}' for action '{}': expected 'bool', 'int' or "
                "'float'.".format(action['type'], name)
            )

        with tf.name_scope(name=(name + '-distribution')):
            if has_custom_spec:
                # Explicitly configured distribution: forward the full action spec.
                kwargs = dict(action)
                kwargs['summary_labels'] = summary_labels
                distributions[name] = Distribution.from_spec(
                    spec=distributions_spec[name],
                    kwargs=kwargs
                )

            elif action['type'] == 'bool':
                distributions[name] = Bernoulli(
                    shape=action['shape'],
                    summary_labels=summary_labels
                )

            elif action['type'] == 'int':
                distributions[name] = Categorical(
                    shape=action['shape'],
                    num_actions=action['num_actions'],
                    summary_labels=summary_labels
                )

            elif 'min_value' in action:
                # Bounded continuous ('float') action.
                distributions[name] = Beta(
                    shape=action['shape'],
                    min_value=action['min_value'],
                    max_value=action['max_value'],
                    summary_labels=summary_labels
                )

            else:
                # Unbounded continuous ('float') action.
                distributions[name] = Gaussian(
                    shape=action['shape'],
                    summary_labels=summary_labels
                )

    return distributions