def create_distributions(self):
    """
    Builds one distribution object per entry of self.actions_spec.

    An action whose name appears in self.distributions_spec is built via
    Distribution.from_spec, receiving a copy of the action spec extended with
    'summary_labels' as kwargs. Otherwise the class is picked from
    action['type']:

        'bool'  -> Bernoulli
        'int'   -> Categorical (also reads action['num_actions'])
        'float' -> Beta if the action spec contains 'min_value' (also reads
                   action['max_value']), else Gaussian

    Actions with any other type get no entry in the result.

    Returns:
        Dict mapping action name to the distribution object created for it.
    """
    result = dict()

    for name, action in self.actions_spec.items():
        # An explicit per-action specification takes precedence over the
        # type-based defaults below.
        if self.distributions_spec is not None and name in self.distributions_spec:
            spec_kwargs = dict(action, summary_labels=self.summary_labels)
            result[name] = Distribution.from_spec(
                spec=self.distributions_spec[name],
                kwargs=spec_kwargs
            )
            continue

        action_type = action['type']

        if action_type == 'bool':
            result[name] = Bernoulli(shape=action['shape'], summary_labels=self.summary_labels)

        elif action_type == 'int':
            result[name] = Categorical(
                shape=action['shape'],
                num_actions=action['num_actions'],
                summary_labels=self.summary_labels
            )

        elif action_type == 'float':
            bounded = 'min_value' in action
            if bounded:
                result[name] = Beta(
                    shape=action['shape'],
                    min_value=action['min_value'],
                    max_value=action['max_value'],
                    summary_labels=self.summary_labels
                )
            else:
                result[name] = Gaussian(shape=action['shape'], summary_labels=self.summary_labels)

    return result
def __init__(self, states_spec, actions_spec, network_spec, config):
    """
    Creates the network and the per-action distributions, stores the entropy
    regularization setting, then delegates to the parent constructor.

    Args:
        states_spec: Forwarded unchanged to the parent constructor.
        actions_spec: Dict of action name -> action spec dict; each spec is
            read for 'type' and 'shape', plus 'num_actions' for 'int' actions
            and 'min_value'/'max_value' for bounded 'float' actions.
        network_spec: Specification passed to Network.from_spec and forwarded
            to the parent constructor.
        config: Object providing scope, summary_labels, distributions and
            entropy_regularization.
    """
    # NOTE(review): nesting reconstructed from a collapsed source line -- only
    # network/distribution construction is placed under config.scope; confirm.
    with tf.name_scope(name=config.scope):
        summary_labels = config.summary_labels

        # Network
        self.network = Network.from_spec(spec=network_spec, kwargs=dict(summary_labels=summary_labels))

        # Distributions
        distributions = self.distributions = dict()
        for name, action in actions_spec.items():
            scope_name = name + '-distribution'
            with tf.name_scope(name=scope_name):
                # An explicit per-action specification takes precedence over
                # the type-based defaults below.
                if config.distributions is not None and name in config.distributions:
                    spec_kwargs = dict(action, summary_labels=summary_labels)
                    distributions[name] = Distribution.from_spec(
                        spec=config.distributions[name],
                        kwargs=spec_kwargs
                    )
                    continue

                action_type = action['type']

                if action_type == 'bool':
                    distributions[name] = Bernoulli(shape=action['shape'], summary_labels=summary_labels)

                elif action_type == 'int':
                    distributions[name] = Categorical(
                        shape=action['shape'],
                        num_actions=action['num_actions'],
                        summary_labels=summary_labels
                    )

                elif action_type == 'float':
                    if 'min_value' in action:
                        distributions[name] = Beta(
                            shape=action['shape'],
                            min_value=action['min_value'],
                            max_value=action['max_value'],
                            summary_labels=summary_labels
                        )
                    else:
                        distributions[name] = Gaussian(shape=action['shape'], summary_labels=summary_labels)

    # Entropy regularization: either disabled (None) or a non-negative value.
    entropy_regularization = config.entropy_regularization
    assert entropy_regularization is None or entropy_regularization >= 0.0
    self.entropy_regularization = entropy_regularization

    super(DistributionModel, self).__init__(
        states_spec=states_spec,
        actions_spec=actions_spec,
        network_spec=network_spec,
        config=config
    )
def initialize(self, custom_getter):
    """
    Runs the parent initialization, then creates the network, the per-action
    distributions and the KL-divergence template.

    Each distribution is created inside a '<action name>-distribution' name
    scope. Afterwards the network's internal inputs and initial values are
    appended to self.internal_inputs and self.internal_inits.

    Args:
        custom_getter: Passed to the parent initialize() and used as the
            custom getter of the 'kl-divergence' template.
    """
    super(DistributionModel, self).initialize(custom_getter)

    summary_labels = self.summary_labels

    # Network
    self.network = Network.from_spec(spec=self.network_spec, kwargs=dict(summary_labels=summary_labels))

    # Distributions
    distributions = self.distributions = dict()
    for name, action in self.actions_spec.items():
        scope_name = name + '-distribution'
        with tf.name_scope(name=scope_name):
            # An explicit per-action specification takes precedence over the
            # type-based defaults below.
            if self.distributions_spec is not None and name in self.distributions_spec:
                spec_kwargs = dict(action, summary_labels=summary_labels)
                distributions[name] = Distribution.from_spec(
                    spec=self.distributions_spec[name],
                    kwargs=spec_kwargs
                )
                continue

            action_type = action['type']

            if action_type == 'bool':
                distributions[name] = Bernoulli(shape=action['shape'], summary_labels=summary_labels)

            elif action_type == 'int':
                distributions[name] = Categorical(
                    shape=action['shape'],
                    num_actions=action['num_actions'],
                    summary_labels=summary_labels
                )

            elif action_type == 'float':
                if 'min_value' in action:
                    distributions[name] = Beta(
                        shape=action['shape'],
                        min_value=action['min_value'],
                        max_value=action['max_value'],
                        summary_labels=summary_labels
                    )
                else:
                    distributions[name] = Gaussian(shape=action['shape'], summary_labels=summary_labels)

    # Network internals
    self.internal_inputs.extend(self.network.internal_inputs())
    self.internal_inits.extend(self.network.internal_inits())

    # KL divergence function
    self.fn_kl_divergence = tf.make_template(
        name_='kl-divergence',
        func_=self.tf_kl_divergence,
        custom_getter_=custom_getter
    )
def create_distributions(self):
    """
    Instantiates a distribution for every action in self.actions_spec.

    Action names are visited in sorted order and every distribution receives
    its action name as ``scope``. An action listed in self.distributions_spec
    is built through Distribution.from_spec, with a copy of the action spec
    extended by 'scope' and 'summary_labels' as kwargs. All other actions are
    dispatched on action['type']: 'bool' -> Bernoulli, 'int' -> Categorical,
    'float' -> Beta when the spec contains 'min_value', otherwise Gaussian.
    Actions of any other type are left out of the result.

    Returns:
        Dict mapping action name to its distribution object.
    """
    actions = self.actions_spec
    result = dict()

    for name in sorted(actions):
        action = actions[name]

        # An explicit per-action specification takes precedence over the
        # type-based defaults below.
        if self.distributions_spec is not None and name in self.distributions_spec:
            spec_kwargs = dict(action, scope=name, summary_labels=self.summary_labels)
            result[name] = Distribution.from_spec(
                spec=self.distributions_spec[name],
                kwargs=spec_kwargs
            )
            continue

        action_type = action['type']

        if action_type == 'bool':
            result[name] = Bernoulli(
                shape=action['shape'],
                scope=name,
                summary_labels=self.summary_labels
            )

        elif action_type == 'int':
            result[name] = Categorical(
                shape=action['shape'],
                num_actions=action['num_actions'],
                scope=name,
                summary_labels=self.summary_labels
            )

        elif action_type == 'float':
            bounded = 'min_value' in action
            if bounded:
                result[name] = Beta(
                    shape=action['shape'],
                    min_value=action['min_value'],
                    max_value=action['max_value'],
                    scope=name,
                    summary_labels=self.summary_labels
                )
            else:
                result[name] = Gaussian(
                    shape=action['shape'],
                    scope=name,
                    summary_labels=self.summary_labels
                )

    return result
def generate_distributions(actions_spec, distributions_spec, summary_labels):
    """
    Creates one distribution object per action, each inside its own
    '<action name>-distribution' name scope.

    Args:
        actions_spec: Dict of action name -> action spec dict; each spec is
            read for 'type' and 'shape', plus 'num_actions' for 'int' actions
            and 'min_value'/'max_value' for bounded 'float' actions.
        distributions_spec: None, or a dict of action name -> distribution
            specification that overrides the type-based default.
        summary_labels: Passed on to every distribution that is created.

    Returns:
        Dict mapping action name to its distribution object. Actions whose
        type is not 'bool', 'int' or 'float' (and which have no explicit
        specification) get no entry.
    """
    result = dict()

    for name, action in actions_spec.items():
        scope_name = name + '-distribution'
        with tf.name_scope(name=scope_name):
            use_default = distributions_spec is None or name not in distributions_spec

            if not use_default:
                # Explicit specification: hand over the action spec plus labels.
                spec_kwargs = dict(action, summary_labels=summary_labels)
                result[name] = Distribution.from_spec(
                    spec=distributions_spec[name],
                    kwargs=spec_kwargs
                )
                continue

            action_type = action['type']

            if action_type == 'bool':
                result[name] = Bernoulli(shape=action['shape'], summary_labels=summary_labels)

            elif action_type == 'int':
                result[name] = Categorical(
                    shape=action['shape'],
                    num_actions=action['num_actions'],
                    summary_labels=summary_labels
                )

            elif action_type == 'float':
                if 'min_value' in action:
                    result[name] = Beta(
                        shape=action['shape'],
                        min_value=action['min_value'],
                        max_value=action['max_value'],
                        summary_labels=summary_labels
                    )
                else:
                    result[name] = Gaussian(shape=action['shape'], summary_labels=summary_labels)

    return result