def initialize(self, custom_getter):
    super(PGModel, self).initialize(custom_getter)

    # Baseline
    if self.baseline_spec is None:
        assert self.baseline_mode is None

    elif all(name in self.states_spec for name in self.baseline_spec):
        # Implies AggregatedBaseline.
        assert self.baseline_mode == 'states'
        self.baseline = AggregatedBaseline(baselines=self.baseline_spec)

    else:
        assert self.baseline_mode is not None
        self.baseline = Baseline.from_spec(
            spec=self.baseline_spec,
            kwargs=dict(summary_labels=self.summary_labels)
        )

    # Baseline optimizer
    if self.baseline_optimizer_spec is not None:
        assert self.baseline_mode is not None
        self.baseline_optimizer = Optimizer.from_spec(spec=self.baseline_optimizer_spec)

    # TODO: Baseline internal states !!! (see target_network q_model)

    # Reward estimation
    self.fn_reward_estimation = tf.make_template(
        name_='reward-estimation',
        func_=self.tf_reward_estimation,
        custom_getter_=custom_getter
    )

    # Baseline loss
    self.fn_baseline_loss = tf.make_template(
        name_='baseline-loss',
        func_=self.tf_baseline_loss,
        custom_getter_=custom_getter
    )
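
# --- Illustration (not library source): the two spec shapes the branches above
# distinguish. Plain dicts; 'mlp' is a real Tensorforce baseline type, but the
# state names and layer sizes here are made up for the example.
# A single baseline over the network output (baseline_mode='network'):
network_baseline_spec = dict(type='mlp', sizes=[32, 32])

# Per-state baselines (baseline_mode='states'); every key is a state name, so
# the `all(name in self.states_spec ...)` check routes it to AggregatedBaseline:
states_baseline_spec = dict(
    position=dict(type='mlp', sizes=[32]),
    velocity=dict(type='mlp', sizes=[32])
)
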
def __init__(self, baselines, scope='aggregated-baseline', summary_labels=()):
    """
    Aggregated baseline.

    Args:
        baselines: Dict of per-state baseline specification dicts.
    """
    self.baselines = dict()
    for name in sorted(baselines):
        self.baselines[name] = Baseline.from_spec(
            spec=baselines[name],
            kwargs=dict(summary_labels=summary_labels)
        )

    self.linear = Linear(size=1, bias=0.0, scope='prediction', summary_labels=summary_labels)

    super(AggregatedBaseline, self).__init__(scope, summary_labels)
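
# --- Illustration (not library source): what the Linear(size=1) head above is
# presumably for. Assuming AggregatedBaseline stacks each per-state baseline's
# scalar value estimate and maps the stack to one value, the prediction is
# V(s) = w . [V_1(s_1), ..., V_K(s_K)] + b. Minimal NumPy sketch:
import numpy as np

per_state_values = np.array([0.7, -0.2, 0.1])  # hypothetical V_k, one per named state
w = np.full(per_state_values.shape, 1.0 / per_state_values.size)  # stand-in for learned weights
b = 0.0  # matches the bias=0.0 initialization above
aggregated_value = per_state_values @ w + b
print(aggregated_value)  # 0.2
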
def __init__(self, baselines, scope='aggregated-baseline', summary_labels=()):
    """
    Aggregated baseline.

    Args:
        baselines: Dict of per-state baseline specification dicts.
    """
    with tf.name_scope(name=scope):
        self.baselines = dict()
        for name, baseline_spec in baselines.items():
            with tf.name_scope(name=(name + '-baseline')):
                self.baselines[name] = Baseline.from_spec(
                    spec=baseline_spec,
                    kwargs=dict(summary_labels=summary_labels)
                )

        self.linear = Linear(size=1, bias=0.0, scope='prediction')

    super(AggregatedBaseline, self).__init__(scope, summary_labels)
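
# --- Illustration (not library source): what the nested tf.name_scope calls in
# this variant add over the one above. Each sub-baseline's ops are grouped under
# '<scope>/<state-name>-baseline/...' in the graph and in TensorBoard. Minimal
# sketch, assuming TensorFlow 1.x (where the code above runs):
import tensorflow as tf

with tf.name_scope('aggregated-baseline'):
    with tf.name_scope('position-baseline'):
        value = tf.constant(1.0, name='value')
print(value.op.name)  # aggregated-baseline/position-baseline/value
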
def __init__(self, states_spec, actions_spec, network_spec, config):
    # Baseline mode
    assert config.baseline_mode is None or config.baseline_mode in ('states', 'network')
    self.baseline_mode = config.baseline_mode

    with tf.name_scope(name=config.scope):
        # Baseline
        if config.baseline is None:
            assert self.baseline_mode is None
            self.baseline = None

        elif all(name in states_spec for name in config.baseline):
            # Implies AggregatedBaseline.
            assert self.baseline_mode == 'states'
            self.baseline = AggregatedBaseline(baselines=config.baseline)

        else:
            assert self.baseline_mode is not None
            self.baseline = Baseline.from_spec(
                spec=config.baseline,
                kwargs=dict(summary_labels=config.summary_labels)
            )

        # Baseline optimizer
        if config.baseline_optimizer is None:
            self.baseline_optimizer = None
        else:
            assert self.baseline_mode is not None
            self.baseline_optimizer = Optimizer.from_spec(spec=config.baseline_optimizer)

    # Generalized advantage estimation
    assert config.gae_lambda is None or (0.0 <= config.gae_lambda <= 1.0 and self.baseline_mode is not None)
    self.gae_lambda = config.gae_lambda

    super(PGModel, self).__init__(
        states_spec=states_spec,
        actions_spec=actions_spec,
        network_spec=network_spec,
        config=config
    )
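
# --- Illustration (not library source): the quantity gae_lambda controls.
# Generalized advantage estimation: A_t = sum_l (gamma * lambda)^l * delta_{t+l},
# with delta_t = r_t + gamma * V(s_{t+1}) - V(s_t); lambda=0 gives one-step TD
# advantages, lambda=1 gives Monte Carlo returns minus the baseline. Minimal
# NumPy sketch for a single episode:
import numpy as np

def gae_advantages(rewards, values, gamma=0.99, gae_lambda=0.95):
    # `values` carries one extra entry, V(s_T), for bootstrapping (0.0 if terminal).
    deltas = rewards + gamma * values[1:] - values[:-1]
    advantages = np.zeros_like(deltas)
    running = 0.0
    for t in reversed(range(len(deltas))):
        running = deltas[t] + gamma * gae_lambda * running
        advantages[t] = running
    return advantages

print(gae_advantages(np.array([1.0, 0.0, 1.0]), np.array([0.5, 0.4, 0.6, 0.0])))
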
def initialize(self, custom_getter):
    super(PGModel, self).initialize(custom_getter)

    # Baseline
    if self.baseline is None:
        assert self.baseline_mode is None
        self.baseline = None

    elif all(name in self.states_spec for name in self.baseline):
        # Implies AggregatedBaseline.
        assert self.baseline_mode == 'states'
        self.baseline = AggregatedBaseline(baselines=self.baseline)

    else:
        assert self.baseline_mode is not None
        self.baseline = Baseline.from_spec(
            spec=self.baseline,
            kwargs=dict(summary_labels=self.summary_labels)
        )

    # Baseline optimizer
    if self.baseline_optimizer is None:
        self.baseline_optimizer = None
    else:
        assert self.baseline_mode is not None
        self.baseline_optimizer = Optimizer.from_spec(spec=self.baseline_optimizer)

    # TODO: Baseline internal states !!! (see target_network q_model)

    # Reward estimation
    self.fn_reward_estimation = tf.make_template(
        name_=(self.scope + '/reward-estimation'),
        func_=self.tf_reward_estimation,
        custom_getter_=custom_getter
    )

    # PG loss per instance function
    self.fn_pg_loss_per_instance = tf.make_template(
        name_=(self.scope + '/pg-loss-per-instance'),
        func_=self.tf_pg_loss_per_instance,
        custom_getter_=custom_getter
    )
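
# --- Illustration (not library source): why initialize() wraps the tf_* methods
# in tf.make_template. A template creates its variables on the first call and
# silently reuses them on every later call, so a function like
# fn_pg_loss_per_instance can be called from several graph locations without
# duplicating parameters. Minimal sketch, assuming TensorFlow 1.x:
import tensorflow as tf

def scale(x):
    w = tf.get_variable('w', shape=[], initializer=tf.ones_initializer())
    return x * w

scale_template = tf.make_template('scale', scale)
y1 = scale_template(tf.constant(1.0))  # first call: creates variable 'scale/w'
y2 = scale_template(tf.constant(2.0))  # later calls: reuse 'scale/w'
assert len(tf.trainable_variables()) == 1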