def apply_sync():
    # Synchronize target variables towards source variables (closure: reads
    # `source_variables` and `variables` from the enclosing scope).
    dependencies = list()
    # Only reschedule the next sync step when the frequency is not the constant 1
    # (with frequency 1, every step syncs and no counter is needed).
    if not self.sync_frequency.is_constant(value=1):
        dependencies.append(
            self.next_sync.assign(value=self.sync_frequency.value(), read_value=False))
    with tf.control_dependencies(control_inputs=dependencies):
        deltas = list()
        assignments = list()
        if self.update_weight.is_constant(value=1.0):
            # Full sync: hard-copy source values into the targets.
            for source_var, target_var in zip(source_variables, variables):
                deltas.append(source_var - target_var)
                assignments.append(
                    target_var.assign(value=source_var, read_value=False))
        else:
            # Partial sync: move targets a fraction of the way towards the sources.
            update_weight = self.update_weight.value()
            for source_var, target_var in zip(source_variables, variables):
                delta = update_weight * (source_var - target_var)
                deltas.append(delta)
                assignments.append(
                    target_var.assign_add(delta=delta, read_value=False))
        with tf.control_dependencies(control_inputs=assignments):
            # Trivial operation to enforce control dependency
            return [tf_util.identity(input=delta) for delta in deltas]
def value(self):
    """Return the current parameter value, with spec assertions and a summary attached.

    Resolves the unit-dependent step counter, computes the parameter value,
    asserts it against the parameter spec, records a summary, and returns the
    value gated behind those dependencies.
    """
    # Resolve the step tensor used to evaluate a schedule (None for unit-less parameters).
    if self.unit is None:
        step = None
    else:
        step = self.root.units[self.unit]
    parameter = self.parameter_value(step=step)
    # `{{issue}}` survives .format() as the literal placeholder `{issue}`,
    # which tf_assert fills in itself.
    dependencies = self.spec.tf_assert(
        x=parameter, include_type_shape=True,
        message='Parameter.value: invalid {{issue}} for {name} value.'.format(name=self.name))
    name = 'parameters/' + self.name
    # `step` is reused here as the summary step *label* (a string), not a tensor.
    if self.unit is None:
        step = 'timesteps'
    else:
        step = self.unit
    dependencies.extend(
        self.summary(label='parameters', name=name, data=parameter, step=step))
    with tf.control_dependencies(control_inputs=dependencies):
        return tf_util.identity(input=parameter)
def step(self, *, arguments, variables, **kwargs):
    """Delegate to the inner optimizer, then clip the resulting deltas.

    Supports three clipping modes: 'global_norm' (joint norm over all deltas),
    'norm' (per-tensor norm) and 'value' (element-wise clamp). The difference
    (clipped - unclipped) is applied to the variables so that they end up at
    the clipped update, and the clipped deltas are returned.
    """
    deltas = self.optimizer.step(arguments=arguments, variables=variables, **kwargs)
    with tf.control_dependencies(control_inputs=deltas):
        threshold = self.threshold.value()
        if self.mode == 'global_norm':
            # clip_by_global_norm also returns the pre-clipping global norm.
            clipped_deltas, update_norm = tf.clip_by_global_norm(
                t_list=deltas, clip_norm=threshold)
        else:
            clipped_deltas = list()
            for delta in deltas:
                if self.mode == 'norm':
                    clipped_delta = tf.clip_by_norm(t=delta, clip_norm=threshold)
                elif self.mode == 'value':
                    clipped_delta = tf.clip_by_value(
                        t=delta, clip_value_min=-threshold, clip_value_max=threshold)
                # NOTE(review): if self.mode were anything else, clipped_delta
                # would be unbound here — presumably mode is validated at
                # construction time; confirm.
                clipped_deltas.append(clipped_delta)

            # In this branch the norm is deferred as a callable, so it is only
            # computed when the summary is actually recorded; `update_norm` is a
            # tensor in the 'global_norm' branch and a callable here — the
            # summary helper appears to accept both.
            def update_norm():
                return tf.linalg.global_norm(t_list=deltas)

        dependencies = self.summary(
            label='update-norm', name='unclipped-norm', data=update_norm, step='updates')
        # Shift each variable from the unclipped update to the clipped one.
        for variable, delta, clipped_delta in zip(variables, deltas, clipped_deltas):
            dependencies.append(
                variable.assign_add(delta=(clipped_delta - delta), read_value=False))
        with tf.control_dependencies(control_inputs=dependencies):
            # Trivial operation to enforce control dependency
            return [tf_util.identity(input=delta) for delta in clipped_deltas]
def apply(self, *, x):
    """Deltafier preprocessor: emit per-timestep differences of the input.

    For the very first timestep the delta is zero (there is no predecessor);
    afterwards the stored previous input supplies the predecessor of x[0].
    Only batch sizes of at most 1 are supported during act.
    """
    assertions = list()
    if self.config.create_tf_assertions:
        assertions.append(tf.debugging.assert_less_equal(
            x=tf.shape(input=x)[0], y=1,
            message="Deltafier preprocessor currently not compatible with batched Agent.act."))

    # TODO: hack for empty batch (for self.previous.assign below)
    # Prepend the stored previous value so extended[-1:] is well-defined even
    # when x is empty.
    extended = tf.concat(values=(self.previous, x), axis=0)

    def first_delta():
        # First observed timestep: mark that a previous value now exists and
        # use a zero delta for the first element.
        assignment = self.has_previous.assign(
            value=tf_util.constant(value=True, dtype='bool'), read_value=False)
        with tf.control_dependencies(control_inputs=(assignment,)):
            return tf.concat(values=(tf.zeros_like(input=x[:1]), x[1:] - x[:-1]), axis=0)

    def later_delta():
        # extended[:-1] are the predecessors of the elements of x.
        return x - extended[:-1]

    with tf.control_dependencies(control_inputs=assertions):
        # An empty batch is routed through later_delta, which degenerates to an
        # empty result without touching has_previous.
        empty_batch = tf.math.equal(x=tf.shape(input=x)[0], y=0)
        pred = tf.math.logical_or(x=self.has_previous, y=empty_batch)
        delta = tf.cond(pred=pred, true_fn=later_delta, false_fn=first_delta)
        # Remember the last input (or keep the old one if the batch was empty).
        assignment = self.previous.assign(value=extended[-1:], read_value=False)
        with tf.control_dependencies(control_inputs=(assignment,)):
            if self.concatenate is False:
                return tf_util.identity(input=delta)
            else:
                # self.concatenate is the axis along which to append the delta
                # (+1 for the batch dimension).
                return tf.concat(values=(x, delta), axis=(self.concatenate + 1))
def step(self, *, arguments, variables, fn_loss, **kwargs):
    """Evolutionary gradient-free step: average over random perturbation samples.

    Each iteration perturbs the variables with Gaussian noise, evaluates the
    loss, and accumulates the perturbation signed by whether it improved the
    loss; the averaged accumulated delta is then applied as the final update.
    """
    learning_rate = self.learning_rate.value()
    unperturbed_loss = fn_loss(**arguments.to_kwargs())
    deltas = [tf.zeros_like(input=variable) for variable in variables]
    previous_perturbations = [tf.zeros_like(input=variable) for variable in variables]

    def body(deltas, previous_perturbations):
        with tf.control_dependencies(control_inputs=deltas):
            perturbations = [
                learning_rate * tf.random.normal(
                    shape=tf_util.shape(x=variable), dtype=tf_util.get_dtype(type='float'))
                for variable in variables
            ]
            # Variables still hold the previous perturbation, so only the
            # difference between new and previous perturbation is applied.
            perturbation_deltas = [
                pert - prev_pert
                for pert, prev_pert in zip(perturbations, previous_perturbations)
            ]
            assignments = list()
            for variable, delta in zip(variables, perturbation_deltas):
                assignments.append(variable.assign_add(delta=delta, read_value=False))
            with tf.control_dependencies(control_inputs=assignments):
                perturbed_loss = fn_loss(**arguments.to_kwargs())
                # +1 if the perturbation lowered the loss, -1 otherwise.
                direction = tf.math.sign(x=(unperturbed_loss - perturbed_loss))
                deltas = [
                    delta + direction * perturbation
                    for delta, perturbation in zip(deltas, perturbations)
                ]
        return deltas, perturbations

    num_samples = self.num_samples.value()
    deltas, perturbations = tf.while_loop(
        cond=tf_util.always_true, body=body,
        loop_vars=(deltas, previous_perturbations),
        maximum_iterations=tf_util.int32(x=num_samples))
    with tf.control_dependencies(control_inputs=deltas):
        num_samples = tf_util.cast(x=num_samples, dtype='float')
        deltas = [delta / num_samples for delta in deltas]
        # Variables still carry the last sample's perturbation; shift them to
        # the averaged delta instead.
        perturbation_deltas = [
            delta - pert for delta, pert in zip(deltas, perturbations)
        ]
        assignments = list()
        for variable, delta in zip(variables, perturbation_deltas):
            assignments.append(variable.assign_add(delta=delta, read_value=False))
        with tf.control_dependencies(control_inputs=assignments):
            # Trivial operation to enforce control dependency
            return [tf_util.identity(input=delta) for delta in deltas]
def reset(self):
    """Reset every stateful preprocessing layer in this stack.

    Returns a trivial boolean-ish tensor: the first reset operation's value
    when any stateful layer exists, otherwise a constant False.
    """
    reset_ops = [
        layer.reset() for layer in self.layers
        if isinstance(layer, PreprocessingLayer)
    ]
    if not reset_ops:
        return tf_util.constant(value=False, dtype='bool')
    return tf_util.identity(input=reset_ops[0])
def not_empty_batch():
    # Sequence preprocessor body (closure: reads `x` and `self` from the
    # enclosing scope): maintain a sliding window of the last `self.length`
    # inputs along axis `self.axis + 1` (+1 for the batch dimension).

    def first_timestep():
        # No history yet: mark history as present and fill the whole window by
        # tiling the current input.
        assignment = self.has_previous.assign(
            value=tf_util.constant(value=True, dtype='bool'), read_value=False)
        with tf.control_dependencies(control_inputs=(assignment,)):
            if self.concatenate:
                current = x
            else:
                current = tf.expand_dims(input=x, axis=(self.axis + 1))
            multiples = tuple(
                self.length if dims == self.axis + 1 else 1
                for dims in range(self.output_spec().rank + 1))
            return tf.tile(input=current, multiples=multiples)

    def other_timesteps():
        # Append the current input to the stored window.
        if self.concatenate:
            current = x
        else:
            current = tf.expand_dims(input=x, axis=(self.axis + 1))
        return tf.concat(values=(self.previous, current), axis=(self.axis + 1))

    xs = tf.cond(pred=self.has_previous, true_fn=other_timesteps, false_fn=first_timestep)

    # Drop the oldest entry so the stored window keeps its fixed size: in
    # concatenate mode one input-width slice, otherwise one stacked element.
    if self.concatenate:
        begin = tuple(
            self.input_spec.shape[dims - 1] if dims == self.axis + 1 else 0
            for dims in range(self.output_spec().rank + 1))
    else:
        begin = tuple(
            1 if dims == self.axis + 1 else 0
            for dims in range(self.output_spec().rank + 1))
    assignment = self.previous.assign(
        value=tf.slice(input_=xs, begin=begin, size=self.previous.shape), read_value=False)
    with tf.control_dependencies(control_inputs=(assignment,)):
        return tf_util.identity(input=xs)
def apply_sync():
    """Move each target variable a weighted step towards its source variable.

    Closure over `source_variables` and `variables` from the enclosing scope.
    Reschedules the next synchronization first, then applies the weighted
    deltas and returns them (gated behind the assignments).
    """
    # Schedule the next synchronization step before touching the variables.
    reschedule = self.next_sync.assign(
        value=self.sync_frequency.value(), read_value=False)
    with tf.control_dependencies(control_inputs=(reschedule,)):
        weight = self.update_weight.value()
        deltas = [
            weight * (src - tgt)
            for src, tgt in zip(source_variables, variables)
        ]
        assignments = [
            tgt.assign_add(delta=d, read_value=False)
            for tgt, d in zip(variables, deltas)
        ]
        with tf.control_dependencies(control_inputs=assignments):
            # Identity ops tie the returned deltas to the applied assignments.
            return [tf_util.identity(input=d) for d in deltas]
def apply_step():
    # Natural-gradient step (closure: reads `constant`, `learning_rate`,
    # `deltas`, `variables`, `loss_gradients`, `return_estimated_improvement`
    # from the enclosing scope).
    # lambda = sqrt(c' / c)
    lagrange_multiplier = tf.math.sqrt(x=(constant / learning_rate))
    # delta = delta' / lambda (zero prevented via tf.cond pred below)
    estimated_deltas = deltas.fmap(function=(lambda delta: delta / lagrange_multiplier))
    # Apply natural gradient improvement.
    assignments = list()
    for variable, delta in zip(variables, estimated_deltas.values()):
        assignments.append(variable.assign_add(delta=delta, read_value=False))
    with tf.control_dependencies(control_inputs=assignments):
        if return_estimated_improvement:
            # improvement = grad(loss) * delta (= loss_new - loss_old)
            # i.e. first-order estimate of the loss change under the update.
            estimated_improvement = tf.math.add_n(inputs=[
                tf.math.reduce_sum(input_tensor=(loss_grad * delta))
                for loss_grad, delta in zip(loss_gradients, estimated_deltas.values())
            ])
            return list(estimated_deltas.values()), estimated_improvement
        else:
            # Trivial operation to enforce control dependency
            return [tf_util.identity(input=delta) for delta in estimated_deltas.values()]
def step(self, *, arguments, variables, fn_loss, **kwargs):
    """Evolutionary step with per-variable dtype handling.

    With a single sample, applies one perturbation and keeps it if the loss
    improved, otherwise reverses it. With multiple samples, accumulates
    direction-signed perturbations in a while-loop and applies their average.
    Non-float variables get the learning rate / direction cast to their dtype.
    """
    learning_rate = self.learning_rate.value()
    unperturbed_loss = fn_loss(**arguments.to_kwargs())
    if self.num_samples.is_constant(value=1):
        deltas = list()
        for variable in variables:
            delta = tf.random.normal(shape=variable.shape, dtype=variable.dtype)
            if variable.dtype == tf_util.get_dtype(type='float'):
                deltas.append(learning_rate * delta)
            else:
                deltas.append(tf.cast(x=learning_rate, dtype=variable.dtype) * delta)
        assignments = list()
        for variable, delta in zip(variables, deltas):
            assignments.append(variable.assign_add(delta=delta, read_value=False))
        with tf.control_dependencies(control_inputs=assignments):
            perturbed_loss = fn_loss(**arguments.to_kwargs())

            def negate_deltas():
                # Variables already moved by +delta; subtracting 2*delta lands
                # them at -delta, i.e. the reversed perturbation.
                neg_two_float = tf_util.constant(value=-2.0, dtype='float')
                assignments = list()
                for variable, delta in zip(variables, deltas):
                    if variable.dtype == tf_util.get_dtype(type='float'):
                        assignments.append(variable.assign_add(
                            delta=(neg_two_float * delta), read_value=False))
                    else:
                        _ng_two_float = tf.constant(value=-2.0, dtype=variable.dtype)
                        assignments.append(variable.assign_add(
                            delta=(_ng_two_float * delta), read_value=False))
                with tf.control_dependencies(control_inputs=assignments):
                    return [tf.math.negative(x=delta) for delta in deltas]

            return tf.cond(pred=(perturbed_loss < unperturbed_loss),
                           true_fn=(lambda: deltas), false_fn=negate_deltas)

    else:
        deltas = [tf.zeros_like(input=variable) for variable in variables]
        previous_perturbations = [tf.zeros_like(input=variable) for variable in variables]

        def body(deltas, previous_perturbations):
            with tf.control_dependencies(control_inputs=deltas):
                perturbations = list()
                for variable in variables:
                    perturbation = tf.random.normal(
                        shape=variable.shape, dtype=variable.dtype)
                    if variable.dtype == tf_util.get_dtype(type='float'):
                        perturbations.append(learning_rate * perturbation)
                    else:
                        perturbations.append(
                            tf.cast(x=learning_rate, dtype=variable.dtype) * perturbation)
                # Variables still hold the previous perturbation; apply only
                # the difference.
                perturbation_deltas = [
                    pert - prev_pert
                    for pert, prev_pert in zip(perturbations, previous_perturbations)
                ]
                assignments = list()
                for variable, delta in zip(variables, perturbation_deltas):
                    assignments.append(variable.assign_add(delta=delta, read_value=False))
                with tf.control_dependencies(control_inputs=assignments):
                    perturbed_loss = fn_loss(**arguments.to_kwargs())
                    one_float = tf_util.constant(value=1.0, dtype='float')
                    neg_one_float = tf_util.constant(value=-1.0, dtype='float')
                    # +1 if the perturbation lowered the loss, -1 otherwise.
                    direction = tf.where(condition=(perturbed_loss < unperturbed_loss),
                                         x=one_float, y=neg_one_float)
                    next_deltas = list()
                    for variable, delta, perturbation in zip(
                            variables, deltas, perturbations):
                        if variable.dtype == tf_util.get_dtype(type='float'):
                            next_deltas.append(delta + direction * perturbation)
                        else:
                            next_deltas.append(
                                delta +
                                tf.cast(x=direction, dtype=variable.dtype) * perturbation)
            return next_deltas, perturbations

        num_samples = self.num_samples.value()
        deltas, perturbations = tf.while_loop(
            cond=tf_util.always_true, body=body,
            loop_vars=(deltas, previous_perturbations),
            maximum_iterations=tf_util.int32(x=num_samples))
        with tf.control_dependencies(control_inputs=deltas):
            num_samples = tf_util.cast(x=num_samples, dtype='float')
            deltas = [delta / num_samples for delta in deltas]
            # Shift variables from the last sample's perturbation to the
            # averaged delta.
            perturbation_deltas = [
                delta - pert for delta, pert in zip(deltas, perturbations)
            ]
            assignments = list()
            for variable, delta in zip(variables, perturbation_deltas):
                assignments.append(variable.assign_add(delta=delta, read_value=False))
            with tf.control_dependencies(control_inputs=assignments):
                # Trivial operation to enforce control dependency
                return [tf_util.identity(input=delta) for delta in deltas]
def observe(self, *, terminal, reward, parallel):
    """Record one batch of (terminal, reward) observations for one parallel env.

    Validates the inputs, records reward summaries, accumulates episode
    length/reward for the given parallel index, runs the core observe, and —
    if the batch ends with a terminal — resets the per-episode state and
    increments the episode counter.

    Returns a (updated, episodes, updates) tuple of tensors.

    Fix: the reward-spec assertion message previously said "terminal input"
    (copy-paste from the terminal assertion); it now correctly says "reward
    input".
    """
    zero = tf_util.constant(value=0, dtype='int')
    one = tf_util.constant(value=1, dtype='int')
    batch_size = tf_util.cast(x=tf.shape(input=terminal)[0], dtype='int')
    # Shape (1, 1) index for scatter updates into per-parallel state vectors.
    expanded_parallel = tf.expand_dims(
        input=tf.expand_dims(input=parallel, axis=0), axis=1)
    is_terminal = tf.math.greater(x=terminal[-1], y=zero)

    # Input assertions
    assertions = list()
    if self.config.create_tf_assertions:
        assertions.extend(self.terminal_spec.tf_assert(
            x=terminal, batch_size=batch_size,
            message='Agent.observe: invalid {issue} for terminal input.'))
        assertions.extend(self.reward_spec.tf_assert(
            x=reward, batch_size=batch_size,
            message='Agent.observe: invalid {issue} for reward input.'))
        assertions.extend(self.parallel_spec.tf_assert(
            x=parallel,
            message='Agent.observe: invalid {issue} for parallel input.'))
        # Assertion: at most one terminal
        num_terms = tf.math.count_nonzero(
            input=terminal, dtype=tf_util.get_dtype(type='int'))
        assertions.append(tf.debugging.assert_less_equal(
            x=num_terms, y=one,
            message="Agent.observe: input contains more than one terminal."))
        # Assertion: if terminal, last timestep in batch
        assertions.append(tf.debugging.assert_equal(
            x=tf.math.greater(x=num_terms, y=zero), y=is_terminal,
            message="Agent.observe: terminal is not the last input timestep."))

    with tf.control_dependencies(control_inputs=assertions):
        dependencies = list()

        # Reward summary
        if self.summaries == 'all' or 'reward' in self.summaries:
            with self.summarizer.as_default():
                x = tf.math.reduce_mean(input_tensor=reward)
                dependencies.append(
                    tf.summary.scalar(name='reward', data=x, step=self.timesteps))

        # Update episode length/reward
        updates = tf.expand_dims(input=batch_size, axis=0)
        value = tf.tensor_scatter_nd_add(
            tensor=self.episode_length, indices=expanded_parallel, updates=updates)
        dependencies.append(self.episode_length.assign(value=value))
        # sparse_delta = tf.IndexedSlices(values=batch_size, indices=parallel)
        # dependencies.append(self.episode_length.scatter_add(sparse_delta=sparse_delta))
        sum_reward = tf.math.reduce_sum(input_tensor=reward, keepdims=True)
        value = tf.tensor_scatter_nd_add(
            tensor=self.episode_reward, indices=expanded_parallel, updates=sum_reward)
        dependencies.append(self.episode_reward.assign(value=value))
        # sum_reward = tf.math.reduce_sum(input_tensor=reward)
        # sparse_delta = tf.IndexedSlices(values=sum_reward, indices=parallel)
        # dependencies.append(self.episode_reward.scatter_add(sparse_delta=sparse_delta))

        # Core observe (before terminal handling)
        updated = self.core_observe(terminal=terminal, reward=reward, parallel=parallel)
        dependencies.append(updated)

        # Handle terminal (after core observe and episode reward)
        with tf.control_dependencies(control_inputs=dependencies):

            def fn_terminal():
                operations = list()

                # Reset internals to their initial values for this parallel env.
                def function(spec, initial):
                    return tf_util.constant(value=initial, dtype=spec.type)

                initials = self.internals_spec.fmap(
                    function=function, cls=TensorDict,
                    zip_values=self.initial_internals)
                for name, previous, initial in self.previous_internals.zip_items(
                        initials):
                    updates = tf.expand_dims(input=initial, axis=0)
                    value = tf.tensor_scatter_nd_update(
                        tensor=previous, indices=expanded_parallel, updates=updates)
                    operations.append(previous.assign(value=value))
                    # sparse_delta = tf.IndexedSlices(values=initial, indices=parallel)
                    # operations.append(previous.scatter_update(sparse_delta=sparse_delta))

                # Episode length/reward summaries
                # (before episode reward reset / episodes increment)
                dependencies = list()
                if self.summaries == 'all' or 'reward' in self.summaries:
                    with self.summarizer.as_default():
                        x = tf.gather(params=self.episode_length, indices=parallel)
                        dependencies.append(tf.summary.scalar(
                            name='episode-length', data=x, step=self.episodes))
                        x = tf.gather(params=self.episode_reward, indices=parallel)
                        dependencies.append(tf.summary.scalar(
                            name='episode-reward', data=x, step=self.episodes))

                # Reset episode length/reward
                with tf.control_dependencies(control_inputs=dependencies):
                    zeros = tf_util.zeros(shape=(1,), dtype='int')
                    value = tf.tensor_scatter_nd_update(
                        tensor=self.episode_length, indices=expanded_parallel,
                        updates=zeros)
                    operations.append(self.episode_length.assign(value=value))
                    # sparse_delta = tf.IndexedSlices(values=zero, indices=parallel)
                    # operations.append(self.episode_length.scatter_update(sparse_delta=sparse_delta))
                    zeros = tf_util.zeros(shape=(1,), dtype='float')
                    value = tf.tensor_scatter_nd_update(
                        tensor=self.episode_reward, indices=expanded_parallel,
                        updates=zeros)
                    operations.append(self.episode_reward.assign(value=value))
                    # zero_float = tf_util.constant(value=0.0, dtype='float')
                    # sparse_delta = tf.IndexedSlices(values=zero_float, indices=parallel)
                    # operations.append(self.episode_reward.scatter_update(sparse_delta=sparse_delta))

                    # Increment episodes counter
                    operations.append(
                        self.episodes.assign_add(delta=one, read_value=False))
                return tf.group(*operations)

            handle_terminal = tf.cond(
                pred=is_terminal, true_fn=fn_terminal, false_fn=tf.no_op)
            with tf.control_dependencies(control_inputs=(handle_terminal,)):
                episodes = tf_util.identity(input=self.episodes)
                updates = tf_util.identity(input=self.updates)
                return updated, episodes, updates
def act(self, *, states, auxiliaries, parallel):
    """Compute actions for a batch of states across parallel environments.

    Validates inputs (and int-action masks if enabled), gathers the stored
    internals for the given parallel indices, runs the core act, validates the
    produced actions, stores the new internals, increments the timestep
    counter, and returns (actions, timestep).
    """
    batch_size = tf_util.cast(x=tf.shape(input=parallel)[0], dtype='int')

    # Input assertions
    assertions = list()
    if self.config.create_tf_assertions:
        assertions.extend(self.states_spec.tf_assert(
            x=states, batch_size=batch_size,
            message='Agent.act: invalid {issue} for {name} state input.'))
        assertions.extend(self.auxiliaries_spec.tf_assert(
            x=auxiliaries, batch_size=batch_size,
            message='Agent.act: invalid {issue} for {name} input.'))
        assertions.extend(self.parallel_spec.tf_assert(
            x=parallel, batch_size=batch_size,
            message='Agent.act: invalid {issue} for parallel input.'))
        # Mask assertions: every int action must have at least one valid choice.
        # NOTE(review): the message says "Agent.independent_act" although this
        # is act() — looks like a copy-paste from the independent-act variant;
        # confirm before changing the user-facing string.
        if self.config.enable_int_action_masking:
            true = tf_util.constant(value=True, dtype='bool')
            for name, spec in self.actions_spec.items():
                if spec.type == 'int':
                    assertions.append(tf.debugging.assert_equal(
                        x=tf.reduce_all(input_tensor=tf.math.reduce_any(
                            input_tensor=auxiliaries[name]['mask'],
                            axis=(spec.rank + 1))),
                        y=true,
                        message="Agent.independent_act: at least one action has to be valid."))

    with tf.control_dependencies(control_inputs=assertions):
        # Retrieve internals for the requested parallel environments.
        internals = self.previous_internals.fmap(
            function=(lambda x: tf.gather(params=x, indices=parallel)),
            cls=TensorDict)

        # Core act (non-deterministic, non-independent mode).
        deterministic = tf_util.constant(value=False, dtype='bool')
        actions, internals = self.core_act(
            states=states, internals=internals, auxiliaries=auxiliaries,
            parallel=parallel, deterministic=deterministic, independent=False)

        # Action assertions: spec-conformant, and masked int actions valid.
        assertions = list()
        if self.config.create_tf_assertions:
            assertions.extend(
                self.actions_spec.tf_assert(x=actions, batch_size=batch_size))
            if self.config.enable_int_action_masking:
                for name, spec, action in self.actions_spec.zip_items(actions):
                    if spec.type == 'int':
                        is_valid = tf.reduce_all(input_tensor=tf.gather(
                            params=auxiliaries[name]['mask'],
                            indices=tf.expand_dims(input=action, axis=(spec.rank + 1)),
                            batch_dims=(spec.rank + 1)))
                        assertions.append(tf.debugging.assert_equal(
                            x=is_valid, y=true, message="Action mask check."))

        # Remember internals for the next act call.
        dependencies = list()
        for name, previous, internal in self.previous_internals.zip_items(internals):
            indices = tf.expand_dims(input=parallel, axis=1)
            value = tf.tensor_scatter_nd_update(
                tensor=previous, indices=indices, updates=internal)
            dependencies.append(previous.assign(value=value))
            # sparse_delta = tf.IndexedSlices(values=internal, indices=parallel)
            # dependencies.append(previous.scatter_update(sparse_delta=sparse_delta))

        # Increment timestep (after core act)
        with tf.control_dependencies(
                control_inputs=(actions.flatten() + internals.flatten())):
            dependencies.append(
                self.timesteps.assign_add(delta=batch_size, read_value=False))

        with tf.control_dependencies(control_inputs=(dependencies + assertions)):
            actions = actions.fmap(function=tf_util.identity)
            timestep = tf_util.identity(input=self.timesteps)
            return actions, timestep
def reset(self):
    """Return the current (timestep, episode, update) counter values.

    The identity ops yield read snapshots of the three counters.
    """
    counters = (self.timesteps, self.episodes, self.updates)
    timestep, episode, update = (tf_util.identity(input=c) for c in counters)
    return timestep, episode, update
def update(self, *, arguments, variables, **kwargs):
    """Perform an optimizer update and record update-norm / moment summaries.

    Delegates to self.step(), then records (via the summary helper) the global
    norm of the deltas and per-variable mean/variance moments, and returns a
    constant True gated behind all dependencies.
    """
    assert self.is_initialized_given_variables
    assert all(variable.dtype.is_floating for variable in variables)

    deltas = self.step(arguments=arguments, variables=variables, **kwargs)
    dependencies = list(deltas)

    # Deferred so the norm is only computed if the summary is recorded.
    def fn_summary():
        return tf.linalg.global_norm(t_list=[
            tf_util.cast(x=delta, dtype='float') for delta in deltas
        ])

    assertions = list(deltas)
    # if self.config.create_debug_assertions:
    #     if self.__class__.__name__ != 'Synchronization':
    #         for delta, variable in zip(deltas, variables):
    #             if variable.shape.num_elements() <= 4:
    #                 continue
    #             if '/policy/' in variable.name and '_distribution/' in variable.name:
    #                 continue
    #             assertions.append(tf.debugging.assert_equal(
    #                 x=tf.math.reduce_any(
    #                     input_tensor=tf.math.not_equal(x=delta, y=tf.zeros_like(input=delta))
    #                 ), y=tf_util.constant(value=True, dtype='bool'), message=variable.name
    #             ))

    # e.g. 'natural_gradient_optimizer' -> 'natural-update/norm' — uses the
    # name prefix up to the first underscore.
    name = self.name[:self.name.index('_')] + '-update/norm'
    dependencies.extend(self.summary(
        label='update-norm', name=name, data=fn_summary, step='updates'))

    with tf.control_dependencies(control_inputs=assertions):
        # Deferred per-variable (mean, variance) moments; rebinds fn_summary.
        def fn_summary():
            xs = list()
            for variable in variables:
                xs.extend(tf.nn.moments(
                    x=variable, axes=list(range(tf_util.rank(x=variable)))))
            return xs

        prefix = self.name[:self.name.index('_')] + '-updates/'
        names = list()
        for variable in variables:
            # Variable names are '<root>/<path>:0'; strip root prefix and ':0'.
            assert variable.name.startswith(
                self.root.name + '/') and variable.name[-2:] == ':0'
            names.append(prefix + variable.name[len(self.root.name) + 1:-2] + '-mean')
            names.append(prefix + variable.name[len(self.root.name) + 1:-2] + '-variance')
        dependencies.extend(self.summary(
            label='updates', name=names, data=fn_summary, step='updates'))

        with tf.control_dependencies(control_inputs=dependencies):
            return tf_util.identity(
                input=tf_util.constant(value=True, dtype='bool'))
def apply(self, *, x, horizons, internals):
    """Apply a temporal (cumulative or iterative) layer over horizon windows.

    `horizons` is a (batch, 2) tensor of (start, length) per sequence; windows
    longer than the configured horizon are truncated to its most recent part.
    """
    zero = tf_util.constant(value=0, dtype='int')
    one = tf_util.constant(value=1, dtype='int')
    batch_size = tf_util.cast(x=tf.shape(input=horizons)[0], dtype='int')
    zeros = tf_util.zeros(shape=(batch_size,), dtype='int')
    ones = tf_util.ones(shape=(batch_size,), dtype='int')

    # including 0th step
    horizon = self.horizon.value() + one
    # in case of longer horizon than necessary (e.g. main vs baseline policy)
    starts = horizons[:, 0] + tf.maximum(x=(horizons[:, 1] - horizon), y=zeros)
    lengths = horizons[:, 1] - tf.maximum(x=(horizons[:, 1] - horizon), y=zeros)
    horizon = tf.minimum(x=horizon, y=tf.math.reduce_max(input_tensor=lengths, axis=0))
    output_spec = self.output_spec()

    if self.temporal_processing == 'cumulative':
        if self.horizon.is_constant(value=0):
            x = self.iterative_apply(xs=x, lengths=ones)
        else:
            # Gather per-sequence windows step by step; `remaining` counts down
            # per sequence and freezes indices once a sequence is exhausted.
            def body(x, indices, remaining, xs):
                current_x = tf.gather(params=x, indices=indices)
                current_x = tf.expand_dims(input=current_x, axis=1)
                xs = tf.concat(values=(xs, current_x), axis=1)
                remaining -= tf.where(
                    condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones)
                indices += tf.where(
                    condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones)
                return x, indices, remaining, xs

            initial_xs = tf_util.zeros(
                shape=((batch_size, 0) + output_spec.shape), dtype=output_spec.type)
            _, final_indices, final_remaining, xs = tf.while_loop(
                cond=tf_util.always_true, body=body,
                loop_vars=(x, starts, lengths, initial_xs),
                maximum_iterations=tf_util.int64(x=horizon))
            x = self.cumulative_apply(xs=xs, lengths=lengths)

    elif self.temporal_processing == 'iterative':
        if self.horizon.is_constant(value=0):
            # NOTE(review): the result is bound to final_internals but the
            # function later returns `internals` (the unmodified input) — the
            # updated internals appear to be dropped on this path; confirm.
            x, final_internals = self.iterative_apply(x=x, internals=internals)
        else:
            initial_x = tf_util.zeros(
                shape=((batch_size,) + output_spec.shape), dtype=output_spec.type)
            signature = self.input_signature(function='iterative_body')
            internals = signature['current_internals'].kwargs_to_args(kwargs=internals)
            _, final_indices, final_remaining, x, \
                final_internals = tf.while_loop(
                    cond=tf_util.always_true, body=self.iterative_body,
                    loop_vars=(x, starts, lengths, initial_x, internals),
                    maximum_iterations=tf_util.int32(x=horizon))
            internals = signature['current_internals'].args_to_kwargs(
                args=final_internals)

    assertions = list()
    if self.config.create_tf_assertions:
        # NOTE(review): final_indices / final_remaining are only bound on the
        # non-constant-horizon paths; with a constant horizon of 0 and
        # create_tf_assertions enabled this would raise NameError — presumably
        # that combination cannot occur; confirm.
        assertions.append(tf.debugging.assert_equal(
            x=final_indices, y=(tf.math.cumsum(x=lengths) - ones)))
        assertions.append(tf.debugging.assert_equal(
            x=tf.math.reduce_sum(input_tensor=final_remaining), y=zero))

    with tf.control_dependencies(control_inputs=assertions):
        if self.temporal_processing == 'cumulative':
            return tf_util.identity(input=super().apply(x=x))
        elif self.temporal_processing == 'iterative':
            return tf_util.identity(input=super().apply(x=x)), internals
def reset(self):
    """Forget the stored previous input and return a constant False tensor."""
    false = tf_util.constant(value=False, dtype='bool')
    clear_flag = self.has_previous.assign(value=false, read_value=False)
    # Gate the returned constant behind the flag reset.
    with tf.control_dependencies(control_inputs=(clear_flag,)):
        return tf_util.identity(input=false)
def update(self, *, arguments, variables, **kwargs):
    """Perform an optimizer update with optional debug assertions and summaries.

    Delegates to self.step(); optionally asserts that every delta is non-zero
    unless all rewards are zero (skipping optimizer wrappers where zero deltas
    are legitimate); records update-norm and per-variable moment summaries
    directly via tf.summary; returns a constant True gated behind everything.
    """
    assert self.is_initialized_given_variables
    assert all(variable.dtype.is_floating for variable in variables)

    deltas = self.step(arguments=arguments, variables=variables, **kwargs)

    assertions = list(deltas)
    if self.config.create_debug_assertions:
        from tensorforce.core.optimizers import DoublecheckStep, NaturalGradient, \
            Synchronization, UpdateModifier
        # Unwrap update modifiers to find out whether a DoublecheckStep is in
        # the chain (zero deltas are expected there).
        optimizer = self
        while isinstance(optimizer, UpdateModifier):
            if isinstance(optimizer, DoublecheckStep):
                break
            optimizer = optimizer.optimizer
        if not isinstance(optimizer, DoublecheckStep) and (
                not isinstance(optimizer, NaturalGradient) or
                not optimizer.only_positive_updates) and (
                not isinstance(self, Synchronization) or
                self.sync_frequency is None):
            for delta, variable in zip(deltas, variables):
                if '_distribution/mean/linear/' in variable.name:
                    # Gaussian.state_value does not use mean
                    continue
                # if variable.name.endswith('/bias:0') and isinstance(self, Synchronization) \
                #         and self.root.updates.numpy() == 0:
                #     # Initialization values are equivalent for bias
                #     continue
                # Assert: delta has a non-zero entry OR all rewards are zero.
                assertions.append(tf.debugging.assert_equal(
                    x=tf.math.logical_or(
                        x=tf.math.reduce_all(input_tensor=tf.math.greater(
                            x=tf.math.count_nonzero(
                                input=delta, dtype=tf_util.get_dtype(type='int')),
                            y=tf_util.constant(value=0, dtype='int'))),
                        y=tf.reduce_all(input_tensor=tf.math.equal(
                            x=arguments['reward'],
                            y=tf_util.constant(value=0.0, dtype='float')))),
                    y=tf_util.constant(value=True, dtype='bool'),
                    message=variable.name))

    with tf.control_dependencies(control_inputs=assertions):
        dependencies = list()
        if self.root.summaries == 'all' or 'update-norm' in self.root.summaries:
            with self.root.summarizer.as_default():
                x = tf.linalg.global_norm(t_list=[
                    tf_util.cast(x=delta, dtype='float') for delta in deltas
                ])
                dependencies.append(tf.summary.scalar(
                    name='update-norm', data=x, step=self.root.updates))
        if self.root.summaries == 'all' or 'updates' in self.root.summaries:
            with self.root.summarizer.as_default():
                for var in variables:
                    # Variable names are '<root>/<path>:0'; strip root prefix
                    # and the ':0' suffix for the summary names.
                    assert var.name.startswith(
                        self.root.name + '/') and var.name[-2:] == ':0'
                    mean_name = var.name[len(self.root.name) + 1:-2] + '-mean'
                    var_name = var.name[len(self.root.name) + 1:-2] + '-variance'
                    mean, variance = tf.nn.moments(
                        x=var, axes=list(range(tf_util.rank(x=var))))
                    dependencies.append(tf.summary.scalar(
                        name=mean_name, data=mean, step=self.root.updates))
                    dependencies.append(tf.summary.scalar(
                        name=var_name, data=variance, step=self.root.updates))
        with tf.control_dependencies(control_inputs=dependencies):
            return tf_util.identity(
                input=tf_util.constant(value=True, dtype='bool'))
def step(self, *, arguments, variables, fn_loss, **kwargs):
    """Compute gradients via GradientTape, apply them, and return the deltas.

    Snapshots the variables, differentiates the loss, drops None gradients,
    optionally clips by global norm, applies via the wrapped tf.keras
    optimizer, and returns per-variable (new - old) deltas.
    """
    # Trivial operation to enforce control dependency
    previous_values = list(
        tf_util.identity(input=variable) for variable in variables)

    # Remember variables before update
    with tf.control_dependencies(control_inputs=previous_values):
        with tf.GradientTape(persistent=False,
                             watch_accessed_variables=False) as tape:
            for variable in variables:
                tape.watch(tensor=variable)
            loss = fn_loss(**arguments.to_kwargs())
        gradients = tape.gradient(
            target=loss, sources=variables)  # , output_gradients=initial

        assertions = list()
        gradients = list(gradients)
        grads_and_vars = list(zip(gradients, variables))
        # Iterate backwards so pop(n) does not shift unvisited indices.
        for n in range(len(gradients) - 1, -1, -1):
            if gradients[n] is None:
                # Variable not reachable from the loss: drop it from the update.
                gradients.pop(n)
                grads_and_vars.pop(n)
            elif self.config.create_tf_assertions:
                assertions.append(tf.debugging.assert_all_finite(
                    x=gradients[n],
                    message="Invalid gradient: contains inf or nan."))
        assert len(gradients) > 0

        with tf.control_dependencies(control_inputs=assertions):
            dependencies = list()
            if self.gradient_norm_clipping is not None:
                clip_norm = self.gradient_norm_clipping.value()
                gradients, grads_norm = tf.clip_by_global_norm(
                    t_list=[tf_util.cast(x=g, dtype='float') for g in gradients],
                    clip_norm=clip_norm)
                # Record the pre-clipping norm.
                dependencies.extend(self.summary(
                    label='update-norm', name='unclipped-gradient-norm',
                    data=grads_norm, step='updates'))
                # Re-pair clipped gradients with their variables.
                grads_and_vars = [
                    (grad, var)
                    for grad, (_, var) in zip(gradients, grads_and_vars)
                ]
            applied = self.tf_optimizer.apply_gradients(
                grads_and_vars=grads_and_vars)
            dependencies.append(applied)

            # Return deltas after actually having change the variables.
            with tf.control_dependencies(control_inputs=dependencies):
                return [
                    variable - previous
                    for variable, previous in zip(variables, previous_values)
                ]
def update(self, *, arguments, variables, **kwargs):
    """Optimizer update with a rolling zero-delta debug check and summaries.

    Delegates to self.step(); when debug assertions are enabled, records per
    variable whether the delta was entirely zero despite non-zero rewards into
    a (3, num_variables) rolling history and asserts that no variable was
    zero for all tracked slots; then records update-norm and per-variable
    moment summaries and returns a constant True.
    """
    assert self.is_initialized_given_variables
    assert all(variable.dtype.is_floating for variable in variables)

    deltas = self.step(arguments=arguments, variables=variables, **kwargs)

    operations = list(deltas)
    if self.config.create_debug_assertions:
        from tensorforce.core.optimizers import DoublecheckStep, NaturalGradient, \
            Synchronization, UpdateModifier
        # Unwrap update modifiers to detect a DoublecheckStep in the chain.
        optimizer = self
        while isinstance(optimizer, UpdateModifier):
            if isinstance(optimizer, DoublecheckStep):
                break
            optimizer = optimizer.optimizer
        if not isinstance(optimizer, DoublecheckStep) and (
                not isinstance(optimizer, NaturalGradient) or
                not optimizer.only_positive_updates) and (
                not isinstance(self, Synchronization) or
                self.sync_frequency is None):
            false = tf_util.constant(value=False, dtype='bool')
            zero = tf_util.constant(value=0, dtype='int')
            one = tf_util.constant(value=1, dtype='int')
            zero_float = tf_util.constant(value=0.0, dtype='float')
            # True if any reward in the batch is non-zero.
            y = tf.reduce_any(input_tensor=tf.math.not_equal(
                x=arguments['reward'], y=zero_float))
            for index, (delta, variable) in enumerate(zip(deltas, variables)):
                if '_distribution/mean/linear/' in variable.name:
                    # Gaussian.state_value does not use mean
                    continue
                # Zero delta despite non-zero reward is suspicious.
                is_zero = tf.math.logical_and(
                    x=tf.math.equal(
                        x=tf.math.count_nonzero(
                            input=delta, dtype=tf_util.get_dtype(type='int')),
                        y=zero),
                    y=y)
                index = tf_util.constant(value=index, dtype='int', shape=(1,))
                # (slot, variable) index into the rolling history matrix.
                index = tf.stack(values=(
                    tf.expand_dims(input=self.zero_check_index, axis=0),
                    index), axis=1)
                operations.append(tf.tensor_scatter_nd_update(
                    tensor=self.zero_check_history, indices=index,
                    updates=tf.expand_dims(input=is_zero, axis=0)))
            # Fail if some slot of the history is all-zero-delta.
            operations.append(tf.debugging.assert_equal(
                x=tf.math.reduce_any(input_tensor=tf.math.reduce_all(
                    input_tensor=self.zero_check_history, axis=1), axis=0),
                y=false))
            with tf.control_dependencies(control_inputs=operations):
                # NOTE(review): tf.math.mod(x=one, y=3) is the constant 1, so
                # zero_check_index is pinned to 1 instead of cycling through
                # the 3 history slots — presumably this was meant to be
                # mod(zero_check_index + one, 3); confirm.
                operations = [
                    self.zero_check_index.assign(
                        value=tf.math.mod(x=one, y=3))
                ]

    with tf.control_dependencies(control_inputs=operations):
        dependencies = list()
        if self.root.summaries == 'all' or 'update-norm' in self.root.summaries:
            with self.root.summarizer.as_default():
                x = tf.linalg.global_norm(t_list=[
                    tf_util.cast(x=delta, dtype='float') for delta in deltas
                ])
                dependencies.append(tf.summary.scalar(
                    name='update-norm', data=x, step=self.root.updates))
        if self.root.summaries == 'all' or 'updates' in self.root.summaries:
            with self.root.summarizer.as_default():
                for var in variables:
                    # Strip the ':0' suffix, and the root-name prefix when present.
                    assert var.name[-2] == ':'
                    if var.name.startswith(self.root.name + '/'):
                        mean_name = var.name[len(self.root.name) + 1:-2] + '-mean'
                        var_name = var.name[len(self.root.name) + 1:-2] + '-variance'
                    else:
                        mean_name = var.name[:-2] + '-mean'
                        var_name = var.name[:-2] + '-variance'
                    mean, variance = tf.nn.moments(
                        x=var, axes=list(range(tf_util.rank(x=var))))
                    dependencies.append(tf.summary.scalar(
                        name=mean_name, data=mean, step=self.root.updates))
                    dependencies.append(tf.summary.scalar(
                        name=var_name, data=variance, step=self.root.updates))
        with tf.control_dependencies(control_inputs=dependencies):
            return tf_util.identity(
                input=tf_util.constant(value=True, dtype='bool'))