def apply(self, *, x):
    """Combine the registered input tensors into a single tensor.

    Args:
        x: Mapping of tensors. When exactly one tensor name is registered in
            ``self.tensors``, the entry stored under that name is returned
            unchanged.

    Returns:
        The single registered tensor, or the ``'concat'``, ``'product'``,
        ``'stack'`` or ``'sum'`` aggregation of all values of ``x`` along axis
        ``self.axis + 1``. For any other ``self.aggregation`` value the list
        of rank-padded tensors is returned as is.
    """
    if len(self.tensors) == 1:
        return x[self.tensors[0]]

    tensors = list(x.values())

    # Append trailing singleton axes until every tensor has as many axes as
    # the output spec shape has entries.
    target_rank = len(self.output_spec().shape)
    for position, tensor in enumerate(tensors):
        for axis in range(tf_util.rank(x=tensor), target_rank):
            tensor = tf.expand_dims(input=tensor, axis=axis)
        tensors[position] = tensor

    mode = self.aggregation
    if mode == 'concat':
        return tf.concat(values=tensors, axis=(self.axis + 1))

    if mode in ('product', 'stack', 'sum'):
        # These three modes share the stacking step; product and sum then
        # reduce the freshly created axis away again.
        stacked = tf.stack(values=tensors, axis=(self.axis + 1))
        if mode == 'product':
            return tf.reduce_prod(input_tensor=stacked, axis=(self.axis + 1))
        if mode == 'sum':
            return tf.reduce_sum(input_tensor=stacked, axis=(self.axis + 1))
        return stacked

    return tensors
def log_probability(self, *, parameters, action):
    """Select, for every action, its entry from ``parameters['logits']``.

    Args:
        parameters: Mapping that holds the ``'logits'`` tensor.
        action: Index tensor; presumably the logits carry one extra trailing
            axis enumerating the choices (confirm against the caller).

    Returns:
        The gathered logits, with the same rank as ``action``.
    """
    all_logits = parameters['logits']
    num_action_axes = tf_util.rank(x=action)

    # A trailing axis of size one makes every action a one-element index
    # list; all of the action's own axes are treated as batch axes.
    lookup = tf.expand_dims(input=action, axis=num_action_axes)
    picked = tf.gather(
        params=all_logits, indices=lookup, batch_dims=num_action_axes
    )

    # Drop the helper axis again.
    return tf.squeeze(input=picked, axis=num_action_axes)
def fn_summary():
    """Collect the moments of every variable of the enclosing scope.

    Returns:
        A flat list containing, for each entry of ``variables`` in order, the
        results of ``tf.nn.moments`` computed over all of its axes (mean
        followed by variance).
    """
    return [
        moment
        for variable in variables
        for moment in tf.nn.moments(
            x=variable, axes=list(range(tf_util.rank(x=variable)))
        )
    ]
def action_value(self, *, parameters, action):
    """Select, for every action, its entry from ``parameters['action_values']``.

    Args:
        parameters: Mapping that holds the ``'action_values'`` tensor.
        action: Index tensor; presumably the action values carry one extra
            trailing axis enumerating the choices (confirm against the
            caller).

    Returns:
        The gathered action values, with the same rank as ``action``.
    """
    values = parameters['action_values']
    trailing_axis = tf_util.rank(x=action)

    # Each action becomes a one-element index list on a new trailing axis,
    # while all of its own axes act as batch axes of the gather.
    selected = tf.gather(
        params=values,
        indices=tf.expand_dims(input=action, axis=trailing_axis),
        batch_dims=trailing_axis,
    )
    return tf.squeeze(input=selected, axis=trailing_axis)
def body(indices, remaining, current_x, current_internals):
    """Advance the iterative application by one step.

    Presumably used as a loop body (e.g. for ``tf.while_loop``); confirm at
    the call site. ``x``, ``zeros``, ``ones``, ``internals_signature`` and
    ``self`` come from the enclosing scope.

    Args:
        indices: Positions in ``x`` to process in this step.
        remaining: Per-entry counter; entries equal to ``zeros`` are treated
            as finished.
        current_x: Ignored on entry; the step input is re-gathered from ``x``.
        current_internals: Internal states in positional (args) form.

    Returns:
        Tuple ``(indices, remaining, next_x, next_internals)`` with the
        internals converted back to positional form.
    """

    def broadcast_mask(mask, reference):
        # Insert axes at position 1 until the mask has the rank of reference.
        for _ in range(tf_util.rank(x=reference) - 1):
            mask = tf.expand_dims(input=mask, axis=1)
        return mask

    internals = internals_signature.args_to_kwargs(args=current_internals)
    step_input = tf.gather(params=x, indices=indices)
    next_x, next_internals = self.iterative_apply(
        x=step_input, internals=internals
    )

    with tf.control_dependencies(control_inputs=(step_input, next_x)):
        is_finished = tf.math.equal(x=remaining, y=zeros)

        # Finished entries keep their previous internal state.
        if isinstance(next_internals, dict):
            for name, previous, updated in internals.zip_items(next_internals):
                mask = broadcast_mask(is_finished, previous)
                next_internals[name] = tf.where(
                    condition=mask, x=previous, y=updated
                )
        else:
            mask = broadcast_mask(is_finished, internals)
            next_internals = tf.where(
                condition=mask, x=internals, y=next_internals
            )

        # Count down unfinished entries, then move on every entry whose
        # updated counter has not reached zero.
        countdown = tf.where(condition=is_finished, x=zeros, y=ones)
        remaining -= countdown
        advance = tf.where(
            condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones
        )
        indices += advance

    next_internals = internals_signature.kwargs_to_args(kwargs=next_internals)
    return indices, remaining, next_x, next_internals
def apply(self, *, x):
    """Reduce ``x`` according to ``self.reduction``.

    Args:
        x: Input tensor.

    Returns:
        For ``'concat'``, ``x`` reshaped to ``(-1, self.output_spec().size)``.
        For ``'max'``, ``'mean'``, ``'product'`` and ``'sum'``, ``x`` after
        reducing axis 1 ``rank(x) - 2`` times with the matching reduction.
        ``None`` for any other reduction mode.
    """
    mode = self.reduction
    if mode == 'concat':
        return tf.reshape(tensor=x, shape=(-1, self.output_spec().size))

    if mode == 'max':
        reduce_fn = tf.reduce_max
    elif mode == 'mean':
        reduce_fn = tf.reduce_mean
    elif mode == 'product':
        reduce_fn = tf.reduce_prod
    elif mode == 'sum':
        reduce_fn = tf.reduce_sum
    else:
        # Unrecognized reduction modes yield no result.
        return None

    # Repeatedly collapse axis 1; the iteration count is fixed by the rank
    # of the incoming tensor.
    for _ in range(tf_util.rank(x=x) - 2):
        x = reduce_fn(input_tensor=x, axis=1)
    return x
def iterative_body(self, x, indices, remaining, current_x, current_internals):
    """Advance the iterative application by one step.

    Args:
        x: Full input tensor the step inputs are gathered from; returned
            unchanged.
        indices: Positions in ``x`` to process in this step.
        remaining: Per-entry counter; entries equal to zero are treated as
            finished.
        current_x: Only its leading dimension is read (to size the helper
            vectors); the step input itself is re-gathered from ``x``.
        current_internals: Internal state(s) of the previous step, either a
            dict-like object offering ``zip_items`` or a single tensor.

    Returns:
        Tuple ``(x, indices, remaining, next_x, next_internals)``.
    """
    # One-element shape vector holding the leading dimension of current_x.
    batch_shape = tf_util.cast(x=tf.shape(input=current_x)[:1], dtype='int')
    zeros = tf_util.zeros(shape=batch_shape, dtype='int')
    ones = tf_util.ones(shape=batch_shape, dtype='int')

    current_x = tf.gather(params=x, indices=indices)
    next_x, next_internals = self.iterative_apply(
        x=current_x, internals=current_internals
    )

    with tf.control_dependencies(control_inputs=(current_x, next_x)):
        is_finished = tf.math.equal(x=remaining, y=zeros)

        # Finished entries keep their previous internal state. The mask gets
        # extra axes at position 1 until it matches the rank of the state.
        if isinstance(next_internals, dict):
            for name, current_internal, next_internal in current_internals.zip_items(
                next_internals
            ):
                condition = is_finished
                for _ in range(tf_util.rank(x=current_internal) - 1):
                    condition = tf.expand_dims(input=condition, axis=1)
                next_internals[name] = tf.where(
                    condition=condition, x=current_internal, y=next_internal
                )
        else:
            condition = is_finished
            for _ in range(tf_util.rank(x=current_internals) - 1):
                condition = tf.expand_dims(input=condition, axis=1)
            next_internals = tf.where(
                condition=condition, x=current_internals, y=next_internals
            )

        # Count down unfinished entries, then move on every entry whose
        # updated counter has not reached zero.
        remaining -= tf.where(condition=is_finished, x=zeros, y=ones)
        indices += tf.where(
            condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones
        )

    return x, indices, remaining, next_x, next_internals
def update(self, *, arguments, variables, **kwargs):
    """Run one optimization step and record the optional update summaries.

    Args:
        arguments: Arguments forwarded to ``self.step``; ``arguments['reward']``
            is additionally read by the debug assertions.
        variables: Variables to update; all must have a floating-point dtype.
        **kwargs: Further keyword arguments forwarded to ``self.step``.

    Returns:
        A boolean ``True`` tensor created under control dependencies on the
        recorded summary operations.
    """
    assert self.is_initialized_given_variables
    assert all(variable.dtype.is_floating for variable in variables)

    deltas = self.step(arguments=arguments, variables=variables, **kwargs)

    assertions = list(deltas)
    if self.config.create_debug_assertions:
        from tensorforce.core.optimizers import DoublecheckStep, NaturalGradient, \
            Synchronization, UpdateModifier

        # Unwrap nested update modifiers, stopping at a DoublecheckStep.
        optimizer = self
        while isinstance(optimizer, UpdateModifier) and \
                not isinstance(optimizer, DoublecheckStep):
            optimizer = optimizer.optimizer

        # Optimizers for which an all-zero delta is legitimate.
        exempt = (
            isinstance(optimizer, DoublecheckStep)
            or (isinstance(optimizer, NaturalGradient)
                and optimizer.only_positive_updates)
            or (isinstance(self, Synchronization)
                and self.sync_frequency is not None)
        )
        if not exempt:
            for delta, variable in zip(deltas, variables):
                if '_distribution/mean/linear/' in variable.name:
                    # Gaussian.state_value does not use mean
                    continue
                # NOTE: a former exemption for '/bias:0' variables on the
                # first Synchronization update is disabled.

                # Every delta must contain a non-zero entry unless all
                # rewards are zero.
                num_nonzero = tf.math.count_nonzero(
                    input=delta, dtype=tf_util.get_dtype(type='int')
                )
                has_nonzero = tf.math.reduce_all(input_tensor=tf.math.greater(
                    x=num_nonzero, y=tf_util.constant(value=0, dtype='int')
                ))
                rewards_all_zero = tf.reduce_all(input_tensor=tf.math.equal(
                    x=arguments['reward'],
                    y=tf_util.constant(value=0.0, dtype='float')
                ))
                assertions.append(tf.debugging.assert_equal(
                    x=tf.math.logical_or(x=has_nonzero, y=rewards_all_zero),
                    y=tf_util.constant(value=True, dtype='bool'),
                    message=variable.name
                ))

    with tf.control_dependencies(control_inputs=assertions):
        dependencies = list()
        summaries = self.root.summaries

        if summaries == 'all' or 'update-norm' in summaries:
            with self.root.summarizer.as_default():
                update_norm = tf.linalg.global_norm(t_list=[
                    tf_util.cast(x=delta, dtype='float') for delta in deltas
                ])
                dependencies.append(tf.summary.scalar(
                    name='update-norm', data=update_norm, step=self.root.updates
                ))

        if summaries == 'all' or 'updates' in summaries:
            with self.root.summarizer.as_default():
                prefix = self.root.name + '/'
                for var in variables:
                    assert var.name.startswith(prefix) and var.name[-2:] == ':0'
                    # Summary label: variable name without the root prefix
                    # and without the trailing ':0'.
                    label = var.name[len(prefix):-2]
                    mean, variance = tf.nn.moments(
                        x=var, axes=list(range(tf_util.rank(x=var)))
                    )
                    dependencies.append(tf.summary.scalar(
                        name=label + '-mean', data=mean, step=self.root.updates
                    ))
                    dependencies.append(tf.summary.scalar(
                        name=label + '-variance', data=variance,
                        step=self.root.updates
                    ))

    with tf.control_dependencies(control_inputs=dependencies):
        return tf_util.identity(
            input=tf_util.constant(value=True, dtype='bool')
        )
def update(self, *, arguments, variables, **kwargs):
    """Run one optimization step and record the optional update summaries.

    With ``self.config.create_debug_assertions`` enabled (and unless the
    optimizer is exempt), a zero-update check is added: for each variable it
    is computed whether its delta is entirely zero although some reward is
    non-zero, and it is asserted that no row of ``self.zero_check_history``
    is ``True`` for all variables.

    Args:
        arguments: Arguments forwarded to ``self.step``; ``arguments['reward']``
            is additionally read by the debug assertions.
        variables: Variables to update; all must have a floating-point dtype.
        **kwargs: Further keyword arguments forwarded to ``self.step``.

    Returns:
        A boolean ``True`` tensor created under control dependencies on the
        recorded summary operations.
    """
    assert self.is_initialized_given_variables
    assert all(variable.dtype.is_floating for variable in variables)

    deltas = self.step(arguments=arguments, variables=variables, **kwargs)

    operations = list(deltas)
    if self.config.create_debug_assertions:
        from tensorforce.core.optimizers import DoublecheckStep, NaturalGradient, \
            Synchronization, UpdateModifier

        # Unwrap nested update modifiers, stopping at a DoublecheckStep.
        optimizer = self
        while isinstance(optimizer, UpdateModifier):
            if isinstance(optimizer, DoublecheckStep):
                break
            optimizer = optimizer.optimizer

        if not isinstance(optimizer, DoublecheckStep) and (
            not isinstance(optimizer, NaturalGradient) or
            not optimizer.only_positive_updates
        ) and (
            not isinstance(self, Synchronization) or self.sync_frequency is None
        ):
            false = tf_util.constant(value=False, dtype='bool')
            zero = tf_util.constant(value=0, dtype='int')
            one = tf_util.constant(value=1, dtype='int')
            zero_float = tf_util.constant(value=0.0, dtype='float')
            any_nonzero_reward = tf.reduce_any(input_tensor=tf.math.not_equal(
                x=arguments['reward'], y=zero_float
            ))

            for position, (delta, variable) in enumerate(zip(deltas, variables)):
                if '_distribution/mean/linear/' in variable.name:
                    # Gaussian.state_value does not use mean
                    continue
                # True if the delta is entirely zero despite non-zero reward.
                is_zero = tf.math.logical_and(
                    x=tf.math.equal(
                        x=tf.math.count_nonzero(
                            input=delta, dtype=tf_util.get_dtype(type='int')
                        ),
                        y=zero
                    ),
                    y=any_nonzero_reward
                )
                # Index pair [[history row, variable position]].
                column = tf_util.constant(
                    value=position, dtype='int', shape=(1,)
                )
                scatter_index = tf.stack(values=(
                    tf.expand_dims(input=self.zero_check_index, axis=0), column
                ), axis=1)
                # NOTE(review): tf.tensor_scatter_nd_update returns a new
                # tensor and nothing here assigns it back to
                # self.zero_check_history - confirm whether the history is
                # meant to be persisted.
                operations.append(tf.tensor_scatter_nd_update(
                    tensor=self.zero_check_history, indices=scatter_index,
                    updates=tf.expand_dims(input=is_zero, axis=0)
                ))

            operations.append(tf.debugging.assert_equal(
                x=tf.math.reduce_any(input_tensor=tf.math.reduce_all(
                    input_tensor=self.zero_check_history, axis=1
                ), axis=0),
                y=false
            ))

            with tf.control_dependencies(control_inputs=operations):
                # Advance the history row cyclically. The increment has to
                # start from the current index; mod(one, 3) alone is always 1
                # and would leave the index stuck.
                operations = [self.zero_check_index.assign(
                    value=tf.math.mod(x=(self.zero_check_index + one), y=3)
                )]

    with tf.control_dependencies(control_inputs=operations):
        dependencies = list()
        if self.root.summaries == 'all' or 'update-norm' in self.root.summaries:
            with self.root.summarizer.as_default():
                x = tf.linalg.global_norm(t_list=[
                    tf_util.cast(x=delta, dtype='float') for delta in deltas
                ])
                dependencies.append(tf.summary.scalar(
                    name='update-norm', data=x, step=self.root.updates
                ))

        if self.root.summaries == 'all' or 'updates' in self.root.summaries:
            with self.root.summarizer.as_default():
                for var in variables:
                    assert var.name[-2] == ':'
                    # Summary names drop the two-character ':N' suffix and,
                    # if present, the root name prefix.
                    if var.name.startswith(self.root.name + '/'):
                        mean_name = var.name[len(self.root.name) + 1:-2] + '-mean'
                        var_name = var.name[len(self.root.name) + 1:-2] + '-variance'
                    else:
                        mean_name = var.name[:-2] + '-mean'
                        var_name = var.name[:-2] + '-variance'
                    mean, variance = tf.nn.moments(
                        x=var, axes=list(range(tf_util.rank(x=var)))
                    )
                    dependencies.append(tf.summary.scalar(
                        name=mean_name, data=mean, step=self.root.updates
                    ))
                    dependencies.append(tf.summary.scalar(
                        name=var_name, data=variance, step=self.root.updates
                    ))

    with tf.control_dependencies(control_inputs=dependencies):
        return tf_util.identity(
            input=tf_util.constant(value=True, dtype='bool')
        )