def api_update(self):
    """
    Builds the graph for the update API call.

    Registers the global tensors for this call (``deterministic`` and
    ``optimization`` set to True, ``independent`` set to False, plus the global
    timestep/episode/update counters), builds the core update operation and
    returns the three counters routed through named identity operations which
    are control-dependent on that update.

    Returns:
        Tuple ``(timestep, episode, update)`` of output tensors.
    """
    def flag(value):
        # Boolean constant using the framework-wide 'bool' dtype
        return tf.constant(value=value, dtype=util.tf_dtype(dtype='bool'))

    # Set global tensors
    Module.update_tensors(
        deterministic=flag(True), independent=flag(False), optimization=flag(True),
        timestep=self.global_timestep, episode=self.global_episode,
        update=self.global_update
    )

    # Core update: retrieve update operation
    update_op = self.core_update()

    # Function-level identity operations for retrieval (plus enforce dependency)
    counters = (
        (self.global_timestep, 'timestep-output'),
        (self.global_episode, 'episode-output'),
        (self.global_update, 'update-output'),
    )
    with tf.control_dependencies(control_inputs=(update_op, )):
        return tuple(
            util.identity_operation(x=counter, operation_name=label)
            for counter, label in counters
        )
def tf_apply(self, x):
    """
    Builds the sequence output for input ``x`` and updates the stored history.

    On the first call (``self.has_previous`` is False) the current input is
    tiled ``self.length`` times along axis ``self.axis + 1``; on later calls it
    is concatenated to ``self.previous`` along that axis. Unless
    ``self.concatenate`` is set, the input first gets a new dimension inserted
    at ``self.axis + 1``.

    Args:
        x: Input tensor.

    Returns:
        The sequence tensor, control-dependent on the update of
        ``self.previous``.
    """
    def first_sequence():
        # Mark that a previous value exists from now on
        assignment = self.has_previous.assign(
            value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
            read_value=False
        )
        with tf.control_dependencies(control_inputs=(assignment, )):
            if self.concatenate:
                current = x
            else:
                current = tf.expand_dims(input=x, axis=(self.axis + 1))
            multiples = tuple(
                self.length if dims == self.axis + 1 else 1
                for dims in range(util.rank(x=current))
            )
            # Fix: tile `current`, not `x`. `multiples` has one entry per
            # dimension of `current`, which has one more dimension than `x`
            # when the input was expanded above.
            return tf.tile(input=current, multiples=multiples)

    def later_sequence():
        # Fix: removed a stray `tf.concat(values=(self.previous, x))` call which
        # lacked the required `axis` argument and whose result was unused.
        if self.concatenate:
            current = x
        else:
            current = tf.expand_dims(input=x, axis=(self.axis + 1))
        return tf.concat(values=(self.previous, current), axis=(self.axis + 1))

    sequence = self.cond(
        pred=self.has_previous, true_fn=later_sequence, false_fn=first_sequence
    )

    # NOTE(review): the stored history is built by concatenating along axis 0,
    # whereas the sequence above uses axis `self.axis + 1` -- confirm that this
    # is intended.
    assignment = self.previous.assign(
        value=tf.concat(values=(self.previous, x), axis=0)[-self.length + 1:],
        read_value=False
    )

    with tf.control_dependencies(control_inputs=(assignment, )):
        return util.identity_operation(x=sequence)
def tf_core_update(self):
    """
    Builds the core update: retrieve a batch of memory indices, optimize on
    them, then increment the global update counter.

    For ``update_unit == 'timesteps'`` the batch is retrieved with a past
    horizon (maximum over policy and baseline policy) and the estimator's
    future horizon; for ``'episodes'`` whole episodes are retrieved.

    NOTE(review): no branch handles any other ``update_unit`` value, in which
    case ``indices`` is unbound when the optimization is built.

    Returns:
        A boolean True tensor, control-dependent on the counter increment.
    """
    Module.update_tensor(name='update', tensor=self.global_update)

    true = tf.constant(value=True, dtype=util.tf_dtype(dtype='bool'))
    one = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))

    # Retrieve batch
    num_units = self.update_batch_size.value()
    if self.update_unit == 'timesteps':
        # Timestep-based batch: dependency horizon is the larger of the two policies'
        policy_horizon = self.policy.past_horizon(is_optimization=True)
        baseline_horizon = self.baseline_policy.past_horizon(is_optimization=True)
        past = tf.math.maximum(x=policy_horizon, y=baseline_horizon)
        future = self.estimator.future_horizon()
        indices = self.memory.retrieve_timesteps(
            n=num_units, past_horizon=past, future_horizon=future
        )
    elif self.update_unit == 'episodes':
        # Episode-based batch
        indices = self.memory.retrieve_episodes(n=num_units)

    # Optimization
    optimized = self.optimize(indices=indices)

    # Increment update only after the optimization has run
    with tf.control_dependencies(control_inputs=(optimized,)):
        incremented = self.global_update.assign_add(delta=one, read_value=False)

    with tf.control_dependencies(control_inputs=(incremented,)):
        return util.identity_operation(x=true)
def tf_step(self, variables, arguments, fn_loss, **kwargs):
    """
    Creates the operations for one step of the wrapped TensorFlow optimizer.

    Args:
        variables: List of variables to optimize.
        arguments: Dict of arguments passed to ``fn_loss`` as ``**kwargs``.
        fn_loss: A callable taking ``arguments`` as kwargs and returning the
            loss op.
        **kwargs: Additional arguments, not used here.

    Returns:
        List of delta tensors (variable value after the step minus its value
        before the step), one per variable.
    """
    # Snapshot the variables via identity operations so the pre-step values
    # are available after the variables have been changed
    snapshots = list()
    for variable in variables:
        snapshots.append(util.identity_operation(x=variable))

    # Force the snapshots to be taken before the loss is calculated
    with tf.control_dependencies(control_inputs=snapshots):
        loss = fn_loss(**arguments)

        # The actual TensorFlow minimize op
        minimized = self.optimizer.minimize(loss=loss, var_list=variables)

    # Compute deltas only after the variables have actually been changed
    with tf.control_dependencies(control_inputs=(minimized, )):
        deltas = list()
        for variable, snapshot in zip(variables, snapshots):
            deltas.append(variable - snapshot)
        return deltas
def apply_step():
    """
    Rescales the deltas by the Lagrange multiplier, applies them to the
    variables and returns them (optionally with the estimated improvement).

    Relies on ``constant``, ``learning_rate``, ``deltas``, ``loss_gradients``,
    ``variables`` and ``return_estimated_improvement`` from the enclosing
    scope.
    """
    # lambda = sqrt(c' / c)
    multiplier = tf.sqrt(x=(constant / learning_rate))

    # delta = delta' / lambda
    scaled_deltas = [delta / multiplier for delta in deltas]

    # improvement = grad(loss) * delta  (= loss_new - loss_old)
    products = list()
    for grad, delta in zip(loss_gradients, scaled_deltas):
        products.append(tf.reduce_sum(input_tensor=(grad * delta)))
    improvement = tf.add_n(inputs=products)

    # Apply natural gradient improvement
    applied = self.apply_step(variables=variables, deltas=scaled_deltas)

    with tf.control_dependencies(control_inputs=(applied,)):
        # Identity operations so the returned deltas depend on the applied step
        outputs = [util.identity_operation(x=delta) for delta in scaled_deltas]

    if not return_estimated_improvement:
        return outputs
    return outputs, improvement
def tf_apply(self, x):
    """
    Computes the difference between consecutive entries of ``x`` along axis 0.

    On the first call the leading delta is zero; afterwards the first entry is
    compared against the stored ``self.previous``. The last entry of ``x`` is
    stored as the new ``self.previous``.

    Args:
        x: Input tensor.

    Returns:
        The delta tensor if ``self.concatenate`` is False, otherwise ``x`` and
        the deltas concatenated along axis ``self.concatenate + 1``.
    """
    def delta_without_history():
        # Remember that a previous value is available from now on
        mark_seen = self.has_previous.assign(
            value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
            read_value=False
        )
        with tf.control_dependencies(control_inputs=(mark_seen, )):
            leading_zero = tf.zeros_like(input=x[:1])
            differences = x[1:] - x[:-1]
            return tf.concat(values=(leading_zero, differences), axis=0)

    def delta_with_history():
        shifted = tf.concat(values=(self.previous, x[:-1]), axis=0)
        return x - shifted

    delta = self.cond(
        pred=self.has_previous, true_fn=delta_with_history,
        false_fn=delta_without_history
    )

    store_last = self.previous.assign(value=x[-1:], read_value=False)

    with tf.control_dependencies(control_inputs=(store_last, )):
        if self.concatenate is not False:
            return tf.concat(values=(x, delta), axis=(self.concatenate + 1))
        return util.identity_operation(x=delta)
def tf_step(self, variables, **kwargs):
    """
    Creates the TensorFlow operations for performing a clipped optimization
    step.

    The internal optimizer performs its step first; afterwards the difference
    between the clipped and the unclipped delta is applied to the variables as
    a correction.

    Args:
        variables: List of variables to optimize.
        **kwargs: Additional arguments passed on to the internal optimizer.

    Returns:
        List of clipped delta tensors corresponding to the updates for each
        optimized variable.
    """
    raw_deltas = self.optimizer.step(variables=variables, **kwargs)

    with tf.control_dependencies(control_inputs=raw_deltas):
        limit = self.clipping_value.value()
        clipped_deltas = list()
        corrections = list()
        for raw_delta in raw_deltas:
            clipped = tf.clip_by_value(
                t=raw_delta, clip_value_min=-limit, clip_value_max=limit
            )
            clipped_deltas.append(clipped)
            # Amount by which the already-applied delta exceeded the clip range
            corrections.append(clipped - raw_delta)

        corrected = self.apply_step(variables=variables, deltas=corrections)

    with tf.control_dependencies(control_inputs=(corrected, )):
        outputs = list()
        for clipped in clipped_deltas:
            outputs.append(util.identity_operation(x=clipped))
        return outputs
def tf_apply(self, x):
    """
    Computes the difference between consecutive entries of ``x`` along axis 0,
    asserting that the leading dimension of ``x`` is 1.

    On the first call the leading delta is zero; afterwards the first entry is
    compared against the stored ``self.previous``. The last entry of ``x`` is
    stored as the new ``self.previous``.

    Args:
        x: Input tensor.

    Returns:
        The delta tensor if ``self.concatenate`` is False, otherwise ``x`` and
        the deltas concatenated along axis ``self.concatenate + 1``.
    """
    single_input = tf.debugging.assert_equal(
        x=tf.shape(input=x)[0], y=1,
        message="Deltafier preprocessor currently not compatible with batched Agent.act."
    )

    def delta_without_history():
        # Remember that a previous value is available from now on
        mark_seen = self.has_previous.assign(
            value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
            read_value=False
        )
        with tf.control_dependencies(control_inputs=(mark_seen, )):
            leading_zero = tf.zeros_like(input=x[:1])
            differences = x[1:] - x[:-1]
            return tf.concat(values=(leading_zero, differences), axis=0)

    def delta_with_history():
        shifted = tf.concat(values=(self.previous, x[:-1]), axis=0)
        return x - shifted

    with tf.control_dependencies(control_inputs=(single_input, )):
        delta = self.cond(
            pred=self.has_previous, true_fn=delta_with_history,
            false_fn=delta_without_history
        )
        store_last = self.previous.assign(value=x[-1:], read_value=False)

    with tf.control_dependencies(control_inputs=(store_last, )):
        if self.concatenate is not False:
            return tf.concat(values=(x, delta), axis=(self.concatenate + 1))
        return util.identity_operation(x=delta)
def tf_apply(self, x):
    """
    Builds the sequence output for input ``x``, asserting that the leading
    dimension of ``x`` is 1, and stores the trailing part as ``self.previous``.

    On the first call the current input is tiled ``self.length`` times along
    axis ``self.axis + 1``; afterwards it is concatenated to ``self.previous``
    along that axis. Unless ``self.concatenate`` is set, the input first gets a
    new dimension inserted at ``self.axis + 1``.

    Args:
        x: Input tensor.

    Returns:
        The sequence tensor, control-dependent on the update of
        ``self.previous``.
    """
    single_input = tf.debugging.assert_equal(
        x=tf.shape(input=x)[0], y=1,
        message="Sequence preprocessor currently not compatible with batched Agent.act."
    )

    def current_input():
        # Input as it enters the sequence: as-is when concatenating, otherwise
        # with an extra sequence dimension
        if self.concatenate:
            return x
        return tf.expand_dims(input=x, axis=(self.axis + 1))

    def first_timestep():
        mark_seen = self.has_previous.assign(
            value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
            read_value=False
        )
        with tf.control_dependencies(control_inputs=(mark_seen, )):
            current = current_input()
            multiples = tuple(
                self.length if dims == self.axis + 1 else 1
                for dims in range(util.rank(x=current))
            )
            return tf.tile(input=current, multiples=multiples)

    def other_timesteps():
        return tf.concat(
            values=(self.previous, current_input()), axis=(self.axis + 1)
        )

    with tf.control_dependencies(control_inputs=(single_input, )):
        xs = self.cond(
            pred=self.has_previous, true_fn=other_timesteps, false_fn=first_timestep
        )

        # Slice offset: zero everywhere except along the sequence axis, where
        # the oldest entry (one input-width when concatenating, else one step)
        # is skipped
        rank = util.rank(x=xs)
        sequence_axis = self.axis + 1
        begin = [0] * rank
        if 0 <= sequence_axis < rank:
            if self.concatenate:
                begin[sequence_axis] = self.input_spec['shape'][sequence_axis - 1]
            else:
                begin[sequence_axis] = 1
        begin = tuple(begin)

        store_previous = self.previous.assign(
            value=tf.slice(input_=xs, begin=begin, size=self.previous.shape),
            read_value=False
        )

    with tf.control_dependencies(control_inputs=(store_previous, )):
        return util.identity_operation(x=xs)
def apply_sync():
    """
    Moves the target variables towards the source variables by the update
    weight and records the current timestep as the last synchronization.

    Relies on ``source_variables``, ``variables`` and ``timestep`` from the
    enclosing scope.
    """
    weight = self.update_weight.value()

    sync_deltas = [
        weight * (source - target)
        for source, target in zip(source_variables, variables)
    ]

    applied = self.apply_step(variables=variables, deltas=sync_deltas)
    sync_recorded = self.last_sync.assign(value=timestep)

    with tf.control_dependencies(control_inputs=(applied, sync_recorded)):
        # Identity operations so the returned deltas depend on both updates
        outputs = list()
        for delta in sync_deltas:
            outputs.append(util.identity_operation(x=delta))
        return outputs
def tf_apply(self, x, initial=None):
    """
    Applies the layer over dependency sequences of the input, either by
    collecting each sequence and processing it at once ('cumulative') or by
    stepping through it with carried aggregates ('iterative').

    The sequences are described by the global tensors 'dependency_starts' and
    'dependency_lengths', and are truncated to at most
    ``self.dependency_horizon + 1`` trailing steps.

    Args:
        x: Input tensor from which sequence entries are gathered by index.
        initial: Optional initial aggregates for 'iterative' processing; if
            None, ``self.initial_values()`` is used.

    Returns:
        The result of the parent class' ``tf_apply``; in 'iterative' mode with
        ``initial`` given, a tuple of that result and the final aggregates.
    """
    zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
    one = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))

    dependency_starts = Module.retrieve_tensor(name='dependency_starts')
    dependency_lengths = Module.retrieve_tensor(name='dependency_lengths')
    # Batch size in 'long' dtype: tf.shape can emit int32/int64 directly,
    # any other dtype requires an explicit cast
    if util.tf_dtype(dtype='long') in (tf.int32, tf.int64):
        batch_size = tf.shape(input=dependency_starts, out_type=util.tf_dtype(dtype='long'))[0]
    else:
        batch_size = tf.dtypes.cast(x=tf.shape(input=dependency_starts)[0], dtype=util.tf_dtype(dtype='long'))
    zeros = tf.zeros(shape=(batch_size, ), dtype=util.tf_dtype(dtype='long'))
    ones = tf.ones(shape=(batch_size, ), dtype=util.tf_dtype(dtype='long'))
    # maximum_iterations = tf.math.reduce_max(input_tensor=lengths, axis=0)
    horizon = self.dependency_horizon.value() + one  # including 0th step
    # Skip leading steps of sequences longer than the horizon, and shorten
    # their lengths accordingly
    starts = dependency_starts + tf.maximum(x=(dependency_lengths - horizon), y=zeros)
    lengths = dependency_lengths - tf.maximum(x=(dependency_lengths - horizon), y=zeros)
    # No need to iterate further than the longest remaining sequence
    horizon = tf.minimum(x=horizon, y=tf.math.reduce_max(input_tensor=lengths, axis=0))

    if self.processing == 'cumulative':

        def body(indices, remaining, xs):
            # Gather the current step of every sequence and append it along axis 1
            current_x = tf.gather(params=x, indices=indices)
            current_x = tf.expand_dims(input=current_x, axis=1)
            xs = tf.concat(values=(xs, current_x), axis=1)
            # Decrement the remaining counter unless it is already zero ...
            remaining -= tf.where(condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones)
            # ... and advance the index only if the decremented counter is non-zero
            indices += tf.where(condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones)
            return indices, remaining, xs

        # Empty sequence (length 0 along axis 1) to be extended by the loop
        initial_xs = tf.zeros(
            shape=((batch_size, 0) + self.output_spec['shape']),
            dtype=util.tf_dtype(dtype=self.output_spec['type']))

        final_indices, final_remaining, final_xs = self.while_loop(
            cond=util.tf_always_true, body=body, loop_vars=(starts, lengths, initial_xs),
            back_prop=True, maximum_iterations=horizon)

        # initial_xs = tf.gather(params=x, indices=starts)
        # initial_xs = tf.expand_dims(input=initial_xs, axis=1)
        # missing = tf.expand_dims(input=horizon, axis=0) - lengths
        # missing -= tf.where(condition=tf.math.equal(x=missing, y=zeros), x=zeros, y=ones)
        # starts += tf.where(condition=tf.math.equal(x=missing, y=zeros), x=ones, y=zeros)
        # final_indices, final_counter, final_xs = self.while_loop(
        #     cond=util.tf_always_true, body=body, loop_vars=(starts, missing, initial_xs),
        #     back_prop=True, maximum_iterations=(horizon - one)
        # )

    elif self.processing == 'iterative':

        def body(indices, remaining, current_x, current_aggregates):
            # The incoming current_x loop variable is replaced by the freshly
            # gathered step
            current_x = tf.gather(params=x, indices=indices)
            next_x, next_aggregates = self.iterative_step(
                x=current_x, previous=current_aggregates)

            with tf.control_dependencies(control_inputs=(current_x, next_x)):
                is_finished = tf.math.equal(x=remaining, y=zeros)
                # Keep the previous aggregates for sequences which are finished
                if isinstance(next_aggregates, dict):
                    for name, current_aggregate, next_aggregate in util.zip_items(
                            current_aggregates, next_aggregates):
                        condition = is_finished
                        # Expand the per-sequence flag to the aggregate's rank
                        for _ in range(util.rank(x=current_aggregate) - 1):
                            condition = tf.expand_dims(input=condition, axis=1)
                        next_aggregates[name] = tf.where(
                            condition=condition, x=current_aggregate, y=next_aggregate)
                else:
                    condition = is_finished
                    # Expand the per-sequence flag to the aggregate's rank
                    for _ in range(util.rank(x=current_aggregates) - 1):
                        condition = tf.expand_dims(input=condition, axis=1)
                    next_aggregates = tf.where(condition=condition, x=current_aggregates, y=next_aggregates)
                # Decrement the remaining counter unless finished, then advance
                # the index only if the decremented counter is non-zero
                remaining -= tf.where(condition=is_finished, x=zeros, y=ones)
                indices += tf.where(condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones)

            return indices, remaining, next_x, next_aggregates

        # Placeholder loop variable; overwritten by the first loop iteration
        initial_x = tf.zeros(
            shape=((batch_size, ) + self.output_spec['shape']),
            dtype=util.tf_dtype(dtype=self.output_spec['type']))

        if initial is None:
            initial_aggregates = self.initial_values()
        else:
            initial_aggregates = initial

        final_indices, final_remaining, final_x, final_aggregates = self.while_loop(
            cond=util.tf_always_true, body=body,
            loop_vars=(starts, lengths, initial_x, initial_aggregates),
            back_prop=True, maximum_iterations=horizon)

    # assertions = [
    #     tf.debugging.assert_equal(
    #         x=final_indices, y=(tf.math.cumsum(x=dependency_lengths) - ones)
    #     ),
    #     tf.debugging.assert_equal(
    #         x=tf.math.reduce_sum(input_tensor=final_remaining, axis=0), y=zero
    #     )
    # ]

    # with tf.control_dependencies(control_inputs=assertions):
    if self.processing == 'cumulative':
        return super().tf_apply(x=self.cumulative_apply(xs=final_xs))
    elif self.processing == 'iterative':
        # The aggregates are only returned if the caller supplied initial ones
        if initial is None:
            return util.identity_operation(x=super().tf_apply(x=final_x))
        else:
            return util.identity_operation(x=super().tf_apply(
                x=final_x)), final_aggregates
def tf_core_update(self):
    """
    Builds the core update: increment the global update counter, retrieve a
    batch of memory indices, and optimize on them.

    For ``update_unit == 'timesteps'`` the batch is retrieved with a past
    padding given by the dependency horizon (maximum over policy and, if
    present, baseline policy) and a future padding of the estimator horizon
    plus one; for ``'episodes'`` whole episodes are retrieved.

    Also sets ``Module.global_summary_step`` to ``'update'``.

    Returns:
        A boolean True tensor, control-dependent on the optimization.
    """
    Module.update_tensor(name='update', tensor=self.global_update)
    Module.global_summary_step = 'update'

    true = tf.constant(value=True, dtype=util.tf_dtype(dtype='bool'))
    one = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))

    # Increment update
    incremented = self.global_update.assign_add(delta=one, read_value=False)

    # Retrieve batch (after the counter has been incremented)
    with tf.control_dependencies(control_inputs=(incremented, )):
        num_units = self.update_batch_size.value()

        if self.update_unit == 'timesteps':
            # Timestep-based batch: padded by the dependency horizon
            past_padding = self.policy.dependency_horizon(is_optimization=True)
            if self.baseline_policy is not None:
                baseline_horizon = self.baseline_policy.dependency_horizon(
                    is_optimization=True
                )
                past_padding = tf.math.maximum(x=past_padding, y=baseline_horizon)
            future_padding = self.estimator.horizon.value() + one
            indices = self.memory.retrieve_timesteps(
                n=num_units, past_padding=past_padding, future_padding=future_padding
            )

        elif self.update_unit == 'episodes':
            # Episode-based batch
            indices = self.memory.retrieve_episodes(n=num_units)

    # Optimization
    optimized = self.optimize(indices=indices)

    with tf.control_dependencies(control_inputs=(optimized, )):
        return util.identity_operation(x=true)
def tf_step(self, variables, arguments, fn_loss, **kwargs):
    """
    Creates the TensorFlow operations for performing an optimization step.

    Repeatedly perturbs the variables with scaled random normal noise,
    evaluates the loss, and accumulates each perturbation with the sign of the
    loss improvement. The averaged accumulated perturbation is the resulting
    delta, which is left applied to the variables.

    Args:
        variables: List of variables to optimize.
        arguments: Dict of arguments for callables, like fn_loss.
        fn_loss: A callable returning the loss of the current model.
        **kwargs: Additional arguments, not used.

    Returns:
        List of delta tensors corresponding to the updates for each optimized
        variable.
    """
    learning_rate = self.learning_rate.value()
    unperturbed_loss = fn_loss(**arguments)

    deltas = [tf.zeros_like(tensor=variable) for variable in variables]
    previous_perturbations = [
        tf.zeros_like(tensor=variable) for variable in variables
    ]

    if self.unroll_loop:
        # Unrolled for loop: here self.num_samples is used as a plain integer.
        # Fix: bind the local `num_samples`, which the averaging step below
        # reads; previously it was only assigned in the while-loop branch, so
        # the unrolled branch failed with an unbound local variable.
        num_samples = self.num_samples
        for _ in range(num_samples):
            with tf.control_dependencies(control_inputs=deltas):
                perturbations = [
                    tf.random_normal(shape=util.shape(variable)) * learning_rate
                    for variable in variables
                ]
                # Variables currently carry the previous perturbation, so only
                # the difference needs to be applied
                perturbation_deltas = [
                    pert - prev_pert
                    for pert, prev_pert in zip(perturbations, previous_perturbations)
                ]
                applied = self.apply_step(variables=variables, deltas=perturbation_deltas)
                previous_perturbations = perturbations

            with tf.control_dependencies(control_inputs=(applied, )):
                perturbed_loss = fn_loss(**arguments)
                direction = tf.sign(x=(unperturbed_loss - perturbed_loss))
                deltas = [
                    delta + direction * perturbation
                    for delta, perturbation in zip(deltas, perturbations)
                ]

    else:
        # TensorFlow while loop
        def body(deltas, previous_perturbations):
            with tf.control_dependencies(control_inputs=deltas):
                perturbations = [
                    tf.random_normal(shape=util.shape(variable)) * learning_rate
                    for variable in variables
                ]
                # Variables currently carry the previous perturbation, so only
                # the difference needs to be applied
                perturbation_deltas = [
                    pert - prev_pert
                    for pert, prev_pert in zip(perturbations, previous_perturbations)
                ]
                applied = self.apply_step(variables=variables, deltas=perturbation_deltas)

            with tf.control_dependencies(control_inputs=(applied, )):
                perturbed_loss = fn_loss(**arguments)
                direction = tf.sign(x=(unperturbed_loss - perturbed_loss))
                deltas = [
                    delta + direction * perturbation
                    for delta, perturbation in zip(deltas, perturbations)
                ]

            return deltas, perturbations

        num_samples = self.num_samples.value()
        deltas, perturbations = self.while_loop(
            cond=util.tf_always_true, body=body,
            loop_vars=(deltas, previous_perturbations),
            maximum_iterations=num_samples
        )

    with tf.control_dependencies(control_inputs=deltas):
        # Average the accumulated perturbations over the number of samples
        num_samples = tf.dtypes.cast(x=num_samples, dtype=util.tf_dtype(dtype='float'))
        deltas = [delta / num_samples for delta in deltas]
        # Replace the last perturbation still applied to the variables by the
        # final averaged delta
        perturbation_deltas = [
            delta - pert for delta, pert in zip(deltas, perturbations)
        ]
        applied = self.apply_step(variables=variables, deltas=perturbation_deltas)

    with tf.control_dependencies(control_inputs=(applied, )):
        # Trivial operation to enforce control dependency
        return [util.identity_operation(x=delta) for delta in deltas]
def api_experience(self):
    """
    Builds the graph for the experience API call.

    Validates type and shape of all inputs (states, internals, action masks,
    actions, terminal, reward), registers the global tensors for this call,
    builds the core experience operation under those assertions, and returns
    the global counters routed through named identity operations.

    Returns:
        Tuple ``(timestep, episode, update)`` of output tensors.
    """
    # Inputs
    states = self.states_input
    internals = self.internals_input
    auxiliaries = self.auxiliaries_input
    actions = self.actions_input
    terminal = self.terminal_input
    reward = self.reward_input

    zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))

    # Assertions
    assertions = list()

    # terminal: type and shape
    assertions.append(tf.debugging.assert_type(
        tensor=terminal, tf_type=util.tf_dtype(dtype='long')
    ))
    assertions.append(tf.debugging.assert_rank(x=terminal, rank=1))

    # reward: type and shape
    assertions.append(tf.debugging.assert_type(
        tensor=reward, tf_type=util.tf_dtype(dtype='float')
    ))
    assertions.append(tf.debugging.assert_rank(x=reward, rank=1))

    # shape of terminal equals shape of reward
    assertions.append(tf.debugging.assert_equal(
        x=tf.shape(input=terminal), y=tf.shape(input=reward)
    ))

    # buffer index is zero
    assertions.append(tf.debugging.assert_equal(
        x=tf.math.reduce_sum(input_tensor=self.buffer_index, axis=0),
        y=tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
    ))

    # at most one terminal
    assertions.append(tf.debugging.assert_less_equal(
        x=tf.math.count_nonzero(input_tensor=terminal, dtype=util.tf_dtype(dtype='long')),
        y=tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))
    ))

    # if terminal, last timestep in batch
    assertions.append(tf.debugging.assert_equal(
        x=tf.math.reduce_any(input_tensor=tf.math.greater(x=terminal, y=zero)),
        y=tf.math.greater(x=terminal[-1], y=zero)
    ))

    batch_size = tf.shape(input=terminal)[:1]

    def check_type_and_shape(tensor, dtype, shape):
        # Queue a dtype assertion and an assertion that the tensor's shape is
        # the batch size followed by the given per-item shape
        assertions.append(tf.debugging.assert_type(
            tensor=tensor, tf_type=util.tf_dtype(dtype=dtype)
        ))
        expected = tf.concat(
            values=(batch_size, tf.constant(value=shape, dtype=tf.int32)), axis=0
        )
        assertions.append(tf.debugging.assert_equal(
            x=tf.shape(input=tensor, out_type=tf.int32), y=expected
        ))

    # states: type and shape
    for name, spec in self.states_spec.items():
        check_type_and_shape(
            states[name], spec['type'],
            self.unprocessed_state_shape.get(name, spec['shape'])
        )

    # internals: type and shape
    for name, spec in self.internals_spec.items():
        check_type_and_shape(internals[name], spec['type'], spec['shape'])

    # action_masks: type and shape (only int actions have a mask)
    for name, spec in self.actions_spec.items():
        if spec['type'] != 'int':
            continue
        check_type_and_shape(
            auxiliaries[name + '_mask'], 'bool',
            spec['shape'] + (spec['num_values'], )
        )

    # actions: type and shape
    for name, spec in self.actions_spec.items():
        check_type_and_shape(actions[name], spec['type'], spec['shape'])

    def flag(value):
        # Boolean constant using the framework-wide 'bool' dtype
        return tf.constant(value=value, dtype=util.tf_dtype(dtype='bool'))

    # Set global tensors
    Module.update_tensors(
        deterministic=flag(True), independent=flag(True), optimization=flag(False),
        timestep=self.global_timestep, episode=self.global_episode,
        update=self.global_update
    )

    with tf.control_dependencies(control_inputs=assertions):
        # Core experience: retrieve experience operation
        experienced = self.core_experience(
            states=states, internals=internals, auxiliaries=auxiliaries,
            actions=actions, terminal=terminal, reward=reward
        )

    # Function-level identity operations for retrieval (plus enforce dependency)
    counters = (
        (self.global_timestep, 'timestep-output'),
        (self.global_episode, 'episode-output'),
        (self.global_update, 'update-output'),
    )
    with tf.control_dependencies(control_inputs=(experienced, )):
        return tuple(
            util.identity_operation(x=counter, operation_name=label)
            for counter, label in counters
        )
def add_summary(
    self, label, name, tensor, pass_tensors=None, return_summaries=False,
    mean_variance=False, enumerate_last_rank=False
):
    """
    Records a TensorFlow summary for ``tensor`` if the given label is logged.

    Args:
        label: Summary label (str), or iterable of labels of which at least
            one has to be contained in ``self.summary_labels``.
        name: Summary name.
        tensor: Tensor to summarize.
        pass_tensors: Tensor or iterable of tensors to return (made
            control-dependent on the summaries); defaults to ``tensor``.
        return_summaries: Not used by this implementation.
        mean_variance: If true, record mean and variance of the tensor (as
            ``<name>-mean`` / ``<name>-variance``) instead of the tensor.
        enumerate_last_rank: If true, record one summary per index of the last
            dimension, named ``<name><index>``.

    Returns:
        ``pass_tensors``, unchanged if nothing is recorded, otherwise routed
        through identity operations depending on the summaries (as a tuple if
        ``pass_tensors`` is iterable).

    Raises:
        TensorforceError: If an argument has an invalid type.
    """
    # should be "labels" !!!

    # label
    if util.is_iterable(x=label):
        if not all(isinstance(x, str) for x in label):
            raise TensorforceError.type(name='summary', argument='label', value=label)
    else:
        if not isinstance(label, str):
            raise TensorforceError.type(name='summary', argument='label', value=label)
    # name
    if not isinstance(name, str):
        raise TensorforceError.type(name='summary', argument='name', value=name)
    # tensor
    if not isinstance(tensor, tf.Tensor):
        raise TensorforceError.type(name='summary', argument='tensor', value=tensor)
    # pass_tensors
    if util.is_iterable(x=pass_tensors):
        if not all(isinstance(x, tf.Tensor) for x in pass_tensors):
            raise TensorforceError.type(
                name='summary', argument='pass_tensors', value=pass_tensors
            )
    elif pass_tensors is not None:
        if not isinstance(pass_tensors, tf.Tensor):
            raise TensorforceError.type(
                name='summary', argument='pass_tensors', value=pass_tensors
            )
    # enumerate_last_rank
    if not isinstance(enumerate_last_rank, bool):
        # Fix: report the offending argument itself (previously `tensor`)
        raise TensorforceError.type(
            name='summary', argument='enumerate_last_rank', value=enumerate_last_rank
        )

    if pass_tensors is None:
        pass_tensors = tensor

    # Check whether summaries are logged
    if self.summary_labels is None:
        return pass_tensors

    # Check whether not in while loop
    if 'while' in Module.global_scope:  # 'cond' in Module.global_scope
        return pass_tensors

    # Check whether given label is logged
    if util.is_iterable(x=label):
        if all(x not in self.summary_labels for x in label):
            return pass_tensors
    else:
        if label not in self.summary_labels:
            return pass_tensors

    # Handle enumerate_last_rank
    if enumerate_last_rank:
        num_dims = util.shape(x=tensor)[-1]
        tensors = OrderedDict(
            [(name + str(n), tensor[..., n]) for n in range(num_dims)]
        )
    else:
        tensors = OrderedDict([(name, tensor)])

    if mean_variance:
        # Replace every entry by its mean and variance over all dimensions
        for summary_name in list(tensors):
            summary_tensor = tensors.pop(summary_name)
            mean, variance = tf.nn.moments(
                x=summary_tensor, axes=tuple(range(util.rank(x=summary_tensor)))
            )
            tensors[summary_name + '-mean'] = mean
            tensors[summary_name + '-variance'] = variance

    # TensorFlow summaries
    summaries = list()
    for summary_name, summary_tensor in tensors.items():
        shape = util.shape(x=summary_tensor)
        if shape == () or shape == (-1, ):
            # Scalar
            summaries.append(
                tf.contrib.summary.scalar(name=summary_name, tensor=summary_tensor)
            )
        elif shape == (1, ) or shape == (-1, 1):
            # Single-value tensor as scalar
            summary_tensor = tf.squeeze(input=summary_tensor, axis=-1)
            summaries.append(
                tf.contrib.summary.scalar(name=summary_name, tensor=summary_tensor)
            )
        else:
            # General tensor as histogram
            summaries.append(
                tf.contrib.summary.histogram(name=summary_name, tensor=summary_tensor)
            )

    with tf.control_dependencies(control_inputs=summaries):
        if util.is_iterable(x=pass_tensors):
            return tuple(util.identity_operation(x=x) for x in pass_tensors)
        else:
            return util.identity_operation(x=pass_tensors)
def api_experience(self):
    """
    Builds the graph for the experience API call.

    Validates type and shape of all inputs (states, internals, action masks,
    actions, terminal, reward), registers the global tensors for this call,
    applies state and reward preprocessing where configured, builds the core
    experience operation under the assertions, and returns the global counters
    routed through named identity operations.

    Returns:
        Tuple ``(timestep, episode, update)`` of output tensors.
    """
    # Inputs (copied so preprocessing can replace entries)
    states = OrderedDict(self.states_input)
    internals = OrderedDict(self.internals_input)
    auxiliaries = OrderedDict(self.auxiliaries_input)
    actions = OrderedDict(self.actions_input)
    terminal = self.terminal_input
    reward = self.reward_input

    zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
    true = tf.constant(value=True, dtype=util.tf_dtype(dtype='bool'))
    batch_size = tf.shape(input=terminal)[:1]

    # Assertions
    assertions = list()

    def check_input(tensor, name, dtype, shape, type_message, shape_message):
        # Check the dtype directly and queue an assertion that the tensor's
        # shape is the batch size followed by the given per-item shape
        tf.debugging.assert_type(
            tensor=tensor, tf_type=util.tf_dtype(dtype=dtype),
            message=type_message.format(name)
        )
        expected = tf.constant(value=shape, dtype=util.tf_dtype(dtype='int'))
        assertions.append(tf.debugging.assert_equal(
            x=tf.shape(input=tensor, out_type=util.tf_dtype(dtype='int')),
            y=tf.concat(values=(batch_size, expected), axis=0),
            message=shape_message.format(name)
        ))

    # terminal: type and shape
    tf.debugging.assert_type(
        tensor=terminal, tf_type=util.tf_dtype(dtype='long'),
        message="Agent.experience: invalid type for terminal input."
    )
    assertions.append(tf.debugging.assert_rank(
        x=terminal, rank=1,
        message="Agent.experience: invalid shape for terminal input."
    ))

    # reward: type and shape
    tf.debugging.assert_type(
        tensor=reward, tf_type=util.tf_dtype(dtype='float'),
        message="Agent.experience: invalid type for reward input."
    )
    assertions.append(tf.debugging.assert_rank(
        x=reward, rank=1,
        message="Agent.experience: invalid shape for reward input."
    ))

    # shape of terminal equals shape of reward
    assertions.append(tf.debugging.assert_equal(
        x=tf.shape(input=terminal), y=tf.shape(input=reward),
        message="Agent.experience: incompatible shapes of terminal and reward input."
    ))

    # buffer index is zero
    assertions.append(tf.debugging.assert_equal(
        x=tf.math.reduce_sum(input_tensor=self.buffer_index, axis=0),
        y=tf.constant(value=0, dtype=util.tf_dtype(dtype='long')),
        message="Agent.experience: cannot be called mid-episode."
    ))

    # at most one terminal
    assertions.append(tf.debugging.assert_less_equal(
        x=tf.math.count_nonzero(input=terminal, dtype=util.tf_dtype(dtype='long')),
        y=tf.constant(value=1, dtype=util.tf_dtype(dtype='long')),
        message="Agent.experience: input contains more than one terminal."
    ))

    # if terminal, last timestep in batch
    assertions.append(tf.debugging.assert_equal(
        x=tf.math.reduce_any(input_tensor=tf.math.greater(x=terminal, y=zero)),
        y=tf.math.greater(x=terminal[-1], y=zero),
        message="Agent.experience: terminal is not the last input timestep."
    ))

    # states: type and shape (checked against the unprocessed spec if present)
    for name, spec in self.states_spec.items():
        raw_spec = self.unprocessed_state_spec.get(name, spec)
        check_input(
            states[name], name, raw_spec['type'], raw_spec['shape'],
            "Agent.experience: invalid type for {} state input.",
            "Agent.experience: invalid shape for {} state input."
        )

    # internals: type and shape
    for name, spec in self.internals_spec.items():
        check_input(
            internals[name], name, spec['type'], spec['shape'],
            "Agent.experience: invalid type for {} internal input.",
            "Agent.experience: invalid shape for {} internal input."
        )

    # action_masks: type and shape (only int actions have a mask)
    for name, spec in self.actions_spec.items():
        if spec['type'] != 'int':
            continue
        mask_name = name + '_mask'
        check_input(
            auxiliaries[mask_name], mask_name, 'bool',
            (spec['shape'] + (spec['num_values'],)),
            "Agent.experience: invalid type for {} action-mask input.",
            "Agent.experience: invalid shape for {} action-mask input."
        )
        # Every mask entry must allow at least one value along the last axis
        num_leading_axes = len(spec['shape']) + 1
        any_valid = tf.reduce_any(
            input_tensor=auxiliaries[mask_name], axis=num_leading_axes
        )
        assertions.append(tf.debugging.assert_equal(
            x=tf.reduce_all(
                input_tensor=any_valid, axis=tuple(range(len(spec['shape']) + 1))
            ),
            y=true,
            message="Agent.experience: at least one action has to be valid "
                    "for {} action-mask input.".format(mask_name)
        ))

    # actions: type and shape
    for name, spec in self.actions_spec.items():
        check_input(
            actions[name], name, spec['type'], spec['shape'],
            "Agent.experience: invalid type for {} action input.",
            "Agent.experience: invalid shape for {} action input."
        )

    # Set global tensors
    Module.update_tensors(
        independent=tf.constant(value=False, dtype=util.tf_dtype(dtype='bool')),
        deterministic=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
        timestep=self.global_timestep, episode=self.global_episode,
        update=self.global_update
    )

    with tf.control_dependencies(control_inputs=assertions):
        # Preprocessing states
        for name in self.states_spec:
            if name in self.preprocessing:
                states[name] = self.preprocessing[name].apply(x=states[name])

        # Preprocessing reward
        if 'reward' in self.preprocessing:
            reward = self.preprocessing['reward'].apply(x=reward)

        # Core experience: retrieve experience operation
        experienced = self.core_experience(
            states=states, internals=internals, auxiliaries=auxiliaries,
            actions=actions, terminal=terminal, reward=reward
        )

    # Function-level identity operations for retrieval (plus enforce dependency)
    counters = (
        (self.global_timestep, 'timestep-output'),
        (self.global_episode, 'episode-output'),
        (self.global_update, 'update-output'),
    )
    with tf.control_dependencies(control_inputs=(experienced,)):
        return tuple(
            util.identity_operation(x=counter, operation_name=label)
            for counter, label in counters
        )
def undo_deltas():
    """
    Calls ``self.fn_x`` with the negated deltas and returns the final values,
    control-dependent on that call.

    Relies on ``deltas`` and ``x_final`` from the enclosing scope.
    """
    negated = list()
    for delta in deltas:
        negated.append(-delta)
    reverted = self.fn_x(negated)

    with tf.control_dependencies(control_inputs=(reverted, )):
        outputs = list()
        for final_value in x_final:
            outputs.append(util.identity_operation(x=final_value))
        return outputs
def create_api_function(self, name, api_function):
    """Build the graph of one API function and return a callable running it.

    The graph is constructed by calling ``api_function()`` inside the module's
    device scope (if any) and a TensorFlow name scope called ``name``. While
    building, class-level ``Module`` bookkeeping (scope stack, loop/cond
    counters, global and queryable tensors) is initialised; it is always reset
    to ``None`` afterwards, and all entered scopes are always exited, even if
    graph construction raises.

    Args:
        name: Scoped API function name. It must contain a ``'.'``; the part
            after the first ``'.'`` is the key under which output and query
            tensor names are registered on ``self``.
        api_function: Zero-argument callable which builds the graph and
            returns the (possibly nested) output tensors.

    Returns:
        A function ``fn(query=None, **kwargs)`` which feeds ``kwargs`` to the
        ``'-input:0'`` placeholders, fetches the output tensors (plus the
        optional ``query`` tensors) via ``self.monitored_session`` and returns
        the fetched values.

    Raises:
        ValueError: If ``name`` does not contain a ``'.'``.
    """
    # Local import keeps this block self-contained.
    from contextlib import ExitStack

    # Call API TensorFlow function
    Module.global_scope = list()
    Module.scope_stack = list()
    Module.while_counter = 0
    Module.cond_counter = 0
    Module.global_tensors = OrderedDict()
    Module.queryable_tensors = OrderedDict()

    try:
        # ExitStack unwinds in reverse order on success and on failure:
        # name scope exit -> scope stack pop -> device scope exit.
        with ExitStack() as stack:
            if self.device is not None:
                # NOTE(review): presumably a tf.device context manager; confirm.
                stack.enter_context(self.device)

            scope = tf.name_scope(name=name)
            Module.scope_stack.append(scope)
            stack.callback(lambda: Module.scope_stack.pop())
            stack.enter_context(scope)

            results = api_function()

            # Part of the name after the first '.', used as registry key.
            function_name = name[name.index('.') + 1:]

            # Strip the leading "<name>/" and the trailing 9 characters
            # (assumes result names end in '-output:0' -- TODO confirm).
            self.output_tensors[function_name] = sorted(
                x.name[len(name) + 1: -9] for x in util.flatten(xs=results)
            )

            # Function-level identity operation for retrieval
            query_tensors = set()
            for scoped_name, tensor in Module.queryable_tensors.items():
                util.identity_operation(x=tensor, operation_name=(scoped_name + '-output'))
                assert scoped_name not in query_tensors
                query_tensors.add(scoped_name)
            self.query_tensors[function_name] = sorted(query_tensors)

        # Consistency checks only on the success path, so that they never
        # mask an exception raised during graph construction.
        assert len(Module.global_scope) == 0
        assert len(Module.scope_stack) == 0

    finally:
        # Always clear the class-level build state, so a failed build does
        # not leak stale tensors/scopes into later builds.
        Module.global_scope = None
        Module.scope_stack = None
        Module.while_counter = None
        Module.cond_counter = None
        Module.global_tensors = None
        Module.queryable_tensors = None

    def fn(query=None, **kwargs):
        """Run the API function in the monitored session.

        Args:
            query: Optional name(s) of additional tensors to fetch; each is
                joined with ``name`` and suffixed with ``'-output:0'``.
            **kwargs: Input values; ``None`` values are skipped and dict
                values are flattened by one level.

        Returns:
            The values fetched by the session call.
        """
        # Feed_dict dictionary
        feed_dict = dict()
        for key, arg in kwargs.items():
            if arg is None:
                continue
            elif isinstance(arg, dict):
                # Support single nesting (for states, internals, actions)
                for nested_key, nested_arg in arg.items():
                    feed_dict[util.join_scopes(self.name, nested_key) + '-input:0'] = nested_arg
            else:
                feed_dict[util.join_scopes(self.name, key) + '-input:0'] = arg
        if not all(isinstance(x, str) and x.endswith('-input:0') for x in feed_dict):
            raise TensorforceError.value(
                name=api_function, argument='inputs', value=list(feed_dict)
            )

        # Fetches value/tuple
        fetches = util.fmap(function=(lambda x: x.name), xs=results)
        if query is not None:
            # If additional tensors are to be fetched
            query = util.fmap(
                function=(lambda x: util.join_scopes(name, x) + '-output:0'), xs=query
            )
            if util.is_iterable(x=fetches):
                fetches = tuple(fetches) + (query,)
            else:
                fetches = (fetches, query)
        if not util.reduce_all(
            predicate=(lambda x: isinstance(x, str) and x.endswith('-output:0')), xs=fetches
        ):
            raise TensorforceError.value(
                name=api_function, argument='outputs', value=list(fetches)
            )

        # TensorFlow session call
        fetched = self.monitored_session.run(fetches=fetches, feed_dict=feed_dict)

        return fetched

    return fn
def create_api_function(self, name, api_function):
    """Build the graph of one API function and return a callable running it.

    The graph is constructed by calling ``api_function()`` inside the module's
    device scope (if any) and a TensorFlow name scope called ``name``. The
    class-level ``Module.global_scope`` / ``Module.global_tensors`` state used
    during construction is always reset to ``None`` afterwards, and all
    entered scopes are always exited, even if graph construction raises.

    Args:
        name: Name of the TensorFlow name scope in which the function is built.
        api_function: Zero-argument callable which builds the graph and
            returns the (possibly nested) output tensors.

    Returns:
        A function ``fn(query=None, **kwargs)`` which feeds ``kwargs`` to the
        ``'-input:0'`` placeholders, fetches the output tensors (plus the
        optional ``query`` tensors) via ``self.monitored_session`` and returns
        the fetched values.
    """
    # Local import keeps this block self-contained.
    from contextlib import ExitStack

    # Call API TensorFlow function
    Module.global_scope = list()
    Module.global_tensors = OrderedDict()

    try:
        # ExitStack unwinds in reverse order on success and on failure:
        # name scope exit -> device scope exit.
        with ExitStack() as stack:
            if self.device is not None:
                # NOTE(review): presumably a tf.device context manager; confirm.
                stack.enter_context(self.device)
            stack.enter_context(tf.name_scope(name=name))

            results = api_function()

            # Function-level identity operation for retrieval; created inside
            # the name scope so queries can address "<name>/<x>-output:0".
            # Tensors recorded within cond/while scopes are skipped.
            for scoped_name, tensor in Module.global_tensors.items():
                if '/cond/' not in scoped_name and '/while/' not in scoped_name:
                    util.identity_operation(x=tensor, operation_name=(scoped_name + '-output'))

    finally:
        # Always clear the class-level build state, so a failed build does
        # not leak stale tensors/scopes into later builds.
        Module.global_tensors = None
        Module.global_scope = None

    def fn(query=None, **kwargs):
        """Run the API function in the monitored session.

        Args:
            query: Optional name(s) of additional tensors to fetch; each is
                joined with ``name`` and suffixed with ``'-output:0'``.
            **kwargs: Input values; ``None`` values are skipped and dict
                values are flattened by one level.

        Returns:
            The values fetched by the session call.
        """
        # Feed_dict dictionary
        feed_dict = dict()
        for key, arg in kwargs.items():
            if arg is None:
                continue
            elif isinstance(arg, dict):
                # Support single nesting (for states, internals, actions)
                for nested_key, nested_arg in arg.items():
                    feed_dict[util.join_scopes(self.name, nested_key) + '-input:0'] = nested_arg
            else:
                feed_dict[util.join_scopes(self.name, key) + '-input:0'] = arg
        if not all(isinstance(x, str) and x.endswith('-input:0') for x in feed_dict):
            raise TensorforceError.unexpected()

        # Fetches value/tuple
        fetches = util.fmap(function=(lambda x: x.name), xs=results)
        if query is not None:
            # If additional tensors are to be fetched
            query = util.fmap(
                function=(lambda x: util.join_scopes(name, x) + '-output:0'), xs=query
            )
            if util.is_iterable(x=fetches):
                fetches = tuple(fetches) + (query, )
            else:
                fetches = (fetches, query)
        if not util.reduce_all(
            predicate=(lambda x: isinstance(x, str) and x.endswith('-output:0')), xs=fetches
        ):
            raise TensorforceError.unexpected()

        # TensorFlow session call
        fetched = self.monitored_session.run(fetches=fetches, feed_dict=feed_dict)

        return fetched

    return fn