def initialize_given_variables(self, *, variables):
    """Record the spec of the optimized variables and set up debug state.

    Forwards ``variables`` to every sub-module that is an ``Optimizer``,
    stores a ``TensorsSpec`` describing them in ``self.variables_spec`` and
    marks this module as initialized with respect to its variables. If
    ``self.config.create_debug_assertions`` is set, two non-trainable,
    non-saved bookkeeping variables are created as well.

    Args:
        variables: Sequence of variables to be optimized; ``len(variables)``
            is used for the width of ``zero_check_history``.
    """
    assert not self.root.is_initialized and not self.is_initialized_given_variables

    for module in self.this_submodules:
        if isinstance(module, Optimizer):
            module.initialize_given_variables(variables=variables)

    # Replace "/" with "_" to ensure TensorDict is flat
    # (the last two characters of the name are dropped -- presumably the
    # ":0" output suffix; confirm against the variable naming scheme).
    self.variables_spec = TensorsSpec(
        (
            (
                var.name[:-2].replace('/', '_'),
                TensorSpec(
                    type=tf_util.dtype(x=var, fallback_tf_dtype=True),
                    shape=tf_util.shape(x=var),
                ),
            )
            for var in variables
        )
    )

    self.is_initialized_given_variables = True

    if self.config.create_debug_assertions:
        # self.variable() asserts `is_initialized is False`, so the flag is
        # lowered while the debug variables are created.
        self.is_initialized = False

        # The history already has one column per variable, so both debug
        # variables are created exactly once rather than once per variable
        # (which re-created identically named variables and kept only the
        # last, and created nothing at all for an empty variable list).
        self.zero_check_history = self.variable(
            name='zero_check_history',
            spec=TensorSpec(type='bool', shape=(3, len(variables))),
            initializer='zeros',
            is_trainable=False,
            is_saved=False,
        )
        self.zero_check_index = self.variable(
            name='zero_check_index',
            spec=TensorSpec(type='int', shape=()),
            initializer='zeros',
            is_trainable=False,
            is_saved=False,
        )

        self.is_initialized = True
def body(deltas, previous_perturbations):
    """Apply one fresh random perturbation and accumulate its signed effect.

    ``learning_rate``, ``variables``, ``fn_loss``, ``arguments`` and
    ``unperturbed_loss`` are taken from the enclosing scope.

    Args:
        deltas: Accumulated per-variable deltas so far.
        previous_perturbations: Perturbations currently applied to the
            variables; they are swapped out for the newly sampled ones.

    Returns:
        Tuple ``(deltas, perturbations)`` with the updated accumulated
        deltas and the perturbations that are now applied.
    """
    # Sampling and assignment only happen once the incoming deltas exist.
    with tf.control_dependencies(control_inputs=deltas):
        perturbations = list()
        for variable in variables:
            noise = tf.random.normal(
                shape=tf_util.shape(x=variable),
                dtype=tf_util.get_dtype(type='float'),
            )
            perturbations.append(learning_rate * noise)

        # Difference that moves a variable from the old to the new perturbation.
        shifts = [
            current - previous
            for current, previous in zip(perturbations, previous_perturbations)
        ]

        assignments = [
            variable.assign_add(delta=shift, read_value=False)
            for variable, shift in zip(variables, shifts)
        ]

    # The loss is evaluated only after all variables carry the new perturbation.
    with tf.control_dependencies(control_inputs=assignments):
        perturbed_loss = fn_loss(**arguments.to_kwargs())
        direction = tf.math.sign(x=(unperturbed_loss - perturbed_loss))
        accumulated = [
            delta + direction * perturbation
            for delta, perturbation in zip(deltas, perturbations)
        ]

    return accumulated, perturbations
def step(self, *, arguments, variables, **kwargs):
    """Move ``variables`` towards ``source_variables`` every few steps.

    While ``self.next_sync`` is greater than one, the counter is decremented
    and zero deltas are returned. Otherwise the counter is reset to
    ``self.sync_frequency`` and every target variable is shifted by
    ``self.update_weight * (source - target)``.

    Args:
        arguments: Not used by this step.
        variables: Target variables to update.
        **kwargs: Must contain ``source_variables``, matching ``variables``
            pairwise in shape.

    Returns:
        List with one delta tensor per target variable.
    """
    source_variables = kwargs['source_variables']

    assert all(
        tf_util.shape(x=src) == tf_util.shape(x=tgt)
        for src, tgt in zip(source_variables, variables)
    )

    one = tf_util.constant(value=1, dtype='int')

    def apply_sync():
        # Restart the countdown before touching any variable.
        counter_reset = self.next_sync.assign(
            value=self.sync_frequency.value(), read_value=False
        )

        with tf.control_dependencies(control_inputs=(counter_reset,)):
            weight = self.update_weight.value()
            deltas = []
            assignments = []
            for src, tgt in zip(source_variables, variables):
                shift = weight * (src - tgt)
                deltas.append(shift)
                assignments.append(tgt.assign_add(delta=shift, read_value=False))

        with tf.control_dependencies(control_inputs=assignments):
            # Trivial operation to enforce control dependency
            return [tf_util.identity(input=shift) for shift in deltas]

    def no_sync():
        counter_decrement = self.next_sync.assign_sub(delta=one, read_value=False)

        with tf.control_dependencies(control_inputs=(counter_decrement,)):
            return [
                tf_util.zeros(shape=tf_util.shape(x=tgt), dtype='float')
                for tgt in variables
            ]

    skip_sync = tf.math.greater(x=self.next_sync, y=one)
    return tf.cond(pred=skip_sync, true_fn=no_sync, false_fn=apply_sync)
def initialize_given_variables(self, *, variables, register_summaries):
    """Initialize the parent class, then finish setting up the line search.

    Args:
        variables: Variables to be optimized; their names, dtypes and shapes
            form the values spec handed to ``self.line_search``.
        register_summaries: Passed through unchanged to the parent
            implementation.
    """
    super().initialize_given_variables(
        variables=variables, register_summaries=register_summaries
    )

    # One (name, spec) pair per variable, consumed lazily by TensorsSpec.
    named_specs = (
        (
            var.name,
            TensorSpec(type=tf_util.dtype(x=var), shape=tf_util.shape(x=var)),
        )
        for var in variables
    )
    values_spec = TensorsSpec(named_specs)

    self.line_search.complete_initialize(
        arguments_spec=self.arguments_spec, values_spec=values_spec
    )
def no_sync():
    """Decrement the sync counter and report zero deltas for all variables.

    ``self``, ``one`` and ``variables`` are taken from the enclosing scope.

    Returns:
        List with one float zero tensor per variable, shaped like that
        variable.
    """
    counter_decrement = self.next_sync.assign_sub(delta=one, read_value=False)

    # The zero deltas are created under the decrement's control dependency.
    with tf.control_dependencies(control_inputs=(counter_decrement,)):
        return [
            tf_util.zeros(shape=tf_util.shape(x=variable), dtype='float')
            for variable in variables
        ]
def initialize_given_variables(self, *, variables):
    """Record the spec of the optimized variables on this module.

    Forwards ``variables`` to every sub-module that is an ``Optimizer``,
    stores a ``TensorsSpec`` describing them in ``self.variables_spec`` and
    sets ``self.is_initialized_given_variables``.

    Args:
        variables: Variables to be optimized.
    """
    assert not self.root.is_initialized and not self.is_initialized_given_variables

    for submodule in self.this_submodules:
        if not isinstance(submodule, Optimizer):
            continue
        submodule.initialize_given_variables(variables=variables)

    # Replace "/" with "_" to ensure TensorDict is flat
    # (the last two characters of the name are dropped -- presumably the
    # ":0" output suffix; confirm against the variable naming scheme).
    named_specs = (
        (
            var.name[:-2].replace('/', '_'),
            TensorSpec(
                type=tf_util.dtype(x=var, fallback_tf_dtype=True),
                shape=tf_util.shape(x=var),
            ),
        )
        for var in variables
    )
    self.variables_spec = TensorsSpec(named_specs)

    self.is_initialized_given_variables = True
def step(self, *, arguments, variables, **kwargs):
    """Run the wrapped optimizer locally and mirror the result globally.

    The wrapped ``self.optimizer`` produces deltas for the local
    ``variables``. Those deltas are added to the global variables, after
    which each local variable is shifted by ``global - local``.

    Args:
        arguments: Passed through to the wrapped optimizer.
        variables: Local variables to update.
        **kwargs: Must contain ``global_variables``, matching ``variables``
            pairwise in shape; also passed through to the wrapped optimizer.

    Returns:
        List with, per variable, the local delta plus the subsequent
        ``global - local`` correction.
    """
    global_variables = kwargs['global_variables']

    assert all(
        tf_util.shape(x=global_var) == tf_util.shape(x=local_var)
        for global_var, local_var in zip(global_variables, variables)
    )

    local_deltas = self.optimizer.step(
        arguments=arguments, variables=variables, **kwargs
    )

    # Apply the local deltas to the global variables.
    with tf.control_dependencies(control_inputs=local_deltas):
        global_assignments = [
            global_var.assign_add(delta=delta, read_value=False)
            for global_var, delta in zip(global_variables, local_deltas)
        ]

    # Shift the local variables by their difference to the global ones.
    with tf.control_dependencies(control_inputs=global_assignments):
        update_deltas = [
            global_var - local_var
            for global_var, local_var in zip(global_variables, variables)
        ]
        local_assignments = [
            local_var.assign_add(delta=delta, read_value=False)
            for local_var, delta in zip(variables, update_deltas)
        ]

        # TODO: Update time, episode, etc (like in Synchronization)?

    with tf.control_dependencies(control_inputs=local_assignments):
        return [
            local_delta + update_delta
            for local_delta, update_delta in zip(local_deltas, update_deltas)
        ]
def fn_initial_gradients(*, states, horizons, internals, auxiliaries, actions, reward,
                         reference):
    """Return the negated gradient of the baseline's action value w.r.t. the action.

    ``policy`` and ``baseline`` are taken from the enclosing scope. The
    policy's own action replaces the ``actions`` argument; ``reward`` and
    ``reference`` are not used.

    Only a single action is supported, with shape of rank one, or of rank
    two with a second dimension of one.
    """
    has_separate_internals = 'policy' in internals
    if has_separate_internals:
        policy_internals = internals['policy']
        baseline_internals = internals['baseline']
    else:
        policy_internals = internals
        # TODO: Baseline currently cannot have internal states, since generally only policy
        # internals are passed to policy optimizer
        assert len(baseline.internals_spec) == 0
        baseline_internals = TensorDict()

    actions = policy.act(
        states=states,
        horizons=horizons,
        internals=policy_internals,
        auxiliaries=auxiliaries,
        independent=True,
        return_internals=False,
    )
    assert len(actions) == 1
    action = actions.value()
    action_shape = tf_util.shape(x=action)
    rank = len(action_shape)
    assert rank <= 2

    with tf.GradientTape(persistent=False, watch_accessed_variables=False) as tape:
        # Only the action tensor is tracked; accessed variables are not watched.
        tape.watch(tensor=action)
        actions_value = baseline.actions_value(
            states=states,
            horizons=horizons,
            internals=baseline_internals,
            auxiliaries=auxiliaries,
            actions=actions,
            reduced=True,
            return_per_action=False,
        )

        if rank == 1:
            return -tape.gradient(target=actions_value, sources=action)[0]
        elif rank == 2 and action_shape[1] == 1:
            return -tape.gradient(target=actions_value, sources=action)[0][0]
        else:
            assert False
def loss(self, *, states, horizons, internals, auxiliaries, actions, reward, reference,
         policy):
    """Return the sum of the losses of ``objective1`` and ``objective2``.

    ``reference`` holds both objectives' references side by side along its
    second axis: the first ``reference_spec1.size`` columns belong to
    ``objective1`` and the rest to ``objective2``. Each part is reshaped to
    its objective's reference shape before being passed on. All other
    arguments go to both objectives unchanged.
    """
    spec1 = self.objective1.reference_spec()
    spec2 = self.objective2.reference_spec()
    assert tf_util.shape(x=reference)[1] == spec1.size + spec2.size

    # Split the reference and restore each objective's reference shape.
    reference1 = tf.reshape(
        tensor=reference[:, :spec1.size], shape=((-1,) + spec1.shape)
    )
    reference2 = tf.reshape(
        tensor=reference[:, spec1.size:], shape=((-1,) + spec2.shape)
    )

    shared_kwargs = dict(
        states=states,
        horizons=horizons,
        internals=internals,
        auxiliaries=auxiliaries,
        actions=actions,
        reward=reward,
        policy=policy,
    )

    loss1 = self.objective1.loss(reference=reference1, **shared_kwargs)
    loss2 = self.objective2.loss(reference=reference2, **shared_kwargs)

    return loss1 + loss2
def variable(self, *, name, spec, initializer, is_trainable, is_saved,
             initialization_scale=None):
    """Validate the arguments and create a ``tf.Variable``.

    May only be called while ``self.is_initialized`` is ``False``.

    Args:
        name: Variable name, a string.
        spec: Fully specified ``TensorSpec`` giving type and shape.
        initializer: Either a concrete value (Python scalar of the spec's
            type, ``np.ndarray`` or ``tf.Tensor`` with matching dtype and
            shape) or one of the names ``'constant'``, ``'normal'``,
            ``'normal-relu'``, ``'ones'``, ``'orthogonal'``,
            ``'orthogonal-relu'``, ``'zeros'``.
        is_trainable: Whether the variable is trainable; only allowed for
            float specs.
        is_saved: Stored on the returned variable as ``is_saved``.
        initialization_scale: Only valid with the ``'constant'``,
            ``'orthogonal'`` and ``'orthogonal-relu'`` initializers. It is
            the fill value for ``'constant'`` (and required there) and a
            positive multiplier for the orthogonal initializers.

    Returns:
        The created ``tf.Variable`` with an extra ``is_saved`` attribute.

    Raises:
        TensorforceError: If an argument has the wrong type or value, or if
            the initializer does not fit the spec.
    """
    assert self.is_initialized is False

    # name
    if not isinstance(name, str):
        raise TensorforceError.type(name='variable', argument='name', dtype=type(name))

    # spec
    if not isinstance(spec, TensorSpec):
        raise TensorforceError.dtype(name='variable', argument='spec', dtype=type(spec))
    if spec.is_underspecified():
        raise TensorforceError.value(
            name='variable', argument='spec', value=spec, hint='underspecified'
        )

    # initializer
    initializer_names = (
        'constant', 'normal', 'normal-relu', 'ones', 'orthogonal', 'orthogonal-relu', 'zeros'
    )
    if not isinstance(initializer, (spec.py_type(), np.ndarray, tf.Tensor)) and \
            initializer not in initializer_names:
        raise TensorforceError.value(
            name='variable', argument='initializer', value=initializer
        )
    elif isinstance(initializer, np.ndarray) and initializer.dtype != spec.np_type():
        raise TensorforceError.type(
            name='variable', argument='initializer', dtype=initializer.dtype
        )
    elif isinstance(initializer, tf.Tensor) and \
            tf_util.dtype(x=initializer) != spec.tf_type():
        raise TensorforceError.type(
            name='variable', argument='initializer', dtype=tf_util.dtype(x=initializer)
        )

    # initialization_scale
    if initialization_scale is not None:
        if isinstance(initializer, (spec.py_type(), np.ndarray, tf.Tensor)) or \
                initializer not in ('constant', 'orthogonal', 'orthogonal-relu'):
            raise TensorforceError.invalid(
                name='variable', argument='initialization_scale',
                condition='initializer not orthogonal'
            )
        elif not isinstance(initialization_scale, spec.py_type()):
            raise TensorforceError.type(
                name='variable', argument='initialization_scale',
                dtype=type(initialization_scale), hint='!= float'
            )

    # is_trainable
    if not isinstance(is_trainable, bool):
        raise TensorforceError.type(
            name='variable', argument='is_trainable', dtype=type(is_trainable)
        )
    elif is_trainable and spec.type != 'float':
        raise TensorforceError.value(
            name='variable', argument='is_trainable', value=is_trainable,
            condition='spec.type != float'
        )

    # is_saved
    if not isinstance(is_saved, bool):
        raise TensorforceError.type(name='variable', argument='is_saved', dtype=type(is_saved))

    # Variable initializer
    if isinstance(initializer, spec.py_type()):
        initializer = tf_util.constant(value=initializer, dtype=spec.type, shape=spec.shape)

    elif isinstance(initializer, np.ndarray):
        if initializer.shape != spec.shape:
            raise TensorforceError.mismatch(
                name='Module.variable', value1='shape', value2='initializer'
            )
        initializer = tf_util.constant(value=initializer, dtype=spec.type)

    elif isinstance(initializer, tf.Tensor):
        # A tensor initializer is used as-is once its shape is confirmed.
        if tf_util.shape(x=initializer) != spec.shape:
            raise TensorforceError.mismatch(
                name='Module.variable', value1='shape', value2='initializer'
            )

    elif not isinstance(initializer, str):
        raise TensorforceError("Invalid variable initializer: {}".format(initializer))

    elif initializer.startswith('normal'):
        if spec.type != 'float':
            raise TensorforceError(
                message="Invalid variable initializer value for non-float variable: {}."
                .format(initializer)
            )
        # Standard deviation is derived from the shape and capped at 0.1:
        # the '-relu' variant divides by the product of all but the last
        # dimension, the plain variant additionally adds the last dimension.
        if initializer.endswith('-relu'):
            stddev = min(0.1, np.sqrt(2.0 / util.product(xs=spec.shape[:-1])))
        else:
            stddev = min(
                0.1, np.sqrt(2.0 / (util.product(xs=spec.shape[:-1]) + spec.shape[-1]))
            )
        initializer = tf.random.normal(shape=spec.shape, stddev=stddev, dtype=spec.tf_type())

    elif initializer.startswith('orthogonal'):
        if spec.type != 'float':
            raise TensorforceError(
                message="Invalid variable initializer value for non-float variable: {}."
                .format(initializer)
            )
        if spec.rank < 2:
            raise TensorforceError(
                message="Invalid variable initializer value for 0/1-rank variable: {}."
                .format(initializer)
            )
        # SVD of a random matrix flattened to (prod(shape[:-1]), shape[-1]);
        # whichever factor has shape[-1] columns is reshaped to the spec.
        normal = np.random.normal(size=(util.product(xs=spec.shape[:-1]), spec.shape[-1]))
        u, _, v = np.linalg.svd(a=normal, full_matrices=False)
        orthogonal = u if u.shape[1] == spec.shape[-1] else v
        if initializer.endswith('-relu'):
            orthogonal = orthogonal * np.sqrt(2.0)
        if initialization_scale is not None and initialization_scale != 1.0:
            if initialization_scale <= 0.0:
                raise TensorforceError.value(
                    name='variable', argument='initialization_scale',
                    value=initialization_scale, hint='<= 0.0'
                )
            orthogonal = orthogonal * initialization_scale
        initializer = tf_util.constant(value=orthogonal.reshape(spec.shape), dtype=spec.type)

    elif initializer == 'zeros':
        initializer = tf_util.zeros(shape=spec.shape, dtype=spec.type)

    elif initializer == 'ones':
        initializer = tf_util.ones(shape=spec.shape, dtype=spec.type)

    elif initializer == 'constant':
        # The fill value comes from initialization_scale; fail clearly here
        # rather than passing None on to the constant op.
        if initialization_scale is None:
            raise TensorforceError.value(
                name='variable', argument='initialization_scale',
                value=initialization_scale, hint='required for constant initializer'
            )
        initializer = tf.fill(
            dims=spec.shape,
            value=tf_util.constant(value=initialization_scale, dtype=spec.type)
        )

    # Variable
    variable = tf.Variable(
        initial_value=initializer, trainable=is_trainable, validate_shape=True, name=name,
        dtype=spec.tf_type(), shape=spec.shape
    )
    variable.is_saved = is_saved

    return variable