def build_subnetwork(self,
                     features,
                     logits_dimension,
                     training,
                     iteration_step,
                     summary,
                     previous_ensemble=None):
  assert features is not None
  assert training is not None
  assert iteration_step is not None
  assert summary is not None

  # Trainable variables collection should always be empty when
  # build_subnetwork is called.
  assert not tf_compat.v1.get_collection(
      tf_compat.v1.GraphKeys.TRAINABLE_VARIABLES)

  # Subnetworks get iteration steps instead of global steps.
  step_name = "subnetwork_test/iteration_step"
  assert step_name == tf_compat.tensor_name(
      tf_compat.v1.train.get_global_step())
  assert step_name == tf_compat.tensor_name(train.get_global_step())
  assert step_name == tf_compat.tensor_name(training_util.get_global_step())
  assert step_name == tf_compat.tensor_name(tf_v1.train.get_global_step())
  assert step_name == tf_compat.tensor_name(
      tf_compat.v1.train.get_or_create_global_step())
  assert step_name == tf_compat.tensor_name(train.get_or_create_global_step())
  assert step_name == tf_compat.tensor_name(
      training_util.get_or_create_global_step())
  assert step_name == tf_compat.tensor_name(
      tf_v1.train.get_or_create_global_step())

  # Subnetworks get scoped summaries.
  assert "fake_scalar" == tf_compat.v1.summary.scalar("scalar", 1.)
  assert "fake_image" == tf_compat.v1.summary.image("image", 1.)
  assert "fake_histogram" == tf_compat.v1.summary.histogram("histogram", 1.)
  assert "fake_audio" == tf_compat.v1.summary.audio("audio", 1., 1.)

  last_layer = tu.dummy_tensor(shape=(2, 3))

  def logits_fn(logits_dim):
    return tf_compat.v1.layers.dense(
        last_layer,
        units=logits_dim,
        kernel_initializer=tf_compat.v1.glorot_uniform_initializer(
            seed=self._seed))

  if self._multi_head:
    logits = {
        "head1": logits_fn(logits_dimension // 2),
        "head2": logits_fn(logits_dimension // 2)
    }
    last_layer = {"head1": last_layer, "head2": last_layer}
  else:
    logits = logits_fn(logits_dimension)

  return Subnetwork(
      last_layer=logits if self._use_logits_last_layer else last_layer,
      logits=logits,
      complexity=2,
      persisted_tensors={})
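
# Hedged sketch (assumed test helper, not shown in this file): the
# `tu.dummy_tensor` used above can be as simple as a seeded, non-trainable
# random variable, which also keeps the TRAINABLE_VARIABLES collection empty
# as the assertions require:
#
#   def dummy_tensor(shape=(), random_seed=42):
#     """Returns a deterministic random tensor for tests."""
#     return tf.Variable(
#         tf.random.normal(shape=shape, seed=random_seed),
#         trainable=False).read_value()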
def build_subnetwork(self, features, labels, logits_dimension, training,
                     iteration_step, summary, previous_ensemble):
  # We don't need an EVAL mode since AdaNet takes care of evaluation for us.
  mode = tf.estimator.ModeKeys.PREDICT
  if training:
    mode = tf.estimator.ModeKeys.TRAIN

  # Call in template to ensure that variables are created once and reused.
  call_model_fn_template = tf.make_template("model_fn", self._call_model_fn)
  subestimator_features, subestimator_labels = features, labels
  if training and self._subestimator.train_input_fn:
    # TODO: Consider tensorflow_estimator/python/estimator/util.py.
    inputs = self._subestimator.train_input_fn()
    if isinstance(inputs, (tf_compat.DatasetV1, tf_compat.DatasetV2)):
      subestimator_features, subestimator_labels = (
          tf_compat.make_one_shot_iterator(inputs).get_next())
    else:
      subestimator_features, subestimator_labels = inputs

    # Construct subnetwork graph first because of dependencies on scope.
    _, train_op = call_model_fn_template(subestimator_features,
                                         subestimator_labels, mode, summary)
    # Graph for ensemble learning gets model_fn_1 for scope.
    logits, _ = call_model_fn_template(features, labels, mode, summary)
  else:
    logits, train_op = call_model_fn_template(features, labels, mode, summary)

  # TODO: Replace with variance complexity measure.
  complexity = tf.constant(0.)
  return Subnetwork(
      logits=logits,
      last_layer=logits,
      shared={"train_op": train_op},
      complexity=complexity)
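
# Hedged companion sketch (assumed helper): the `self._call_model_fn` wrapped
# by `tf.make_template` above is expected to invoke the sub-Estimator's
# `model_fn` and return `(logits, train_op)`, roughly:
#
#   def _call_model_fn(self, features, labels, mode, summary):
#     del summary  # Unused in this sketch; real code may scope summaries.
#     estimator_spec = self._subestimator.estimator.model_fn(
#         features=features,
#         labels=labels,
#         mode=mode,
#         config=self._subestimator.estimator.config)
#     logits = self._logits_fn(estimator_spec=estimator_spec)
#     return logits, estimator_spec.train_op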
def build_subnetwork(self, features, labels, logits_dimension, training,
                     iteration_step, summary, previous_ensemble):
  model_fn = self._estimator.model_fn

  # We don't need an EVAL mode since AdaNet takes care of evaluation for us.
  mode = tf.estimator.ModeKeys.PREDICT
  if training:
    mode = tf.estimator.ModeKeys.TRAIN
  estimator_spec = model_fn(
      features=features,
      labels=labels,
      mode=mode,
      config=self._estimator.config)
  logits = self._logits_fn(estimator_spec=estimator_spec)
  self._subnetwork_train_op = TrainOpSpec(
      estimator_spec.train_op,
      chief_hooks=estimator_spec.training_chief_hooks,
      hooks=estimator_spec.training_hooks)

  # TODO: Replace with variance complexity measure.
  complexity = tf.constant(0.)
  return Subnetwork(
      logits=logits,
      last_layer=logits,
      persisted_tensors={},
      complexity=complexity)
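
# Hedged note (method name from the public `adanet.subnetwork.Builder` API):
# the `TrainOpSpec` cached on `self` above is typically handed back to AdaNet
# from the builder's train-op hook, e.g.:
#
#   def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
#                                 iteration_step, summary, previous_ensemble):
#     # The wrapped Estimator's model_fn already created its own train op.
#     return self._subnetwork_train_op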
def build_subnetwork(self, features, labels, logits_dimension, training,
                     iteration_step, summary, previous_ensemble):
  # We don't need an EVAL mode since AdaNet takes care of evaluation for us.
  mode = tf.estimator.ModeKeys.PREDICT
  if training:
    mode = tf.estimator.ModeKeys.TRAIN

  # Call in template to ensure that variables are created once and reused.
  call_model_fn_template = tf.make_template("model_fn", self._call_model_fn)
  logits, train_op = call_model_fn_template(features, labels, mode)
  if training and self._subestimator.train_input_fn:
    # TODO: Consider tensorflow_estimator/python/estimator/util.py.
    inputs = self._subestimator.train_input_fn()
    if isinstance(inputs, (tf_compat.DatasetV1, tf_compat.DatasetV2)):
      features, labels = tf_compat.make_one_shot_iterator(inputs).get_next()
    else:
      features, labels = inputs
    # Use different training set for train op only.
    _, train_op = call_model_fn_template(features, labels, mode)
  self._subnetwork_train_op = train_op

  # TODO: Replace with variance complexity measure.
  complexity = tf.constant(0.)
  return Subnetwork(
      logits=logits,
      last_layer=logits,
      persisted_tensors={},
      complexity=complexity)
def build_subnetwork(self, features, logits_dimension, training, iteration_step, summary, previous_ensemble=None): seed = self._seed if previous_ensemble: # Increment seed so different iterations don't learn the exact same thing. seed += 1 with tf.compat.v1.variable_scope("dnn"): persisted_tensors = {} with tf.compat.v1.variable_scope("hidden_layer"): if self._feature_columns: input_layer = tf.compat.v1.feature_column.input_layer( features=features, feature_columns=self._feature_columns) else: input_layer = features["x"] w = tf.compat.v1.get_variable( shape=[input_layer.shape[1], self._layer_size], initializer=tf.compat.v1.glorot_uniform_initializer( seed=seed), name="weight") hidden_layer = tf.matmul(input_layer, w) if previous_ensemble: other_hidden_layer = previous_ensemble.weighted_subnetworks[ -1].subnetwork.persisted_tensors["hidden_layer"] hidden_layer = tf.concat([hidden_layer, other_hidden_layer], axis=1) # Use a leaky-relu activation so that gradients can flow even when # outputs are negative. Leaky relu has a non-zero slope when x < 0. # Otherwise success at learning is completely dependent on random seed. hidden_layer = tf.nn.leaky_relu(hidden_layer, alpha=.2) persisted_tensors["hidden_layer"] = hidden_layer if training: # This change will only be in the next iteration if # `freeze_training_graph` is `True`. persisted_tensors["hidden_layer"] = 2 * hidden_layer with tf.compat.v1.variable_scope("logits"): logits = tf.compat.v1.layers.dense( hidden_layer, logits_dimension, kernel_initializer=tf.compat.v1.glorot_uniform_initializer( seed=seed)) summary.scalar("scalar", 3) summary.image("image", tf.ones([1, 3, 3, 1])) with tf.compat.v1.variable_scope("nested"): summary.scalar("scalar", 5) return Subnetwork(last_layer=logits, logits=logits, complexity=3, persisted_tensors=persisted_tensors)
def build_subnetwork(self,
                     features,
                     logits_dimension,
                     training,
                     iteration_step,
                     summary,
                     previous_ensemble=None):
  return Subnetwork(
      last_layer=tu.dummy_tensor(),
      logits=tu.dummy_tensor([2, logits_dimension]),
      complexity=tu.dummy_tensor(),
      persisted_tensors={"random_seed": self._random_seed})
def build_subnetwork(self, features, labels, logits_dimension, training,
                     iteration_step, summary, previous_ensemble):
  # We don't need an EVAL mode since AdaNet takes care of evaluation for us.
  mode = tf.estimator.ModeKeys.PREDICT
  if training:
    mode = tf.estimator.ModeKeys.TRAIN

  # Call in template to ensure that variables are created once and reused.
  call_model_fn_template = tf.compat.v1.make_template("model_fn",
                                                      self._call_model_fn)
  subestimator_features, subestimator_labels = features, labels
  local_init_ops = []
  if training and self._subestimator.train_input_fn:
    # TODO: Consider tensorflow_estimator/python/estimator/util.py.
    inputs = self._subestimator.train_input_fn()
    if isinstance(inputs, (tf_compat.DatasetV1, tf_compat.DatasetV2)):
      subestimator_features, subestimator_labels = (
          tf_compat.make_one_shot_iterator(inputs).get_next())
    else:
      subestimator_features, subestimator_labels = inputs

    # Construct subnetwork graph first because of dependencies on scope.
    _, _, bagging_train_op_spec, sub_local_init_op = call_model_fn_template(
        subestimator_features, subestimator_labels, mode, summary)
    # Graph for ensemble learning gets model_fn_1 for scope.
    logits, last_layer, _, ensemble_local_init_op = call_model_fn_template(
        features, labels, mode, summary)

    if sub_local_init_op:
      local_init_ops.append(sub_local_init_op)
    if ensemble_local_init_op:
      local_init_ops.append(ensemble_local_init_op)

    # Run train op in a hook so that exceptions can be intercepted by the
    # AdaNet framework instead of the Estimator's monitored training session.
    hooks = bagging_train_op_spec.hooks + (_SecondaryTrainOpRunnerHook(
        bagging_train_op_spec.train_op),)
    train_op_spec = TrainOpSpec(
        train_op=tf.no_op(),
        chief_hooks=bagging_train_op_spec.chief_hooks,
        hooks=hooks)
  else:
    logits, last_layer, train_op_spec, local_init_op = call_model_fn_template(
        features, labels, mode, summary)
    if local_init_op:
      local_init_ops.append(local_init_op)

  # TODO: Replace with variance complexity measure.
  complexity = tf.constant(0.)
  return Subnetwork(
      logits=logits,
      last_layer=last_layer,
      shared={"train_op": train_op_spec},
      complexity=complexity,
      local_init_ops=local_init_ops)
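
# Hedged sketch (class assumed from its usage above): a
# `_SecondaryTrainOpRunnerHook` only needs to run the wrapped train op inside
# the session, so that failures surface through the hook machinery rather
# than through the monitored session's no-op train op:
#
#   class _SecondaryTrainOpRunnerHook(tf_compat.SessionRunHook):
#
#     def __init__(self, train_op):
#       self._train_op = train_op
#
#     def before_run(self, run_context):
#       run_context.session.run(self._train_op)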
def build_subnetwork(self,
                     features,
                     logits_dimension,
                     training,
                     iteration_step,
                     summary,
                     previous_ensemble=None):
  logits = tf_compat.v1.layers.dense(
      features["x"],
      logits_dimension,
      kernel_initializer=tf_compat.v1.glorot_uniform_initializer(
          seed=42)) * np.nan
  return Subnetwork(last_layer=logits, logits=logits, complexity=0)
def build_subnetwork(self,
                     features,
                     logits_dimension,
                     training,
                     iteration_step,
                     summary,
                     previous_ensemble=None):
  seed = self._seed
  if previous_ensemble:
    # Increment seed so different iterations don't learn the exact same thing.
    seed += 1
  num_ps_replicas = self._config.num_ps_replicas if self._config else 0
  partitioner = tf_compat.v1.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with tf_compat.v1.variable_scope("dnn", partitioner=partitioner):
    shared = {}
    with tf_compat.v1.variable_scope("hidden_layer"):
      w = tf_compat.v1.get_variable(
          shape=[2, self._layer_size],
          initializer=tf_compat.v1.glorot_uniform_initializer(seed=seed),
          name="weight")
      hidden_layer = tf.matmul(features["x"], w)

    if previous_ensemble:
      other_hidden_layer = previous_ensemble.weighted_subnetworks[
          -1].subnetwork.shared["hidden_layer"]
      hidden_layer = tf.concat([hidden_layer, other_hidden_layer], axis=1)

    # Use a leaky-relu activation so that gradients can flow even when
    # outputs are negative. Leaky relu has a non-zero slope when x < 0.
    # Otherwise success at learning is completely dependent on random seed.
    hidden_layer = tf.nn.leaky_relu(hidden_layer, alpha=.2)
    shared["hidden_layer"] = hidden_layer

    with tf_compat.v1.variable_scope("logits"):
      logits = tf_compat.v1.layers.dense(
          hidden_layer,
          logits_dimension,
          kernel_initializer=tf_compat.v1.glorot_uniform_initializer(
              seed=seed))

    summary.scalar("scalar", 3)

    return Subnetwork(
        last_layer=logits, logits=logits, complexity=3, shared=shared)
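
# Usage note (hedged): `min_max_variable_partitioner` clamps the partition
# count to at least one, so with `num_ps_replicas == 0` the weight above is
# left unpartitioned and the same builder code runs both locally and under
# parameter servers. A construction sketch with assumed constructor args:
#
#   config = tf.estimator.RunConfig()
#   builder = _DNNBuilder(seed=42, layer_size=3, config=config)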
def build_subnetwork(self, features, logits_dimension, training, iteration_step, summary, previous_ensemble=None): seed = self._seed if previous_ensemble: # Increment seed so different iterations don't learn the exact same thing. seed += 1 with tf_compat.v1.variable_scope("simple"): input_layer = tf_compat.v1.feature_column.input_layer( features=features, feature_columns=tf.feature_column.numeric_column("x", 2)) last_layer = input_layer with tf_compat.v1.variable_scope("logits"): logits = tf_compat.v1.layers.dense( last_layer, logits_dimension, kernel_initializer=tf_compat.v1.glorot_uniform_initializer( seed=seed)) summary.scalar("scalar", 3) batch_size = features["x"].get_shape().as_list()[0] summary.image("image", tf.ones([batch_size, 3, 3, 1])) with tf_compat.v1.variable_scope("nested"): summary.scalar("scalar", 5) return Subnetwork( last_layer=last_layer, logits=logits, complexity=1, persisted_tensors={}, )
def dummy_ensemble_spec(name,
                        random_seed=42,
                        num_subnetworks=1,
                        bias=0.,
                        loss=None,
                        adanet_loss=None,
                        eval_metrics=None,
                        dict_predictions=False,
                        export_output_key=None,
                        subnetwork_builders=None,
                        train_op=None):
  """Creates a dummy `_EnsembleSpec` instance.

  Args:
    name: _EnsembleSpec's name.
    random_seed: A scalar random seed.
    num_subnetworks: The number of fake subnetworks in this ensemble.
    bias: Bias value.
    loss: Float loss to return. When None, it's picked from a random
      distribution.
    adanet_loss: Float AdaNet loss to return. When None, it's picked from a
      random distribution.
    eval_metrics: Optional eval metrics tuple of (metric_fn, tensor args).
    dict_predictions: Boolean whether to return predictions as a dictionary of
      `Tensor` or just a single float `Tensor`.
    export_output_key: An `ExportOutputKeys` for faking export outputs.
    subnetwork_builders: List of `adanet.subnetwork.Builder` objects.
    train_op: A train op.

  Returns:
    A dummy `_EnsembleSpec` instance.
  """

  if loss is None:
    loss = dummy_tensor([], random_seed)

  if adanet_loss is None:
    adanet_loss = dummy_tensor([], random_seed * 2)
  else:
    adanet_loss = tf.convert_to_tensor(value=adanet_loss)

  logits = dummy_tensor([], random_seed * 3)
  if dict_predictions:
    predictions = {
        "logits": logits,
        "classes": tf.cast(tf.abs(logits), dtype=tf.int64)
    }
  else:
    predictions = logits
  weighted_subnetworks = [
      WeightedSubnetwork(
          name=name,
          iteration_number=1,
          logits=dummy_tensor([2, 1], random_seed * 4),
          weight=dummy_tensor([2, 1], random_seed * 4),
          subnetwork=Subnetwork(
              last_layer=dummy_tensor([1, 2], random_seed * 4),
              logits=dummy_tensor([2, 1], random_seed * 4),
              complexity=1.,
              persisted_tensors={}))
  ]

  export_outputs = _dummy_export_outputs(export_output_key, logits,
                                         predictions)
  bias = tf.constant(bias)
  return _EnsembleSpec(
      name=name,
      ensemble=ComplexityRegularized(
          weighted_subnetworks=weighted_subnetworks * num_subnetworks,
          bias=bias,
          logits=logits,
      ),
      architecture=_Architecture("dummy_ensemble_candidate",
                                 "dummy_ensembler"),
      subnetwork_builders=subnetwork_builders,
      predictions=predictions,
      step=tf.Variable(0),
      loss=loss,
      adanet_loss=adanet_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      export_outputs=export_outputs)
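
# Hedged usage sketch: faking a two-subnetwork ensemble with a fixed AdaNet
# loss and dictionary predictions in a test, using only the helper defined
# above:
#
#   ensemble_spec = dummy_ensemble_spec(
#       name="old",
#       num_subnetworks=2,
#       adanet_loss=.5,
#       dict_predictions=True)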