def build_subnetwork(self, features, labels, logits_dimension, training,
                     iteration_step, summary, previous_ensemble):
  """See `adanet.subnetwork.Builder`."""

  model_fn = self._estimator.model_fn

  # We don't need an EVAL mode since AdaNet takes care of evaluation for us.
  mode = tf.estimator.ModeKeys.PREDICT
  if training:
    mode = tf.estimator.ModeKeys.TRAIN
  estimator_spec = model_fn(
      features=features,
      labels=labels,
      mode=mode,
      config=self._estimator.config)
  logits = self._logits_fn(estimator_spec=estimator_spec)
  self._subnetwork_train_op = TrainOpSpec(
      estimator_spec.train_op,
      chief_hooks=estimator_spec.training_chief_hooks,
      hooks=estimator_spec.training_hooks)

  # TODO: Replace with variance complexity measure.
  complexity = tf.constant(0.)
  return Subnetwork(
      logits=logits,
      last_layer=logits,
      persisted_tensors={},
      complexity=complexity)
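# A minimal sketch of a `_logits_fn` that the builder above could be
# constructed with. It mirrors the predictions-handling logic of the variant
# further below: `EstimatorSpec.predictions` may be a single `Tensor` or a
# dict keyed by prediction name. The "logits" key is an assumption; the
# actual key depends on the wrapped estimator's head.
def _logits_fn(estimator_spec):
  if isinstance(estimator_spec.predictions, dict):
    return estimator_spec.predictions["logits"]
  return estimator_spec.predictions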
def build_subnetwork(self, features, logits_dimension, training,
                     iteration_step, summary, previous_ensemble=None):
  assert features is not None
  assert training is not None
  assert iteration_step is not None
  assert summary is not None

  # Trainable variables collection should always be empty when
  # build_subnetwork is called.
  assert not tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

  # Subnetworks get iteration steps instead of global steps.
  global_step = tf.train.get_global_step()
  assert "ensemble_test/iteration_step" == global_step.op.name

  # Subnetworks get scoped summaries.
  assert "fake_scalar" == tf.summary.scalar("scalar", 1.)
  assert "fake_image" == tf.summary.image("image", 1.)
  assert "fake_histogram" == tf.summary.histogram("histogram", 1.)
  assert "fake_audio" == tf.summary.audio("audio", 1., 1.)

  last_layer = tu.dummy_tensor(shape=(2, 3))
  logits = tf.layers.dense(
      last_layer,
      units=logits_dimension,
      kernel_initializer=tf.glorot_uniform_initializer(seed=self._seed))
  return Subnetwork(
      last_layer=logits if self._use_logits_last_layer else last_layer,
      logits=logits,
      complexity=2,
      persisted_tensors={})
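# A plausible sketch of the `tu.dummy_tensor` test utility used above and in
# later builders, inferred from its call sites (`dummy_tensor(shape=(2, 3))`,
# `dummy_tensor([2, logits_dimension])`, `dummy_tensor([], random_seed)`): a
# deterministic random-normal `Tensor` of the given shape. The exact
# implementation in the test utilities module is an assumption.
def dummy_tensor(shape=(), random_seed=42):
  return tf.random_normal(shape, seed=random_seed)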
def _dnn(features):
  inputs = _extract_feature(features)
  layer_size = 10
  with tf.variable_scope("dnn"):
    with tf.variable_scope("hidden_layer"):
      w = tf.Variable(
          tf.random_normal([2, layer_size], seed=seed), name="weight")
      b = tf.Variable(tf.random_normal([layer_size], seed=seed), name="bias")
      hidden_layer = tf.matmul(inputs, w) + b
    with tf.variable_scope("logits"):
      w = tf.Variable(
          tf.random_normal([layer_size, 1], seed=seed), name="weight")
      b = tf.Variable(tf.random_normal([1], seed=seed), name="bias")
      predictions = tf.matmul(hidden_layer, w) + b
    some_persisted_tensor_constant = tf.constant(
        seed, name="some_persisted_tensor_constant")
    persisted_tensors = {}
    if keep_persisted_tensors:
      persisted_tensors = {
          "some_persisted_tensor_constant": some_persisted_tensor_constant,
      }
    complexity = tf.constant(6, name="complexity")
    subnetwork = Subnetwork(
        last_layer=hidden_layer,
        logits=predictions,
        complexity=complexity,
        persisted_tensors=persisted_tensors)
    return WeightedSubnetwork(
        name=tf.constant("dnn", name="name"),
        logits=predictions,
        weight=w,
        subnetwork=subnetwork)
def _simple(features):
  inputs = tf.feature_column.input_layer(
      features=features, feature_columns=feature_columns)
  with tf.variable_scope("simple"):
    with tf.variable_scope("logits"):
      w = tf.Variable(tf.random_normal([2, 2], seed=seed), name="weight")
      b = tf.Variable(tf.random_normal([1], seed=seed), name="bias")
      predictions = tf.matmul(inputs, w) + b
    some_persisted_tensor_constant = tf.constant(
        seed, name="some_persisted_tensor_constant")
    persisted_tensors = {}
    if keep_persisted_tensors:
      persisted_tensors = {
          "some_persisted_tensor_constant": some_persisted_tensor_constant,
      }
    complexity = tf.constant(3, name="complexity")
    subnetwork = Subnetwork(
        last_layer=predictions,
        logits=predictions,
        complexity=complexity,
        persisted_tensors=persisted_tensors)
    return WeightedSubnetwork(
        name=tf.constant("simple", name="name"),
        logits=predictions,
        weight=w,
        subnetwork=subnetwork)
def _linear(features):
  inputs = _extract_feature(features)
  with tf.variable_scope("linear"):
    with tf.variable_scope("logits"):
      w = tf.Variable(tf.random_normal([2, 1], seed=seed), name="weight")
      b = tf.Variable(tf.random_normal([1], seed=seed), name="bias")
      predictions = tf.matmul(inputs, w) + b
    some_persisted_tensor_constant = tf.constant(
        seed, name="some_persisted_tensor_constant")
    nested_persisted_tensor_constant = tf.constant(
        seed, name="nested_persisted_tensor_constant")
    persisted_tensors = {}
    if keep_persisted_tensors:
      persisted_tensors = {
          "some_persisted_tensor_constant": some_persisted_tensor_constant,
          "nested": {
              "nested": {
                  "value": nested_persisted_tensor_constant,
                  "separated/by/slash": nested_persisted_tensor_constant,
              },
              "value": some_persisted_tensor_constant,
          },
      }
    complexity = tf.constant(3, name="complexity")
    subnetwork = Subnetwork(
        last_layer=inputs,
        logits=predictions,
        complexity=complexity,
        persisted_tensors=persisted_tensors)
    return WeightedSubnetwork(
        name=tf.constant("linear", name="name"),
        logits=predictions,
        weight=w,
        subnetwork=subnetwork)
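# A minimal sketch of the `_extract_feature` helper that `_dnn` and `_linear`
# above call, assuming `features` is a dict holding a single dense float
# feature. The "x" key is an assumption; the builders only require a
# `[batch_size, 2]` float input.
def _extract_feature(features):
  return tf.to_float(features["x"])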
def _make_metrics(sess, metric_fn):
  head = tf.contrib.estimator.binary_classification_head(
      loss_reduction=tf.losses.Reduction.SUM)
  builder = _EnsembleBuilder(
      head, MixtureWeightType.SCALAR, metric_fn=metric_fn)
  features = {"x": tf.constant([[1.], [2.]])}
  labels = tf.constant([0, 1])
  ensemble_spec = builder.build_ensemble_spec(
      "fake_ensemble",
      [
          WeightedSubnetwork(
              name=tf.constant("fake_weighted"),
              logits=[[1.], [2.]],
              weight=[1.],
              subnetwork=Subnetwork(
                  logits=[[1.], [2.]],
                  last_layer=[1.],
                  complexity=1.,
                  persisted_tensors={}))
      ],
      summary=_FakeSummary(),
      bias=0.,
      features=features,
      mode=tf.estimator.ModeKeys.EVAL,
      labels=labels,
      iteration_step=1.)
  sess.run((tf.global_variables_initializer(),
            tf.local_variables_initializer()))
  metrics = sess.run(ensemble_spec.eval_metric_ops)
  return {k: metrics[k][1] for k in metrics}
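# Example use of `_make_metrics` in a test, assuming it runs inside a fresh
# graph and session. The `_metric_fn` below and its "logits" prediction key
# are illustrative assumptions; a `metric_fn` returns a dict of metric ops
# that augments the head's built-in eval metrics.
def _metric_fn(predictions):
  return {"mean_logits": tf.metrics.mean(predictions["logits"])}

with tf.Graph().as_default(), tf.Session() as sess:
  metrics = _make_metrics(sess, _metric_fn)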
def _reconstruct_subnetwork(self, index):
  """Reconstructs a `Subnetwork` from the graph's collections.

  Args:
    index: Integer index of the subnetwork in a list of subnetworks.

  Returns:
    A frozen `Subnetwork` instance.

  Raises:
    ValueError: If a field in the frozen collection does not belong to a
      subnetwork. This should not happen if the collection was created by
      `freeze_ensemble`.
  """

  last_layer = None
  logits = None
  complexity = None
  persisted_tensors = {}
  for key in tf.get_default_graph().get_all_collection_keys():
    prefix = self._subnetwork_collection_key(index, "")
    if prefix not in key:
      continue

    # Verify that each frozen collection is of size one, as each collection
    # should have been cleared before adding a tensor to freeze.
    frozen_collection = tf.get_collection(key)
    assert len(frozen_collection) == 1
    frozen_tensor = frozen_collection[-1]
    field = self._subnetwork_collection_key_field(key, index)
    if field is None:
      continue
    if field == self.Keys.LAST_LAYER:
      last_layer = frozen_tensor
      continue
    if field == self.Keys.LOGITS:
      logits = frozen_tensor
      continue
    if field == self.Keys.COMPLEXITY:
      complexity = frozen_tensor
      continue
    if field.startswith(self.Keys.PERSISTED_TENSORS):
      # Remove the persisted tensors prefix plus separator.
      prefix_length = len(self.Keys.PERSISTED_TENSORS)
      prefix_length += len(self.Keys.PERSISTED_TENSORS_SEPARATOR)
      field = field[prefix_length:]
      persisted_tensors = self._reconstruct_persisted_tensor(
          field, frozen_tensor, persisted_tensors)
      continue

    # This line should not be hit if the frozen graph was created with
    # freeze_ensemble.
    raise ValueError("'{}' is not a valid field.".format(field))

  return Subnetwork(
      last_layer=last_layer,
      logits=logits,
      complexity=complexity,
      persisted_tensors=persisted_tensors)
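# A possible shape for `_reconstruct_persisted_tensor`, sketched on the same
# class from how `_reconstruct_subnetwork` calls it: `field` encodes a nested
# dictionary path joined by `Keys.PERSISTED_TENSORS_SEPARATOR`, and each call
# inserts one frozen tensor into the (possibly nested) result. The in-place
# update strategy here is an assumption, not the library's implementation.
def _reconstruct_persisted_tensor(self, field, frozen_tensor,
                                  persisted_tensors):
  keys = field.split(self.Keys.PERSISTED_TENSORS_SEPARATOR)
  node = persisted_tensors
  for key in keys[:-1]:
    node = node.setdefault(key, {})
  node[keys[-1]] = frozen_tensor
  return persisted_tensors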
def build_subnetwork(self, features, labels, logits_dimension, training,
                     iteration_step, summary, previous_ensemble):
  """See `adanet.subnetwork.Builder`."""

  model_fn = self._estimator.model_fn

  # We don't need an EVAL mode since AdaNet takes care of evaluation for us.
  mode = tf.estimator.ModeKeys.PREDICT
  if training:
    mode = tf.estimator.ModeKeys.TRAIN
  estimator_spec = model_fn(
      features=features,
      labels=labels,
      mode=mode,
      config=self._estimator.config)
  self._subnetwork_train_op = estimator_spec.train_op

  # TODO: Replace with variance complexity measure.
  complexity = tf.constant(0.)
  if isinstance(estimator_spec.predictions, dict):
    logits = estimator_spec.predictions[self._logits_key]
  else:
    logits = estimator_spec.predictions
  return Subnetwork(
      logits=logits,
      last_layer=logits,
      persisted_tensors={},
      complexity=complexity)
def build_subnetwork(self, features, logits_dimension, training,
                     iteration_step, summary, previous_ensemble=None):
  return Subnetwork(
      last_layer=tu.dummy_tensor(),
      logits=tu.dummy_tensor([2, logits_dimension]),
      complexity=tu.dummy_tensor(),
      persisted_tensors={"random_seed": self._random_seed})
def build_subnetwork(self, features, logits_dimension, training,
                     iteration_step, summary, previous_ensemble=None):
  seed = self._seed
  if previous_ensemble:
    # Increment seed so different iterations don't learn the exact same thing.
    seed += 1
  with tf.variable_scope("dnn"):
    persisted_tensors = {}
    with tf.variable_scope("hidden_layer"):
      w = tf.get_variable(
          shape=[2, self._layer_size],
          initializer=tf.glorot_uniform_initializer(seed=seed),
          name="weight")
      hidden_layer = tf.matmul(features["x"], w)

    if previous_ensemble:
      other_hidden_layer = previous_ensemble.weighted_subnetworks[
          -1].subnetwork.persisted_tensors["hidden_layer"]
      hidden_layer = tf.concat([hidden_layer, other_hidden_layer], axis=1)

    # Use a leaky-relu activation so that gradients can flow even when
    # outputs are negative. Leaky relu has a non-zero slope when x < 0.
    # Otherwise success at learning is completely dependent on random seed.
    hidden_layer = tf.nn.leaky_relu(hidden_layer, alpha=.2)
    persisted_tensors["hidden_layer"] = hidden_layer
    if training:
      # This change will only be in the next iteration if
      # `freeze_training_graph` is `True`.
      persisted_tensors["hidden_layer"] = 2 * hidden_layer

    with tf.variable_scope("logits"):
      logits = tf.layers.dense(
          hidden_layer,
          logits_dimension,
          kernel_initializer=tf.glorot_uniform_initializer(seed=seed))

    summary.scalar("scalar", 3)
    batch_size = features["x"].get_shape().as_list()[0]
    summary.image("image", tf.ones([batch_size, 3, 3, 1]))
    with tf.variable_scope("nested"):
      summary.scalar("scalar", 5)

    return Subnetwork(
        last_layer=logits,
        logits=logits,
        complexity=3,
        persisted_tensors=persisted_tensors)
def build_subnetwork(self, features, logits_dimension, training,
                     iteration_step, summary, previous_ensemble=None):
  seed = self._seed
  if previous_ensemble:
    # Increment seed so different iterations don't learn the exact same thing.
    seed += 1
  num_ps_replicas = self._config.num_ps_replicas if self._config else 0
  partitioner = tf.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with tf.variable_scope("dnn", partitioner=partitioner):
    shared = {}
    with tf.variable_scope("hidden_layer"):
      w = tf.get_variable(
          shape=[2, self._layer_size],
          initializer=tf.glorot_uniform_initializer(seed=seed),
          name="weight")
      hidden_layer = tf.matmul(features["x"], w)

    if previous_ensemble:
      other_hidden_layer = previous_ensemble.weighted_subnetworks[
          -1].subnetwork.shared["hidden_layer"]
      hidden_layer = tf.concat([hidden_layer, other_hidden_layer], axis=1)

    # Use a leaky-relu activation so that gradients can flow even when
    # outputs are negative. Leaky relu has a non-zero slope when x < 0.
    # Otherwise success at learning is completely dependent on random seed.
    hidden_layer = tf.nn.leaky_relu(hidden_layer, alpha=.2)
    shared["hidden_layer"] = hidden_layer

    with tf.variable_scope("logits"):
      logits = tf.layers.dense(
          hidden_layer,
          logits_dimension,
          kernel_initializer=tf.glorot_uniform_initializer(seed=seed))

    summary.scalar("scalar", 3)

    return Subnetwork(
        last_layer=logits,
        logits=logits,
        complexity=3,
        shared=shared)
def build_subnetwork(self, features, logits_dimension, training,
                     iteration_step, summary, previous_ensemble=None):
  assert features is not None
  assert training is not None
  assert iteration_step is not None
  assert summary is not None

  last_layer = tu.dummy_tensor(shape=(2, 3))
  logits = tf.layers.dense(
      last_layer,
      units=logits_dimension,
      kernel_initializer=tf.glorot_uniform_initializer(seed=self._seed))
  return Subnetwork(
      last_layer=logits if self._use_logits_last_layer else last_layer,
      logits=logits,
      complexity=2,
      persisted_tensors={})
def build_subnetwork(self, features, logits_dimension, training,
                     iteration_step, summary, previous_ensemble=None):
  seed = self._seed
  with tf.variable_scope("dnn"):
    persisted_tensors = {}
    prev_layer_size = 2
    prev_layer = features["x"]
    for i in range(self._num_layers):
      with tf.variable_scope("hidden_layer_{}".format(i)):
        w = tf.get_variable(
            shape=[prev_layer_size, self._layer_size],
            initializer=tf.glorot_uniform_initializer(seed=seed),
            name="weight")
        hidden_layer = tf.matmul(prev_layer, w)
        persisted_tensors["hidden_layer_{}".format(i)] = hidden_layer
        hidden_layer = tf.nn.relu(hidden_layer)
        prev_layer = hidden_layer
        prev_layer_size = self._layer_size
    with tf.variable_scope("logits"):
      logits = tf.layers.dense(
          prev_layer,
          units=logits_dimension,
          kernel_initializer=tf.glorot_uniform_initializer(seed=seed))
  return Subnetwork(
      last_layer=prev_layer,
      logits=logits,
      complexity=3,
      persisted_tensors=persisted_tensors,
  )
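# A minimal sketch of exercising the multi-layer builder above. The class
# name `_DNNBuilder` and its constructor arguments are assumptions inferred
# from the attributes `self._num_layers`, `self._layer_size`, and
# `self._seed` that `build_subnetwork` reads.
builder = _DNNBuilder(num_layers=2, layer_size=3, seed=42)
features = {"x": tf.constant([[1., 2.], [3., 4.]])}
subnetwork = builder.build_subnetwork(
    features=features,
    logits_dimension=1,
    training=True,
    iteration_step=tf.constant(1),
    summary=tf.summary,  # any object exposing the scoped summary interface
    previous_ensemble=None)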
def dummy_ensemble_spec(name,
                        random_seed=42,
                        num_subnetworks=1,
                        bias=0.,
                        loss=None,
                        adanet_loss=None,
                        eval_metrics=None,
                        dict_predictions=False,
                        export_output_key=None,
                        subnetwork_builders=None,
                        train_op=None):
  """Creates a dummy `_EnsembleSpec` instance.

  Args:
    name: _EnsembleSpec's name.
    random_seed: A scalar random seed.
    num_subnetworks: The number of fake subnetworks in this ensemble.
    bias: Bias value.
    loss: Float loss to return. When None, it's picked from a random
      distribution.
    adanet_loss: Float AdaNet loss to return. When None, it's picked from a
      random distribution.
    eval_metrics: Optional eval metrics tuple of (metric_fn, tensor args).
    dict_predictions: Boolean whether to return predictions as a dictionary
      of `Tensor` or just a single float `Tensor`.
    export_output_key: An `ExportOutputKeys` for faking export outputs.
    subnetwork_builders: List of `adanet.subnetwork.Builder` objects.
    train_op: A train op.

  Returns:
    A dummy `_EnsembleSpec` instance.
  """

  if loss is None:
    loss = dummy_tensor([], random_seed)

  if adanet_loss is None:
    adanet_loss = dummy_tensor([], random_seed * 2)
  else:
    adanet_loss = tf.convert_to_tensor(adanet_loss)

  logits = dummy_tensor([], random_seed * 3)
  if dict_predictions:
    predictions = {
        "logits": logits,
        "classes": tf.cast(tf.abs(logits), dtype=tf.int64)
    }
  else:
    predictions = logits
  weighted_subnetworks = [
      WeightedSubnetwork(
          name=name,
          iteration_number=1,
          logits=dummy_tensor([2, 1], random_seed * 4),
          weight=dummy_tensor([2, 1], random_seed * 4),
          subnetwork=Subnetwork(
              last_layer=dummy_tensor([1, 2], random_seed * 4),
              logits=dummy_tensor([2, 1], random_seed * 4),
              complexity=1.,
              persisted_tensors={}))
  ]
  export_outputs = _dummy_export_outputs(export_output_key, logits,
                                         predictions)
  bias = tf.constant(bias)
  return _EnsembleSpec(
      name=name,
      ensemble=ComplexityRegularized(
          weighted_subnetworks=weighted_subnetworks * num_subnetworks,
          bias=bias,
          logits=logits,
      ),
      architecture=_Architecture("dummy_ensemble_candidate"),
      subnetwork_builders=subnetwork_builders,
      predictions=predictions,
      loss=loss,
      adanet_loss=adanet_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      export_outputs=export_outputs)
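# Example use of the helper above, assuming it is called inside a graph
# context as part of a test:
spec = dummy_ensemble_spec(
    "test_ensemble", num_subnetworks=2, dict_predictions=True)
assert spec.name == "test_ensemble"
assert len(spec.ensemble.weighted_subnetworks) == 2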
def dummy_ensemble_spec(name,
                        random_seed=42,
                        num_subnetworks=1,
                        bias=0.,
                        loss=None,
                        adanet_loss=None,
                        complexity_regularized_loss=None,
                        eval_metric_ops=None,
                        dict_predictions=False,
                        export_output_key=None,
                        train_op=None):
  """Creates a dummy `_EnsembleSpec` instance.

  Args:
    name: _EnsembleSpec's name.
    random_seed: A scalar random seed.
    num_subnetworks: The number of fake subnetworks in this ensemble.
    bias: Bias value.
    loss: Float loss to return. When None, it's picked from a random
      distribution.
    adanet_loss: Float AdaNet loss to return. When None, it's picked from a
      random distribution.
    complexity_regularized_loss: Float complexity regularized loss to return.
      When None, it's picked from a random distribution.
    eval_metric_ops: Optional dictionary of metric ops.
    dict_predictions: Boolean whether to return predictions as a dictionary
      of `Tensor` or just a single float `Tensor`.
    export_output_key: An `ExportOutputKeys` for faking export outputs.
    train_op: A train op.

  Returns:
    A dummy `_EnsembleSpec` instance.
  """

  if loss is None:
    loss = dummy_tensor([], random_seed)
  elif not isinstance(loss, tf.Tensor):
    loss = tf.constant(loss)

  if adanet_loss is None:
    adanet_loss = dummy_tensor([], random_seed * 2)
  else:
    adanet_loss = tf.convert_to_tensor(adanet_loss)

  if complexity_regularized_loss is None:
    complexity_regularized_loss = dummy_tensor([], random_seed * 2)
  elif not isinstance(complexity_regularized_loss, tf.Tensor):
    complexity_regularized_loss = tf.constant(complexity_regularized_loss)

  logits = dummy_tensor([], random_seed * 3)
  if dict_predictions:
    predictions = {
        "logits": logits,
        "classes": tf.cast(tf.abs(logits), dtype=tf.int64)
    }
  else:
    predictions = logits
  weighted_subnetworks = [
      WeightedSubnetwork(
          name=tf.constant(name),
          logits=dummy_tensor([2, 1], random_seed * 4),
          weight=dummy_tensor([2, 1], random_seed * 4),
          subnetwork=Subnetwork(
              last_layer=dummy_tensor([1, 2], random_seed * 4),
              logits=dummy_tensor([2, 1], random_seed * 4),
              complexity=1.,
              persisted_tensors={}))
  ]
  export_outputs = _dummy_export_outputs(export_output_key, logits,
                                         predictions)
  bias = tf.constant(bias)
  return _EnsembleSpec(
      name=name,
      ensemble=Ensemble(
          weighted_subnetworks=weighted_subnetworks * num_subnetworks,
          bias=bias,
          logits=logits,
      ),
      predictions=predictions,
      loss=loss,
      adanet_loss=adanet_loss,
      complexity_regularized_loss=complexity_regularized_loss,
      complexity_regularization=1,
      eval_metric_ops=eval_metric_ops,
      train_op=train_op,
      export_outputs=export_outputs)