def dnn_logits_fn():
  """Builds the logits from the input layer."""
  previous_layer = input_layer
  for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
    with variable_scope.variable_scope(
        "hiddenlayer_%d" % layer_id,
        values=(previous_layer,)) as hidden_layer_scope:
      net = layers.fully_connected(
          previous_layer,
          num_hidden_units,
          activation_fn=dnn_activation_fn,
          variables_collections=[dnn_parent_scope],
          scope=hidden_layer_scope)
      if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
    _add_hidden_layer_summary(net, hidden_layer_scope.name)
    previous_layer = net
  with variable_scope.variable_scope(
      "logits", values=(previous_layer,)) as logits_scope:
    dnn_logits = layers.fully_connected(
        previous_layer,
        head.logits_dimension,
        activation_fn=None,
        variables_collections=[dnn_parent_scope],
        scope=logits_scope)
  _add_hidden_layer_summary(dnn_logits, logits_scope.name)
  return dnn_logits
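# A minimal, self-contained sketch of the stacking pattern above, assuming a
# TensorFlow 1.x environment where `tf.contrib.layers` is available. The
# placeholder shape, hidden-unit sizes, and dropout rate are illustrative only.
import tensorflow as tf

inputs = tf.placeholder(tf.float32, shape=[None, 16])
net = inputs
for layer_id, num_hidden_units in enumerate([32, 16]):
  with tf.variable_scope("hiddenlayer_%d" % layer_id):
    net = tf.contrib.layers.fully_connected(net, num_hidden_units)
    net = tf.contrib.layers.dropout(net, keep_prob=0.9)  # training mode only
logits = tf.contrib.layers.fully_connected(net, 3, activation_fn=None)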
def inference_graph(self, data):
  with ops.device(self.device_assigner):
    # Compute activations for the neural network.
    nn_activations = layers.fully_connected(data, self.params.layer_size)
    for _ in range(1, self.params.num_layers):
      # pylint: disable=W0106
      nn_activations = layers.fully_connected(nn_activations,
                                              self.params.layer_size)
    return nn_activations
def build_model(self, features, feature_columns, is_training):
  """See base class."""
  self._feature_columns = feature_columns

  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=self._num_ps_replicas, min_slice_size=64 << 20))
  with variable_scope.variable_scope(
      self._scope + "/input_from_feature_columns",
      values=features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        self._get_feature_columns(),
        weight_collections=[self._scope],
        trainable=self._trainable,
        scope=scope)

  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=self._num_ps_replicas))
  for layer_id, num_hidden_units in enumerate(self._hidden_units):
    with variable_scope.variable_scope(
        self._scope + "/hiddenlayer_%d" % layer_id,
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(
          net,
          num_hidden_units,
          activation_fn=self._activation_fn,
          variables_collections=[self._scope],
          trainable=self._trainable,
          scope=scope)
      if self._dropout is not None and is_training:
        net = layers.dropout(net, keep_prob=(1.0 - self._dropout))
    self._add_hidden_layer_summary(net, scope.name)

  with variable_scope.variable_scope(
      self._scope + "/logits",
      values=[net],
      partitioner=hidden_layer_partitioner) as scope:
    logits = layers.fully_connected(
        net,
        self._num_label_columns,
        activation_fn=None,
        variables_collections=[self._scope],
        trainable=self._trainable,
        scope=scope)
  self._add_hidden_layer_summary(logits, "logits")
  return logits
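# A sketch of how `min_max_variable_partitioner` splits large variables across
# parameter servers, assuming TF 1.x. With `min_slice_size=64 << 20` (64MB),
# small variables stay in one piece; larger ones are split into at most
# `max_partitions` slices.
import tensorflow as tf

partitioner = tf.min_max_variable_partitioner(
    max_partitions=2, min_slice_size=64 << 20)
with tf.variable_scope("demo", partitioner=partitioner):
  v = tf.get_variable("v", shape=[1024, 1024])  # partitioned if large enough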
def inference_graph(self, data):
  with ops.device(self.device_assigner):
    # Compute activations for the neural network.
    nn_activations = [layers.fully_connected(data, self.params.layer_size)]
    for _ in range(1, self.params.num_layers):
      # pylint: disable=W0106
      nn_activations.append(
          layers.fully_connected(nn_activations[-1], self.params.layer_size))
    nn_activations_tensor = array_ops.concat(
        nn_activations, 1, name="flattened_nn_activations")
    return nn_activations_tensor
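# Shape sketch for the concat above, assuming TF 1.x: concatenating
# `num_layers` activations of shape [batch, layer_size] along axis 1 yields
# [batch, num_layers * layer_size].
import tensorflow as tf

acts = [tf.zeros([8, 4]) for _ in range(3)]
flattened = tf.concat(acts, 1, name="flattened_nn_activations")
print(flattened.shape)  # (8, 12)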
def inference_graph(self, data):
  with ops.device(self.device_assigner):
    # Compute activations for the neural network.
    nn_activations = layers.fully_connected(data, 1)
    # There is always one activation per instance by definition, so squeeze
    # away the extra dimension.
    return array_ops.squeeze(nn_activations, axis=[1])
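# Shape sketch for the squeeze above, assuming TF 1.x: `fully_connected` with
# one output produces [batch, 1], and squeezing axis 1 leaves a [batch] vector.
import tensorflow as tf

activations = tf.zeros([8, 1])
print(tf.squeeze(activations, axis=[1]).shape)  # (8,)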
def dummy_discriminator_fn(input_data, num_domains, mode):
  del mode
  hidden = layers.flatten(input_data)
  output_src = math_ops.reduce_mean(hidden, axis=1)
  output_cls = layers.fully_connected(
      inputs=hidden, num_outputs=num_domains, scope='debug')
  return output_src, output_cls
def _discriminator_fn(inputs, num_domains):
  """Differentiable dummy discriminator for StarGAN."""
  hidden = layers.flatten(inputs)
  output_src = math_ops.reduce_mean(hidden, axis=1)
  output_cls = layers.fully_connected(
      inputs=hidden,
      num_outputs=num_domains,
      activation_fn=None,
      normalizer_fn=None,
      biases_initializer=None)
  return output_src, output_cls
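# A hypothetical shape check for the two-headed discriminator above, assuming
# TF 1.x, that `_discriminator_fn` is in scope, and 4x4 single-channel images
# with 3 domains. `output_src` is one real/fake score per image; `output_cls`
# is one logit per domain.
import tensorflow as tf

images = tf.zeros([8, 4, 4, 1])
src, cls = _discriminator_fn(images, num_domains=3)
print(src.shape)  # (8,)
print(cls.shape)  # (8, 3)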
def _base_inference(self, data, data_spec=None):
  """Returns an op that performs inference without a softmax."""
  inference_result = self._do_layer_inference(self.layers[0], data)
  for layer in self.layers[1:]:
    inference_result = self._do_layer_inference(layer, inference_result)
  output_size = 1 if self.is_regression else self.params.num_classes
  output = layers.fully_connected(
      inference_result, output_size, activation_fn=array_ops.identity)
  return output
def _base_inference(self, data, data_spec=None, soft=False):
  if soft:
    inference_result = self.layers[0].soft_inference_graph(data)
  else:
    inference_result = self._do_layer_inference(self.layers[0], data)
  for layer in self.layers[1:]:
    inference_result = self._do_layer_inference(layer, inference_result)
  output_size = 1 if self.is_regression else self.params.num_classes
  output = layers.fully_connected(
      inference_result, output_size, activation_fn=nn_ops.softmax)
  return output
def construct_rnn(initial_state,
                  sequence_input,
                  cell,
                  num_label_columns,
                  dtype=dtypes.float32,
                  parallel_iterations=32,
                  swap_memory=True):
  """Build an RNN and apply a fully connected layer to get the desired output.

  Args:
    initial_state: The initial state to pass the RNN. If `None`, the default
      starting state for `cell` is used.
    sequence_input: A `Tensor` with shape `[batch_size, padded_length, d]`
      that will be passed as input to the RNN.
    cell: An initialized `RNNCell`.
    num_label_columns: The desired output dimension.
    dtype: dtype of `cell`.
    parallel_iterations: Number of iterations to run in parallel. Values >> 1
      use more memory but take less time, while smaller values use less memory
      but computations take longer.
    swap_memory: Transparently swap the tensors produced in forward inference
      but needed for back prop from GPU to CPU. This allows training RNNs
      which would typically not fit on a single GPU, with very minimal (or no)
      performance penalty.

  Returns:
    activations: The output of the RNN, projected to `num_label_columns`
      dimensions.
    final_state: A `Tensor` or nested tuple of `Tensor`s representing the
      final state output by the RNN.
  """
  with ops.name_scope('RNN'):
    rnn_outputs, final_state = rnn.dynamic_rnn(
        cell=cell,
        inputs=sequence_input,
        initial_state=initial_state,
        dtype=dtype,
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory,
        time_major=False)
    activations = layers.fully_connected(
        inputs=rnn_outputs,
        num_outputs=num_label_columns,
        activation_fn=None,
        trainable=True)
    return activations, final_state
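# A hypothetical usage sketch for `construct_rnn`, assuming TF 1.x and that
# the function above is in scope. A GRU cell over [batch, time, d] input
# yields activations of shape [batch, time, num_label_columns].
import tensorflow as tf

cell = tf.nn.rnn_cell.GRUCell(num_units=8)
sequence_input = tf.placeholder(tf.float32, [None, 10, 4])
activations, final_state = construct_rnn(
    initial_state=None,
    sequence_input=sequence_input,
    cell=cell,
    num_label_columns=2)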
def construct_state_saving_rnn(cell,
                               inputs,
                               num_label_columns,
                               state_saver,
                               state_name,
                               scope='rnn'):
  """Build a state saving RNN and apply a fully connected layer.

  Args:
    cell: An instance of `RNNCell`.
    inputs: A length `T` list of inputs, each a `Tensor` of shape
      `[batch_size, input_size, ...]`.
    num_label_columns: The desired output dimension.
    state_saver: A state saver object with methods `state` and `save_state`.
    state_name: Python string or tuple of strings. The name to use with the
      state_saver. If the cell returns tuples of states (i.e., `cell.state_size`
      is a tuple) then `state_name` should be a tuple of strings having the
      same length as `cell.state_size`. Otherwise it should be a single string.
    scope: `VariableScope` for the created subgraph; defaults to "rnn".

  Returns:
    activations: The output of the RNN, projected to `num_label_columns`
      dimensions, a `Tensor` of shape `[batch_size, T, num_label_columns]`.
    final_state: The final state output by the RNN.
  """
  with ops.name_scope(scope):
    rnn_outputs, final_state = rnn.static_state_saving_rnn(
        cell=cell,
        inputs=inputs,
        state_saver=state_saver,
        state_name=state_name,
        scope=scope)
    # Convert rnn_outputs from a list of time-major order Tensors to a single
    # Tensor of batch-major order.
    rnn_outputs = array_ops.stack(rnn_outputs, axis=1)
    activations = layers.fully_connected(
        inputs=rnn_outputs,
        num_outputs=num_label_columns,
        activation_fn=None,
        trainable=True)
    # Use `identity` to rename `final_state`.
    final_state = array_ops.identity(
        final_state, name=rnn_common.RNNKeys.FINAL_STATE_KEY)
    return activations, final_state
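# A minimal sketch of the list-to-tensor conversion noted above, assuming
# TF 1.x: stacking a length-T list of [batch, units] time-major outputs along
# axis 1 produces a batch-major [batch, T, units] tensor.
import tensorflow as tf

outputs = [tf.zeros([32, 8]) for _ in range(5)]  # length-T list
stacked = tf.stack(outputs, axis=1)
print(stacked.shape)  # (32, 5, 8)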
def _propagate(dim_indices, conf, cells, c_prev, m_prev, new_output, new_state,
               first_call):
  """Propagates through all the cells in dim_indices dimensions."""
  if len(dim_indices) == 0:
    return

  # Because of the way RNNCells are implemented, we take the last dimension
  # (H_{N-1}) out and feed it as the state of the RNN cell
  # (in `last_dim_output`).
  # The input of the cell (H_0 to H_{N-2}) are concatenated into `cell_inputs`.
  if conf.num_dims > 1:
    ls_cell_inputs = [None] * (conf.num_dims - 1)
    for d in conf.dims[:-1]:
      if new_output[d.idx] is None:
        ls_cell_inputs[d.idx] = m_prev[d.idx]
      else:
        ls_cell_inputs[d.idx] = new_output[d.idx]
    cell_inputs = array_ops.concat(ls_cell_inputs, 1)
  else:
    cell_inputs = array_ops.zeros([m_prev[0].get_shape().as_list()[0], 0],
                                  m_prev[0].dtype)

  last_dim_output = (new_output[-1]
                     if new_output[-1] is not None else m_prev[-1])

  for i in dim_indices:
    d = conf.dims[i]
    if d.non_recurrent_fn:
      if conf.num_dims > 1:
        linear_args = array_ops.concat([cell_inputs, last_dim_output], 1)
      else:
        linear_args = last_dim_output
      with vs.variable_scope('non_recurrent' if conf.tied else
                             'non_recurrent/cell_{}'.format(i)):
        if conf.tied and not (first_call and i == dim_indices[0]):
          vs.get_variable_scope().reuse_variables()
        new_output[d.idx] = layers.fully_connected(
            linear_args,
            num_outputs=conf.num_units,
            activation_fn=d.non_recurrent_fn,
            weights_initializer=(vs.get_variable_scope().initializer or
                                 layers.initializers.xavier_initializer),
            weights_regularizer=vs.get_variable_scope().regularizer)
    else:
      if c_prev[i] is not None:
        cell_state = (c_prev[i], last_dim_output)
      else:
        # For GRU/RNN, the state is just the previous output.
        cell_state = last_dim_output
      with vs.variable_scope('recurrent' if conf.tied else
                             'recurrent/cell_{}'.format(i)):
        if conf.tied and not (first_call and i == dim_indices[0]):
          vs.get_variable_scope().reuse_variables()
        cell = cells[i]
        new_output[d.idx], new_state[d.idx] = cell(cell_inputs, cell_state)
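# A minimal sketch of the tied-weights pattern used above, assuming TF 1.x:
# calling `reuse_variables()` on the current scope makes subsequent
# `get_variable` calls return the existing variables instead of creating
# new ones.
import tensorflow as tf

with tf.variable_scope("tied"):
  w1 = tf.get_variable("w", shape=[4, 4])
  tf.get_variable_scope().reuse_variables()
  w2 = tf.get_variable("w", shape=[4, 4])
assert w1 is w2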
def _dnn_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `_Head` instance.
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used
          by the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use the Adagrad
          optimizer with a default learning rate of 0.05.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`. Note that a string containing the unqualified
          name of the op may also be provided, e.g., "relu", "tanh", or
          "sigmoid".
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are clipped
          to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used
          to multiply with learning rate for the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input
          layer partitions. If not provided, will use the default of 64M.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  head = params["head"]
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or "Adagrad"
  activation_fn = _get_activation_fn(params.get("activation_fn"))
  dropout = params.get("dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  input_layer_min_slice_size = (
      params.get("input_layer_min_slice_size") or 64 << 20)
  num_ps_replicas = config.num_ps_replicas if config else 0
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

  features = _get_feature_dict(features)
  parent_scope = "dnn"

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=input_layer_min_slice_size))
    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as input_layer_scope:
      if all(
          isinstance(fc, feature_column._FeatureColumn)  # pylint: disable=protected-access
          for fc in feature_columns):
        net = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope],
            scope=input_layer_scope)
      else:
        net = fc_core.input_layer(
            features=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope])

    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(net,)) as hidden_layer_scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=activation_fn,
            variables_collections=[parent_scope],
            scope=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
        "logits", values=(net,)) as logits_scope:
      logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_LEARNING_RATE,
          optimizer=_get_optimizer(optimizer),
          gradient_multipliers=(
              dnn_linear_combined._extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, parent_scope,
                  input_layer_scope.name)),
          clip_gradients=gradient_clip_norm,
          name=parent_scope,
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
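# A hypothetical wiring sketch for `_dnn_model_fn`, assuming TF 1.x with
# `tf.contrib.learn` available and that the model_fn above is in scope. The
# head, hidden units, and feature column here are illustrative only.
import tensorflow as tf
from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib

estimator = learn.Estimator(
    model_fn=_dnn_model_fn,
    params={
        "head": head_lib.multi_class_head(n_classes=2),
        "hidden_units": [10, 10],
        "feature_columns": [tf.contrib.layers.real_valued_column("x")],
    })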
def discriminator_fn(data, unused_conditioning, mode):
  del unused_conditioning, mode
  return layers.fully_connected(data, 1)
def generator_fn(noise_dict, mode):
  del mode
  noise = noise_dict['x']
  return layers.fully_connected(
      noise, tensor_shape.dimension_value(noise.shape[1]))
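# A hypothetical sketch of plugging the generator/discriminator fns above into
# `tf.contrib.gan`'s GANEstimator, assuming TF 1.x; the Wasserstein losses and
# Adam optimizers shown are illustrative choices, not the only valid ones.
import tensorflow as tf

tfgan = tf.contrib.gan
gan_estimator = tfgan.estimator.GANEstimator(
    generator_fn=generator_fn,
    discriminator_fn=discriminator_fn,
    generator_loss_fn=tfgan.losses.wasserstein_generator_loss,
    discriminator_loss_fn=tfgan.losses.wasserstein_discriminator_loss,
    generator_optimizer=tf.train.AdamOptimizer(0.001),
    discriminator_optimizer=tf.train.AdamOptimizer(0.001))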