def create_user_embeddings(self, features):
    # input user embedding
    with variable_scope.variable_scope(
            "user_embeddings",
            partitioner=self.partitioner,
            initializer=self.initializer) as scope:
        embeddings_fix = self.parser.embedding_columns(
            feature_type="user_fix", use_hashmap=True)
        if self.hparams.use_user_id and self.hparams.user_id_only:
            encoded = layers.input_from_feature_columns(
                columns_to_tensors={"user_id_emb": features["user_id"]},
                feature_columns=[embeddings_fix[0]])
        else:
            personal_encoded = []
            profile_features = {}
            for fs_name in self.parser.embedding_user_features_fix:
                profile_features.update(
                    {fs_name + "_emb": features[fs_name]})
            profile_encoded = layers.input_from_feature_columns(
                columns_to_tensors=profile_features,
                feature_columns=embeddings_fix)
            personal_encoded.append(profile_encoded)
            encoded = tf.concat(personal_encoded, -1)
        if self.hparams.user_fc_trans:
            encoded = tf.layers.dense(
                encoded, self.num_units, tf.nn.tanh,
                kernel_initializer=self.kernel_initializer)
        return encoded
def get_input_from_feature_columns(self):
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=self.num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
            "input_from_feature_columns",
            values=tuple(six.itervalues(self.features)),
            partitioner=input_layer_partitioner) as dnn_input_scope:
        self.dnn_net = \
            layers.input_from_feature_columns(
                columns_to_tensors=self.features,
                feature_columns=self.dnn_features_columns,
                weight_collections=[self.dnn_parent_scope],
                scope=dnn_input_scope)
        self.item_id_net = \
            layers.input_from_feature_columns(
                columns_to_tensors=self.features,
                feature_columns=self.item_id_column,
                weight_collections=[self.dnn_parent_scope],
                scope=dnn_input_scope)
        self.bias_net = \
            layers.input_from_feature_columns(
                columns_to_tensors=self.features,
                feature_columns=self.bias_columns,
                weight_collections=[self.dnn_parent_scope],
                scope=dnn_input_scope)
def _dnn_logits(self, features, is_training=False):
    net = layers.input_from_feature_columns(
        features,
        self._get_dnn_feature_columns(),
        weight_collections=[self._dnn_weight_collection])
    for layer_id, num_hidden_units in enumerate(self._dnn_hidden_units):
        with variable_scope.variable_op_scope(
                [net], "hiddenlayer_%d" % layer_id,
                partitioner=partitioned_variables.min_max_variable_partitioner(
                    max_partitions=self._config.num_ps_replicas)) as scope:
            net = layers.fully_connected(
                net,
                num_hidden_units,
                activation_fn=self._dnn_activation_fn,
                variables_collections=[self._dnn_weight_collection],
                scope=scope)
            if self._dnn_dropout is not None and is_training:
                net = layers.dropout(net, keep_prob=(1.0 - self._dnn_dropout))
        self._add_hidden_layer_summary(net, scope.name)
    with variable_scope.variable_op_scope(
            [net], "dnn_logit",
            partitioner=partitioned_variables.min_max_variable_partitioner(
                max_partitions=self._config.num_ps_replicas)) as scope:
        logit = layers.fully_connected(
            net,
            self._target_column.num_label_columns,
            activation_fn=None,
            variables_collections=[self._dnn_weight_collection],
            scope=scope)
    self._add_hidden_layer_summary(logit, "dnn_logit")
    return logit
def _dnn_logits(self, features, is_training=False):
    net = layers.input_from_feature_columns(
        features,
        self._get_dnn_feature_columns(),
        weight_collections=[self._dnn_weight_collection])
    for layer_id, num_hidden_units in enumerate(self._dnn_hidden_units):
        net = layers.legacy_fully_connected(
            net,
            num_hidden_units,
            activation_fn=self._dnn_activation_fn,
            weight_collections=[self._dnn_weight_collection],
            bias_collections=[self._dnn_weight_collection],
            name="hiddenlayer_%d" % layer_id)
        if self._dnn_dropout is not None and is_training:
            net = layers.dropout(
                net, keep_prob=(1.0 - self._dnn_dropout))
        self._add_hidden_layer_summary(net, "hiddenlayer_%d" % layer_id)
    logit = layers.legacy_fully_connected(
        net,
        self._num_label_columns(),
        weight_collections=[self._dnn_weight_collection],
        bias_collections=[self._dnn_weight_collection],
        name="dnn_logit")
    self._add_hidden_layer_summary(logit, "dnn_logit")
    return logit
def get_table(self, lang):
    print("Start to read item from %s, lang[%s]" % (self.item_dir, lang))
    items_dict = self.read_items_dict(lang, self.item_dir)
    dnn_partitioner = (partitioned_variables.min_max_variable_partitioner(
        max_partitions=self.num_ps_replicas, min_slice_size=64 << 20))
    with tf.variable_scope(self.dnn_parent_scope,
                           values=tuple(six.itervalues(self.features)),
                           partitioner=dnn_partitioner,
                           reuse=tf.AUTO_REUSE):
        input_layer_partitioner = (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=self.num_ps_replicas,
                min_slice_size=64 << 20))
        with variable_scope.variable_scope(
                "input_from_feature_columns",
                values=tuple(six.itervalues(self.features)),
                partitioner=input_layer_partitioner) as dnn_input_scope:
            item_id_net = layers.input_from_feature_columns(
                columns_to_tensors=items_dict,
                feature_columns=self.item_id_column,
                weight_collections=[self.dnn_parent_scope],
                scope=dnn_input_scope)
            item_id_int = tf.string_to_number(items_dict['item_id'],
                                              out_type=tf.int64)
            table_ref = tf.user_ops.flann_table(key_dtype=tf.float32,
                                                value_dtype=tf.int64)
            build_index = tf.user_ops.flann_table_insert(table_ref,
                                                         item_id_net,
                                                         item_id_int,
                                                         algorithm=12,
                                                         far_sample_n=3)
            self.graph.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS,
                                         build_index)
            return table_ref
def build_model(self, features, feature_columns, is_training):
    """See base class."""
    self._feature_columns = feature_columns

    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=self._num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
            self._scope + "/input_from_feature_columns",
            values=features.values(),
            partitioner=input_layer_partitioner) as scope:
        net = layers.input_from_feature_columns(
            features,
            self._get_feature_columns(),
            weight_collections=[self._scope],
            trainable=self._trainable,
            scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=self._num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(self._hidden_units):
        with variable_scope.variable_scope(
                self._scope + "/hiddenlayer_%d" % layer_id,
                values=[net],
                partitioner=hidden_layer_partitioner) as scope:
            net = layers.fully_connected(
                net,
                num_hidden_units,
                activation_fn=self._activation_fn,
                variables_collections=[self._scope],
                trainable=self._trainable,
                scope=scope)
            if self._dropout is not None and is_training:
                net = layers.dropout(
                    net, keep_prob=(1.0 - self._dropout))
        self._add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
            self._scope + "/logits",
            values=[net],
            partitioner=hidden_layer_partitioner) as scope:
        logits = layers.fully_connected(
            net,
            self._num_label_columns,
            activation_fn=None,
            variables_collections=[self._scope],
            trainable=self._trainable,
            scope=scope)
    self._add_hidden_layer_summary(logits, "logits")
    return logits
def build_model(self, features, feature_columns, is_training):
    """See base class."""
    self._feature_columns = feature_columns

    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=self._num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
            self._scope + "/input_from_feature_columns",
            values=features.values(),
            partitioner=input_layer_partitioner) as scope:
        net = layers.input_from_feature_columns(
            features,
            self._get_feature_columns(),
            weight_collections=[self._scope],
            scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=self._num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(self._hidden_units):
        with variable_scope.variable_scope(
                self._scope + "/hiddenlayer_%d" % layer_id,
                values=[net],
                partitioner=hidden_layer_partitioner) as scope:
            net = layers.fully_connected(
                net,
                num_hidden_units,
                activation_fn=self._activation_fn,
                variables_collections=[self._scope],
                scope=scope)
            if self._dropout is not None and is_training:
                net = layers.dropout(
                    net, keep_prob=(1.0 - self._dropout))
        self._add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
            self._scope + "/logits",
            values=[net],
            partitioner=hidden_layer_partitioner) as scope:
        logits = layers.fully_connected(
            net,
            self._num_label_columns,
            activation_fn=None,
            variables_collections=[self._scope],
            scope=scope)
    self._add_hidden_layer_summary(logits, "logits")
    return logits
def input_layer(inputs, features, weight_collections, scope):
    """Reads input features into valid tensors.

    Args:
        inputs: Input dictionary containing the data.
        features: List of columns to read.
        weight_collections: Collection where to add the trainable variables.
            By default added to GraphKeys.VARIABLES.
        scope: Name of the scope where to add the variables.

    Returns:
        Processed tensors.
    """
    net = layers.input_from_feature_columns(
        inputs, features, weight_collections=weight_collections, scope=scope)
    return net
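# Hedged usage sketch for the `input_layer` wrapper above; the column, the
# tensor values, and the collection name are illustrative assumptions, not
# taken from the original source.
import tensorflow as tf
from tensorflow.contrib import layers

demo_columns = [layers.real_valued_column("age", dimension=1)]
demo_inputs = {"age": tf.constant([[23.0], [35.0], [47.0]])}
demo_net = input_layer(demo_inputs, demo_columns,
                       weight_collections=["demo_weights"], scope="demo_input")
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(demo_net))  # [[23.], [35.], [47.]] -- one row per example.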
def _get_exogenous_embedding_shape(self):
    """Computes the shape of the vector returned by _process_exogenous_features.

    Returns:
        The shape as a list. Does not include a batch dimension.
    """
    if not self._exogenous_feature_columns:
        return (0,)
    with ops.Graph().as_default():
        placeholder_features = (
            feature_column.make_place_holder_tensors_for_base_features(
                self._exogenous_feature_columns))
        embedded = layers.input_from_feature_columns(
            columns_to_tensors=placeholder_features,
            feature_columns=self._exogenous_feature_columns)
        return embedded.get_shape().as_list()[1:]
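# Standalone sketch of the same idea as _get_exogenous_embedding_shape above:
# build placeholders for some feature columns in a throwaway graph and read off
# the static embedding width. The two columns below are illustrative assumptions.
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib.layers.python.layers import feature_column

def embedding_width(columns):
    with tf.Graph().as_default():
        placeholders = feature_column.make_place_holder_tensors_for_base_features(
            columns)
        embedded = layers.input_from_feature_columns(
            columns_to_tensors=placeholders, feature_columns=columns)
        return embedded.get_shape().as_list()[1:]

example_columns = [
    layers.real_valued_column("temperature", dimension=1),
    layers.embedding_column(
        layers.sparse_column_with_hash_bucket("city", hash_bucket_size=50),
        dimension=8),
]
print(embedding_width(example_columns))  # [9]: 1 (real valued) + 8 (embedding).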
def run():
    # build model
    features, click, pay = input_fn(data_file=args['train_data'],
                                    num_epochs=args['epochs'],
                                    shuffle=True,
                                    batch_size=args['batch_size'])
    # build columns
    columns = generate_tf_columns()
    # transform features: string -> hash -> embedding
    features_embedding = layers.input_from_feature_columns(features,
                                                           columns.values())
    p_ctr, p_cvr, p_ctcvr, ctr_loss, cvr_loss, ctcvr_loss = \
        build_gmsl(features_embedding, click, pay, is_training=True)
    # build optimizers
    loss = ctr_loss + cvr_loss + ctcvr_loss
    optimizer = tf.train.AdamOptimizer(learning_rate=args['lr'])
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss)
    op = [train_op, p_ctr, p_cvr, p_ctcvr, ctr_loss, cvr_loss, ctcvr_loss]

    with tf.Session() as sess:
        batch_count = 0
        sess.run(tf.global_variables_initializer())
        try:
            while True:
                values = sess.run(op)
                if batch_count % 10 == 0:
                    print('[Batch-%d] : ctr_loss=%.4f, cvr_loss=%.4f, ctcvr_loss=%.4f' % (
                        batch_count, values[4], values[5], values[6]))
                if batch_count == 100:
                    return
                batch_count += 1
        except tf.errors.OutOfRangeError:
            print("done.")
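# `generate_tf_columns` is not shown above; the helper below is only a plausible
# sketch of it, assuming hashed string features embedded to a fixed width. The
# feature names, bucket size, and embedding dimension are made up for illustration.
from collections import OrderedDict
from tensorflow.contrib import layers

def generate_tf_columns(embedding_dim=16):
    columns = OrderedDict()
    for name in ['user_id', 'item_id', 'category']:
        sparse_id = layers.sparse_column_with_hash_bucket(
            column_name=name, hash_bucket_size=100000)
        columns[name] = layers.embedding_column(sparse_id,
                                                dimension=embedding_dim)
    return columns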
def build_sequence_input(features,
                         sequence_feature_columns,
                         context_feature_columns,
                         weight_collections=None,
                         scope=None):
    """Combine sequence and context features into input for an RNN.

    Args:
        features: A `dict` containing the input and (optionally) sequence length
            information and initial state.
        sequence_feature_columns: An iterable containing all the feature columns
            describing sequence features. All items in the set should be instances
            of classes derived from `FeatureColumn`.
        context_feature_columns: An iterable containing all the feature columns
            describing context features i.e. features that apply across all time
            steps. All items in the set should be instances of classes derived
            from `FeatureColumn`.
        weight_collections: List of graph collections to which weights are added.
        scope: Optional scope, passed through to parsing ops.

    Returns:
        A `Tensor` of dtype `float32` and shape `[batch_size, padded_length, ?]`.
        This will be used as input to an RNN.
    """
    features = features.copy()
    features.update(
        layers.transform_features(
            features,
            list(sequence_feature_columns) + list(context_feature_columns or [])))
    sequence_input = layers.sequence_input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=sequence_feature_columns,
        weight_collections=weight_collections,
        scope=scope)
    if context_feature_columns is not None:
        context_input = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=context_feature_columns,
            weight_collections=weight_collections,
            scope=scope)
        sequence_input = _concatenate_context_input(sequence_input, context_input)
    return sequence_input
def _get_model_input(self, features, weight_collections=None, scope=None):
    # TODO(jamieas): add option to use context to construct initial state rather
    # than appending it to sequence input.
    initial_state = features.get(self._initial_state_key)
    sequence_input = layers.sequence_input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=self._sequence_feature_columns,
        weight_collections=weight_collections,
        scope=scope)
    if self._context_feature_columns is not None:
        context_input = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=self._context_feature_columns,
            weight_collections=weight_collections,
            scope=scope)
        sequence_input = _concatenate_context_input(sequence_input, context_input)
    return initial_state, sequence_input
def _dnn_logits(self, features):
    net = layers.input_from_feature_columns(
        features,
        self._get_dnn_feature_columns(),
        weight_collections=[self._dnn_weight_collection])
    for layer_id, num_hidden_units in enumerate(self._dnn_hidden_units):
        net = layers.legacy_fully_connected(
            net,
            num_hidden_units,
            activation_fn=self._dnn_activation_fn,
            weight_collections=[self._dnn_weight_collection],
            bias_collections=[self._dnn_weight_collection],
            name="hiddenlayer_%d" % layer_id)
        self._add_hidden_layer_summary(net, "hiddenlayer_%d" % layer_id)
    logit = layers.legacy_fully_connected(
        net,
        self._num_label_columns(),
        weight_collections=[self._dnn_weight_collection],
        bias_collections=[self._dnn_weight_collection],
        name="dnn_logit")
    self._add_hidden_layer_summary(logit, "dnn_logit")
    return logit
def build_model(self, features, feature_columns, is_training):
    """See base class."""
    features = self._get_feature_dict(features)
    self._feature_columns = feature_columns

    net = layers.input_from_feature_columns(
        features,
        self._get_feature_columns(),
        weight_collections=[self._weight_collection_name])
    for layer_id, num_hidden_units in enumerate(self._hidden_units):
        with variable_scope.variable_op_scope(
                [net], "hiddenlayer_%d" % layer_id,
                partitioner=partitioned_variables.min_max_variable_partitioner(
                    max_partitions=self._config.num_ps_replicas)) as scope:
            net = layers.fully_connected(
                net,
                num_hidden_units,
                activation_fn=self._activation_fn,
                variables_collections=[self._weight_collection_name],
                scope=scope)
            if self._dropout is not None and is_training:
                net = layers.dropout(net, keep_prob=(1.0 - self._dropout))
        self._add_hidden_layer_summary(net, scope.name)
    with variable_scope.variable_op_scope(
            [net], "dnn_logits",
            partitioner=partitioned_variables.min_max_variable_partitioner(
                max_partitions=self._config.num_ps_replicas)) as scope:
        logits = layers.fully_connected(
            net,
            self._num_label_columns,
            activation_fn=None,
            variables_collections=[self._weight_collection_name],
            scope=scope)
    self._add_hidden_layer_summary(logits, "dnn_logits")
    return logits
def create_scoring_model(features, model_params, reuse_variable_scope=False):
    """Creates the model that scores documents.

    Returns a Tensor [n_docs, 1] with document scores.
    """
    feature_columns = model_params['feat_columns']
    hidden_layers = model_params.get('hidden_layers', [20, 10])

    with tf.variable_scope('dnn',
                           values=(tuple(six.itervalues(features))),
                           reuse=reuse_variable_scope) as dnn_parent_scope:
        with tf.variable_scope('input_from_feature_columns'):
            # Takes the example input and converts into tensors,
            # according to the provided feature_columns.
            net = input_from_feature_columns(features, feature_columns)

        for layer_id, num_hidden_units in enumerate(hidden_layers):
            with tf.variable_scope("hiddenlayer_%d" % layer_id,
                                   values=(net,)) as dnn_hidden_layer_scope:
                # Non-linear hidden layer with default activation function (ReLu).
                net = fully_connected(net, num_hidden_units,
                                      variables_collections=[dnn_parent_scope])

        # Final output is linear, allowing for regression.
        return fully_connected(net, 1, activation_fn=None)
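# Hedged usage sketch for create_scoring_model above. The document tensor, the
# single feature column, and the hidden-layer sizes are illustrative assumptions.
import six
import tensorflow as tf
from tensorflow.contrib.layers import (fully_connected,
                                       input_from_feature_columns,
                                       real_valued_column)

doc_features = {'relevance': tf.constant([[0.1], [0.7], [0.4]])}  # [n_docs, 1]
demo_params = {
    'feat_columns': [real_valued_column('relevance', dimension=1)],
    'hidden_layers': [16, 8],
}
doc_scores = create_scoring_model(doc_features, demo_params)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(doc_scores))  # One score per document, shape [3, 1].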
def _dnn_model_fn(features, labels, mode, params, config=None): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `_Head` instance. * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use the Adagrad optimizer with a default learning rate of 0.05. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. * input_layer_min_slice_size: Optional. The min slice size of input layer partitions. If not provided, will use the default of 64M. config: `RunConfig` object to configure the runtime settings. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ head = params["head"] hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or "Adagrad" activation_fn = params.get("activation_fn") dropout = params.get("dropout") gradient_clip_norm = params.get("gradient_clip_norm") input_layer_min_slice_size = ( params.get("input_layer_min_slice_size") or 64 << 20) num_ps_replicas = config.num_ps_replicas if config else 0 embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) features = _get_feature_dict(features) parent_scope = "dnn" partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas) with variable_scope.variable_scope( parent_scope, values=tuple(six.itervalues(features)), partitioner=partitioner): input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=input_layer_min_slice_size)) with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as input_layer_scope: if all([ isinstance(fc, feature_column._FeatureColumn) # pylint: disable=protected-access for fc in feature_columns ]): net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=input_layer_scope) else: net = fc_core.input_layer( features=features, feature_columns=feature_columns, weight_collections=[parent_scope]) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(net,)) as hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, 
hidden_layer_scope.name) with variable_scope.variable_scope( "logits", values=(net,)) as logits_scope: logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[parent_scope], scope=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), gradient_multipliers=( dnn_linear_combined._extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, parent_scope, input_layer_scope.name)), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) return head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
def _dnn_tree_combined_model_fn( features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, tree_learner_config, num_trees, tree_examples_per_layer, config=None, dnn_optimizer="Adagrad", dnn_activation_fn=nn.relu, dnn_dropout=None, dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, predict_with_tree_only=False, tree_feature_columns=None, tree_center_bias=False, dnn_to_tree_distillation_param=None, use_core_versions=False, output_type=model.ModelBuilderOutputType.MODEL_FN_OPS): """DNN and GBDT combined model_fn. Args: features: `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) head: A `Head` instance. dnn_hidden_units: List of hidden units per layer. dnn_feature_columns: An iterable containing all the feature columns used by the model's DNN. tree_learner_config: A config for the tree learner. num_trees: Number of trees to grow model to after training DNN. tree_examples_per_layer: Number of examples to accumulate before growing the tree a layer. This value has a big impact on model quality and should be set equal to the number of examples in training dataset if possible. It can also be a function that computes the number of examples based on the depth of the layer that's being built. config: `RunConfig` of the estimator. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN. If `None`, will use the Adagrad optimizer with default learning rate of 0.001. dnn_activation_fn: Activation function applied to each layer of the DNN. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability to drop out a given unit in the DNN. dnn_input_layer_partitioner: Partitioner for input layer of the DNN. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. dnn_input_layer_to_tree: Whether to provide the DNN's input layer as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. predict_with_tree_only: Whether to use only the tree model output as the final prediction. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the float defines the weight of the distillation loss, and the loss_fn, for computing distillation loss, takes dnn_logits, tree_logits and weight tensor. If the entire tuple is None, no distillation will be applied. If only the loss_fn is None, we will take the sigmoid/softmax cross entropy loss be default. When distillation is applied, `predict_with_tree_only` will be set to True. use_core_versions: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. """ if not isinstance(features, dict): raise ValueError("features should be a dictionary of `Tensor`s. " "Given type: {}".format(type(features))) if not dnn_feature_columns: raise ValueError("dnn_feature_columns must be specified") if dnn_to_tree_distillation_param: if not predict_with_tree_only: logging.warning("update predict_with_tree_only to True since distillation" "is specified.") predict_with_tree_only = True # Build DNN Logits. 
dnn_parent_scope = "dnn" dnn_partitioner = dnn_input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=config.num_ps_replicas, min_slice_size=64 << 20)) if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and not use_core_versions): raise ValueError("You must use core versions with Estimator Spec") with variable_scope.variable_scope( dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=dnn_partitioner) as input_layer_scope: if use_core_versions: input_layer = feature_column_lib.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) else: input_layer = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=input_layer_scope) previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(previous_layer,)) as hidden_layer_scope: net = layers.fully_connected( previous_layer, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( "logits", values=(previous_layer,)) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=logits_scope) _add_hidden_layer_summary(dnn_logits, logits_scope.name) def _dnn_train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=training_util.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), name=dnn_parent_scope, variables=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) # Build Tree Logits. global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. 
name="ensemble_model") tree_features = features.copy() if dnn_input_layer_to_tree: tree_features["dnn_input_layer"] = input_layer tree_feature_columns.append(layers.real_valued_column("dnn_input_layer")) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=tree_center_bias, examples_per_layer=tree_examples_per_layer, learner_config=tree_learner_config, feature_columns=tree_feature_columns, logits_dimension=head.logits_dimension, features=tree_features, use_core_columns=use_core_versions) with ops.name_scope("gbdt"): predictions_dict = gbdt_model.predict(mode) tree_logits = predictions_dict["predictions"] def _tree_train_op_fn(loss): """Returns the op to optimize the loss.""" if dnn_to_tree_distillation_param: loss_weight, loss_fn = dnn_to_tree_distillation_param weight_tensor = head_lib._weight_tensor( # pylint: disable=protected-access features, head.weight_column_name) dnn_logits_fixed = array_ops.stop_gradient(dnn_logits) if loss_fn is None: # we create the loss_fn similar to the head loss_fn for # multi_class_head used previously as the default one. n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn( n_classes) dnn_to_tree_distillation_loss = loss_weight * loss_fn( dnn_logits_fixed, tree_logits, weight_tensor) summary.scalar("dnn_to_tree_distillation_loss", dnn_to_tree_distillation_loss) loss += dnn_to_tree_distillation_loss update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op if predict_with_tree_only: if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER: tree_train_logits = tree_logits else: tree_train_logits = control_flow_ops.cond( global_step > dnn_steps_to_train, lambda: tree_logits, lambda: dnn_logits) else: tree_train_logits = dnn_logits + tree_logits def _no_train_op_fn(loss): """Returns a no-op.""" del loss return control_flow_ops.no_op() if tree_center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS: if use_core_versions: model_fn_ops = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops( dnn_train_op).train_op tree_train_op = head.create_estimator_spec( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops( tree_train_op).train_op model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits).train_op tree_train_op = head.create_model_fn_ops( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits).train_op # Add the hooks model_fn_ops.training_hooks.extend([ 
trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train, tree_train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees) ]) return model_fn_ops elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC: fusion_spec = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_spec = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) tree_spec = head.create_estimator_spec( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) training_hooks = [ trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train, tree_spec.train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees) ] fusion_spec = fusion_spec._replace(training_hooks=training_hooks + list(fusion_spec.training_hooks)) return fusion_spec
def _training_examples_and_variables(): """Returns dictionaries for training examples and variables.""" batch_size = targets.get_shape()[0] # Iterate over all feature columns and create appropriate lists for dense # and sparse features as well as dense and sparse weights (variables) for # SDCA. # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables # dict as 1-dimensional tensors. dense_features, sparse_features, sparse_feature_with_values = [], [], [] dense_feature_weights = [] sparse_feature_weights, sparse_feature_with_values_weights = [], [] # pylint: disable=protected-access for column in sorted(columns_to_variables.keys(), key=lambda x: x.key): transformed_tensor = features[column] if isinstance(column, layers.feature_column._RealValuedColumn): # A real-valued column corresponds to a dense feature in SDCA. A # transformed tensor corresponding to a RealValuedColumn has rank 2 # (its shape is typically [batch_size, column.dimension]) and so it # can be passed to SDCA as is. dense_features.append(transformed_tensor) # For real valued columns, the variables list contains exactly one # element. dense_feature_weights.append(columns_to_variables[column][0]) elif isinstance(column, layers.feature_column._BucketizedColumn): # A bucketized column corresponds to a sparse feature in SDCA. The # bucketized feature is "sparsified" for SDCA by converting it to a # SparseFeatureColumn respresenting the one-hot encoding of the # bucketized feature. dense_bucket_tensor = layers.input_from_feature_columns( {column: transformed_tensor}, [column]) sparse_feature_column = _tensor_to_sparse_feature_column( dense_bucket_tensor) sparse_feature_with_values.append(sparse_feature_column) # For bucketized columns, the variables list contains exactly one # element. sparse_feature_with_values_weights.append( columns_to_variables[column][0]) elif isinstance(column, (layers.feature_column._CrossedColumn, layers.feature_column._SparseColumn)): sparse_features.append(sdca_ops.SparseFeatureColumn( array_ops.reshape( array_ops.split(1, 2, transformed_tensor.indices)[0], [-1]), array_ops.reshape(transformed_tensor.values, [-1]), None)) sparse_feature_weights.append(columns_to_variables[column][0]) elif isinstance(column, layers.feature_column._WeightedSparseColumn): id_tensor = column.id_tensor(transformed_tensor) weight_tensor = column.weight_tensor(transformed_tensor) sparse_feature_with_values.append(sdca_ops.SparseFeatureColumn( array_ops.reshape( array_ops.split(1, 2, id_tensor.indices)[0], [-1]), array_ops.reshape(id_tensor.values, [-1]), array_ops.reshape( weight_tensor.values, [-1]))) sparse_feature_with_values_weights.append( columns_to_variables[column][0]) else: raise ValueError('SDCAOptimizer does not support column type %s.' % type(column).__name__) # pylint: enable=protected-access example_weights = array_ops.reshape( features[weight_column_name], shape=[-1]) if weight_column_name else array_ops.ones([batch_size]) example_ids = features[self._example_id_column] sparse_feature_with_values.extend(sparse_features) sparse_feature_with_values_weights.extend(sparse_feature_weights) examples = dict(sparse_features=sparse_feature_with_values, dense_features=dense_features, example_labels=math_ops.to_float(array_ops.reshape( targets, shape=[-1])), example_weights=example_weights, example_ids=example_ids) sdca_variables = dict( sparse_features_weights=sparse_feature_with_values_weights, dense_features_weights=dense_feature_weights) return examples, sdca_variables
def _process_exogenous_features(self, times, features):
    """Create a single vector from exogenous features.

    Args:
        times: A [batch size, window size] vector of times for this batch,
            primarily used to check the shape information of exogenous features.
        features: A dictionary of exogenous features corresponding to the columns
            in self._exogenous_feature_columns. Each value should have a shape
            prefixed by [batch size, window size].

    Returns:
        A Tensor with shape [batch size, window size, exogenous dimension], where
        the size of the exogenous dimension depends on the exogenous feature
        columns passed to the model's constructor.

    Raises:
        ValueError: If an exogenous feature has an unknown rank.
    """
    if self._exogenous_feature_columns:
        exogenous_features_single_batch_dimension = {}
        for name, tensor in features.items():
            if tensor.get_shape().ndims is None:
                # input_from_feature_columns does not support completely unknown
                # feature shapes, so we save on a bit of logic and provide a better
                # error message by checking that here.
                raise ValueError(
                    ("Features with unknown rank are not supported. Got shape {} "
                     "for feature {}.").format(tensor.get_shape(), name))
            tensor_shape_dynamic = array_ops.shape(tensor)
            tensor = array_ops.reshape(
                tensor,
                array_ops.concat(
                    [[tensor_shape_dynamic[0] * tensor_shape_dynamic[1]],
                     tensor_shape_dynamic[2:]], axis=0))
            # Avoid shape warnings when embedding "scalar" exogenous features (those
            # with only batch and window dimensions); input_from_feature_columns
            # expects input ranks to match the embedded rank.
            if tensor.get_shape().ndims == 1:
                exogenous_features_single_batch_dimension[name] = tensor[:, None]
            else:
                exogenous_features_single_batch_dimension[name] = tensor
        embedded_exogenous_features_single_batch_dimension = (
            layers.input_from_feature_columns(
                columns_to_tensors=exogenous_features_single_batch_dimension,
                feature_columns=self._exogenous_feature_columns,
                trainable=True))
        exogenous_regressors = array_ops.reshape(
            embedded_exogenous_features_single_batch_dimension,
            array_ops.concat(
                [
                    array_ops.shape(times),
                    array_ops.shape(
                        embedded_exogenous_features_single_batch_dimension)[1:]
                ],
                axis=0))
        exogenous_regressors.set_shape(times.get_shape().concatenate(
            embedded_exogenous_features_single_batch_dimension.get_shape()[1:]))
        exogenous_regressors = math_ops.cast(
            exogenous_regressors, dtype=self.dtype)
    else:
        # Not having any exogenous features is a special case so that models can
        # avoid superfluous updates, which may not be free of side effects due to
        # bias terms in transformations.
        exogenous_regressors = None
    return exogenous_regressors
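# Minimal standalone illustration (with assumed shapes and a made-up column name)
# of the reshape trick used in _process_exogenous_features above: fold the
# [batch, window] prefix into one dimension, embed, then unfold back again.
import tensorflow as tf
from tensorflow.contrib import layers

def fold_embed_unfold(times, scalar_feature, column_name="load"):
    column = layers.real_valued_column(column_name, dimension=1)
    flat = tf.reshape(scalar_feature, [-1, 1])  # fold batch/window; rank 2 for the column
    embedded = layers.input_from_feature_columns(
        columns_to_tensors={column_name: flat}, feature_columns=[column])
    new_shape = tf.concat([tf.shape(times), tf.shape(embedded)[1:]], axis=0)
    return tf.reshape(embedded, new_shape)

demo_times = tf.zeros([4, 7])          # 4 windows of 7 steps each (assumed)
demo_load = tf.random_normal([4, 7])   # a "scalar" exogenous feature
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(fold_embed_unfold(demo_times, demo_load)).shape)  # (4, 7, 1)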
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None): """Deep Neural Net and Linear combined model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * linear_feature_columns: An iterable containing all the feature columns used by the Linear model. * linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. Defaults to the Ftrl optimizer. * joint_linear_weights: If True a single (possibly partitioned) variable will be used to store the linear model weights. It's faster, but requires all columns are sparse and have the 'sum' combiner. * dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. * dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. Defaults to the Adagrad optimizer. * dnn_hidden_units: List of hidden units per DNN layer. * dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. * dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * num_ps_replicas: The number of parameter server replicas. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. * input_layer_min_slice_size: Optional. The min slice size of input layer partitions. If not provided, will use the default of 64M. config: `RunConfig` object to configure the runtime settings. Returns: `ModelFnOps` Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time. """ head = params["head"] linear_feature_columns = params.get("linear_feature_columns") linear_optimizer = params.get("linear_optimizer") or "Ftrl" joint_linear_weights = params.get("joint_linear_weights") dnn_feature_columns = params.get("dnn_feature_columns") dnn_optimizer = params.get("dnn_optimizer") or "Adagrad" dnn_hidden_units = params.get("dnn_hidden_units") dnn_activation_fn = params.get("dnn_activation_fn") dnn_dropout = params.get("dnn_dropout") gradient_clip_norm = params.get("gradient_clip_norm") input_layer_min_slice_size = (params.get("input_layer_min_slice_size") or 64 << 20) num_ps_replicas = config.num_ps_replicas if config else 0 embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) if not linear_feature_columns and not dnn_feature_columns: raise ValueError( "Either linear_feature_columns or dnn_feature_columns must be defined." ) features = _get_feature_dict(features) # Build DNN Logits. 
dnn_parent_scope = "dnn" if not dnn_feature_columns: dnn_logits = None else: input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=input_layer_min_slice_size)) input_layer_scope = dnn_parent_scope + "/input_from_feature_columns" with variable_scope.variable_scope( input_layer_scope, values=features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=scope) hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( dnn_parent_scope + "/hiddenlayer_%d" % layer_id, values=[net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) # TODO(b/31209633): Consider adding summary before dropout. _add_hidden_layer_summary(net, scope.name) with variable_scope.variable_scope( dnn_parent_scope + "/logits", values=[net], partitioner=hidden_layer_partitioner) as scope: dnn_logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=scope) _add_hidden_layer_summary(dnn_logits, scope.name) # Build Linear logits. linear_parent_scope = "linear" if not linear_feature_columns: linear_logits = None else: linear_partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( linear_parent_scope, values=features.values(), partitioner=linear_partitioner) as scope: if joint_linear_weights: linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) # Combine logits and build full model. if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _make_training_op(training_loss): """Training op for the DNN linear combined model.""" train_ops = [] if dnn_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=contrib_variables.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), gradient_multipliers=_extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, dnn_parent_scope, input_layer_scope), clip_gradients=gradient_clip_norm, variables=ops.get_collection(dnn_parent_scope), name=dnn_parent_scope, # Empty summaries, because head already logs "loss" summary. 
summaries=[])) if linear_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=contrib_variables.get_global_step(), learning_rate=_linear_learning_rate( len(linear_feature_columns)), optimizer=_get_optimizer(linear_optimizer), clip_gradients=gradient_clip_norm, variables=ops.get_collection(linear_parent_scope), name=linear_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[])) return control_flow_ops.group(*train_ops) return head.create_model_fn_ops(features, labels, mode, _make_training_op, logits=logits)
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    dataset = dataset.batch(2)
    return dataset


def get_feature_column():
    feature_name = 'gender'
    sparse_id_column = layers.sparse_column_with_hash_bucket(
        column_name=feature_name, hash_bucket_size=100)
    feature_column = layers.embedding_column(sparse_id_column, dimension=10)
    return feature_column


features, _labels = get_dataset().make_one_shot_iterator().get_next()
feature_column = get_feature_column()
result = layers.input_from_feature_columns(features, [feature_column])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(2):
        result_eval = sess.run(fetches=[result])
        print(result_eval)

"""
[array([[-0.03351991,  0.13861407,  0.15951617, -0.10525074, -0.02098984,
          0.11429874,  0.07259871, -0.05891977,  0.13090345, -0.04620567],
        [-0.03351991,  0.13861407,  0.15951617, -0.10525074, -0.02098984,
          0.11429874,  0.07259871, -0.05891977,  0.13090345, -0.04620567]],
      dtype=float32)]
[array([[-0.03351991,  0.13861407,  0.15951617, -0.10525074, -0.02098984,
          0.11429874,  0.07259871, -0.05891977,  0.13090345, -0.04620567],
        [-0.00928837, -0.06804372,  0.10571972, -0.18538876, -0.11762749,
def _dnn_model_fn(features, labels, mode, params): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `_Head` instance. * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use the Adagrad optimizer with a default learning rate of 0.05. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * num_ps_replicas: The number of parameter server replicas. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ head = params["head"] hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or "Adagrad" activation_fn = params.get("activation_fn") dropout = params.get("dropout") gradient_clip_norm = params.get("gradient_clip_norm") num_ps_replicas = params.get("num_ps_replicas", 0) features = _get_feature_dict(features) parent_scope = "dnn" input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope( parent_scope + "/input_from_feature_columns", values=features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=scope) hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( parent_scope + "/hiddenlayer_%d" % layer_id, values=[net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout( net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, scope.name) with variable_scope.variable_scope( parent_scope + "/logits", values=[net], partitioner=hidden_layer_partitioner) as scope: logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[parent_scope], scope=scope) _add_hidden_layer_summary(logits, scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging the training_loss. summaries=[]) return head.head_ops(features, labels, mode, _train_op_fn, logits)
def parse_feature_columns_from_examples_test():
    """Construct examples by tf.train.Example. Then, parse feature columns from
    examples. Finally, get input from feature columns.

    Returns:
        The input tensor transformed from examples in defined feature columns
        format.
    """
    language_column = layers.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=20)
    feature_columns = [
        layers.embedding_column(language_column, dimension=3),
        layers.real_valued_column("age", dtype=tf.int64)
    ]

    example1 = tf.train.Example(features=tf.train.Features(
        feature={
            "age": tf.train.Feature(int64_list=tf.train.Int64List(value=[18])),
            "language": tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[b"en"]))
        }))
    example2 = tf.train.Example(features=tf.train.Features(
        feature={
            "age": tf.train.Feature(int64_list=tf.train.Int64List(value=[20])),
            "language": tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[b"fr"]))
        }))
    example3 = tf.train.Example(features=tf.train.Features(
        feature={
            "age": tf.train.Feature(int64_list=tf.train.Int64List(value=[25])),
            "language": tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[b"en"]))
        }))
    examples = [
        example1.SerializeToString(),
        example2.SerializeToString(),
        example3.SerializeToString()
    ]
    print(examples)

    # feature_lists = tf.train.FeatureLists(
    #     feature_list={
    #         "age": tf.train.FeatureList(
    #             feature=[
    #                 tf.train.Feature(int64_list=tf.train.Int64List(value=[18])),
    #                 tf.train.Feature(int64_list=tf.train.Int64List(value=[20])),
    #                 tf.train.Feature(int64_list=tf.train.Int64List(value=[25])),
    #             ]
    #         ),
    #         "language": tf.train.FeatureList(
    #             feature=[
    #                 tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"en"])),
    #                 tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"fr"])),
    #                 tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"zh"]))
    #             ]
    #         )
    #     }
    # )
    # print(feature_lists)
    # serialized = feature_lists.SerializeToString()

    columns_to_tensor = layers.parse_feature_columns_from_examples(
        serialized=examples, feature_columns=feature_columns)
    input_layer = layers.input_from_feature_columns(
        columns_to_tensors=columns_to_tensor, feature_columns=feature_columns)
    print("input_layer:\n", str(input_layer))

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run(session=sess)
    print(input_layer.eval(session=sess))
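# For reference, a hedged sketch of what layers.parse_feature_columns_from_examples
# does internally: derive a parsing spec from the same feature columns and call
# tf.parse_example directly. `examples` and `feature_columns` are assumed to be
# built exactly as in the test above.
def parse_with_explicit_spec(examples, feature_columns):
    feature_spec = layers.create_feature_spec_for_parsing(feature_columns)
    columns_to_tensor = tf.parse_example(serialized=examples,
                                         features=feature_spec)
    return layers.input_from_feature_columns(
        columns_to_tensors=columns_to_tensor, feature_columns=feature_columns)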
def _model_fn(features, labels, mode):
    """
    :param features:
    :param labels:
    :param mode:
    :return:
    """
    # Pop the name of the signal.
    if 'FN' in features:
        names = features.pop('FN')
    if 'FT' in features:
        labels = features.pop('FT')
    # Define the type of the inputs (they are all numeric).
    columns = [
        layers.real_valued_column(key) for key, value in features.items()
    ]
    inputs = layers.input_from_feature_columns(features, columns)
    # Declare the hidden_layers variable.
    hidden_layers = None
    # Iterate all over the hidden units.
    for unit in hidden_units:
        # Create a new hidden layer.
        hidden_layers = tf.layers.dense(
            inputs=inputs if hidden_layers is None else hidden_layers,
            activation=tf.nn.relu,
            units=unit,
        )
    # Create a dropout layer.
    dropout_layer = layers.dropout(inputs=hidden_layers,
                                   keep_prob=1.0 - dropout)
    # Create the logits layer.
    logits = tf.layers.dense(inputs=dropout_layer, activation=None, units=2)

    if mode in (ModeKeys.PREDICT, ModeKeys.EVAL):
        # Calculate the probabilities.
        probabilities = tf.nn.softmax(logits)
        # And their indexes.
        predictions = tf.argmax(logits, 1)

    if mode in (ModeKeys.EVAL, ModeKeys.TRAIN):
        # Convert the labels in the one_hot format.
        onehot_labels = tf.one_hot(indices=labels, depth=2)
        # Define the class weights.
        class_weights = tf.constant(weights)
        # Deduce weights for batch samples based on their true label.
        reduced_weights = tf.reduce_sum(class_weights * onehot_labels, axis=1)
        # Compute your (unweighted) softmax cross entropy loss.
        unweighted_losses = tf.nn.softmax_cross_entropy_with_logits(
            labels=onehot_labels, logits=logits)
        # Apply the weights, relying on broadcasting of the multiplication.
        weighted_losses = unweighted_losses * reduced_weights
        # Reduce the result to get your final loss.
        loss = tf.reduce_mean(weighted_losses)

    if mode == ModeKeys.PREDICT:
        # Convert predicted_indices back into strings.
        predictions = {
            'classes': predictions,
            'scores': probabilities,
        }
        # export_outputs = {
        #     'prediction': tf.estimator.export.PredictOutput(predictions)
        # }
        # return tf.estimator.EstimatorSpec(
        #     mode=mode,
        #     predictions=predictions,
        #     # export_outputs=export_outputs,
        # )
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
        )

    if mode == ModeKeys.TRAIN:
        # Define the training rule.
        train_op = layers.optimize_loss(
            loss=loss,
            global_step=framework.get_global_step(),
            learning_rate=learning_rate,
            optimizer='SGD')
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op)

    if mode == ModeKeys.EVAL:
        # Define the metrics to show up in the evaluation process.
        eval_metric_ops = {
            'accuracy': metrics.streaming_accuracy(predictions=predictions,
                                                   labels=labels),
            'auroc': metrics.streaming_auc(predictions=predictions,
                                           labels=labels),
            'recall': metrics.streaming_recall(predictions=predictions,
                                               labels=labels),
            'precision': metrics.streaming_precision(predictions=predictions,
                                                     labels=labels),
            'TP': metrics.streaming_true_positives(predictions=predictions,
                                                   labels=labels),
            'FN': metrics.streaming_false_negatives(predictions=predictions,
                                                    labels=labels),
            'FP': metrics.streaming_false_positives(predictions=predictions,
                                                    labels=labels),
            'TN': metrics.streaming_true_negatives(predictions=predictions,
                                                   labels=labels),
            # 'gaccuracy': metrics.streaming_accuracy(predictions=GP, labels=GL)
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)
def dnn_sampled_softmax_classifier_model_fn(features, target_indices, mode, params): """model_fn that uses candidate sampling. Args: features: Single Tensor or dict of Tensor (depends on data passed to `fit`) target_indices: A single Tensor of shape [batch_size, n_labels] containing the target indices. mode: Represents if this training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters that are listed below. hidden_units- List of hidden units per layer. All layers are fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second one has 32. feature_columns- An iterable containing all the feature columns used by the model. All items in the set should be instances of classes derived from `FeatureColumn`. n_classes- number of target classes. It must be greater than 2. n_samples- number of sample target classes. Needs to be tuned - A good starting point could be 2% of n_classes. n_labels- number of labels in each example. top_k- The number of classes to predict. optimizer- An instance of `tf.Optimizer` used to train the model. If `None`, will use an Adagrad optimizer. dropout- When not `None`, the probability we will drop out a given coordinate. gradient_clip_norm- A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. See tf.clip_by_global_norm for more details. num_ps_replicas- The number of parameter server replicas. Returns: predictions: A single Tensor or a dict of Tensors. loss: A scalar containing the loss of the step. train_op: The op for training. """ hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] n_classes = params["n_classes"] n_samples = params["n_samples"] n_labels = params["n_labels"] top_k = params["top_k"] optimizer = params["optimizer"] dropout = params["dropout"] gradient_clip_norm = params["gradient_clip_norm"] num_ps_replicas = params["num_ps_replicas"] parent_scope = "dnn_ss" # Setup the input layer partitioner. input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # Create the input layer. with variable_scope.variable_scope( parent_scope + "/input_from_feature_columns", features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( features, feature_columns, weight_collections=[parent_scope], scope=scope) # Setup the hidden layer partitioner. hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) final_hidden_layer_dim = None # Create hidden layers using fully_connected. for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( parent_scope + "/hiddenlayer_%d" % layer_id, [net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected(net, num_hidden_units, variables_collections=[parent_scope], scope=scope) final_hidden_layer_dim = num_hidden_units # Add dropout if it is enabled. if dropout is not None and mode == estimator.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) # Create the weights and biases for the logit layer. 
with variable_scope.variable_scope( parent_scope + "/logits", [net], partitioner=hidden_layer_partitioner) as scope: dtype = net.dtype.base_dtype weights_shape = [n_classes, final_hidden_layer_dim] weights = variables.model_variable( "weights", shape=weights_shape, dtype=dtype, initializer=initializers.xavier_initializer(), trainable=True, collections=[parent_scope]) biases = variables.model_variable( "biases", shape=[n_classes,], dtype=dtype, initializer=init_ops.zeros_initializer, trainable=True, collections=[parent_scope]) if mode == estimator.ModeKeys.TRAIN: # Call the candidate sampling APIs and calculate the loss. sampled_values = nn.learned_unigram_candidate_sampler( true_classes=math_ops.to_int64(target_indices), num_true=n_labels, num_sampled=n_samples, unique=True, range_max=n_classes) sampled_softmax_loss = nn.sampled_softmax_loss( weights=weights, biases=biases, inputs=net, labels=math_ops.to_int64(target_indices), num_sampled=n_samples, num_classes=n_classes, num_true=n_labels, sampled_values=sampled_values) loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss") train_op = optimizers.optimize_loss( loss=loss, global_step=contrib_framework.get_global_step(), learning_rate=_DEFAULT_LEARNING_RATE, optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm, name=parent_scope) return None, loss, train_op elif mode == estimator.ModeKeys.EVAL: logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)), biases) predictions = {} predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) _, predictions[_TOP_K] = nn.top_k(logits, top_k) # Since the targets have multiple labels, setup the target probabilities # as 1.0/n_labels for each of the labels. target_one_hot = array_ops.one_hot( indices=target_indices, depth=n_classes, on_value=1.0 / n_labels) target_one_hot = math_ops.reduce_sum( input_tensor=target_one_hot, reduction_indices=[1]) loss = math_ops.reduce_mean( nn.softmax_cross_entropy_with_logits(logits, target_one_hot)) return predictions, loss, None elif mode == estimator.ModeKeys.INFER: logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)), biases) predictions = {} predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) _, predictions[_TOP_K] = nn.top_k(logits, top_k) return predictions, None, None
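# Minimal standalone sketch of the candidate-sampling pattern used above:
# sampled_softmax_loss during training, full logits at eval/inference. All
# sizes and variable names here are made up for illustration.
import tensorflow as tf

n_classes, n_samples, hidden_dim, batch = 10000, 200, 64, 32
net = tf.random_normal([batch, hidden_dim])                    # last hidden layer
label_ids = tf.random_uniform([batch, 1], 0, n_classes, dtype=tf.int64)
weights = tf.get_variable("softmax_weights", [n_classes, hidden_dim])
biases = tf.get_variable("softmax_biases", [n_classes],
                         initializer=tf.zeros_initializer())

# Training: estimate the softmax over a small sample of negative classes.
train_loss = tf.reduce_mean(
    tf.nn.sampled_softmax_loss(weights=weights,
                               biases=biases,
                               labels=label_ids,
                               inputs=net,
                               num_sampled=n_samples,
                               num_classes=n_classes))

# Eval/inference: compute the full logits, exactly as in the EVAL/INFER
# branches above.
full_logits = tf.nn.bias_add(tf.matmul(net, weights, transpose_b=True), biases)
probabilities = tf.nn.softmax(full_logits)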
def _dnn_classifier_model_fn(features, targets, mode, params): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * n_classes: number of target classes. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * enable_centered_bias: A bool. If True, estimator will learn a centered bias variable for each class. Rest of the model structure learns the residual after centered bias. * num_ps_replicas: The number of parameter server replicas. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] n_classes = params["n_classes"] weight_column_name = params["weight_column_name"] optimizer = params["optimizer"] activation_fn = params["activation_fn"] dropout = params["dropout"] gradient_clip_norm = params["gradient_clip_norm"] enable_centered_bias = params["enable_centered_bias"] num_ps_replicas = params["num_ps_replicas"] features = _get_feature_dict(features) parent_scope = "dnn" num_label_columns = 1 if n_classes == 2 else n_classes if n_classes == 2: loss_fn = loss_ops.sigmoid_cross_entropy else: loss_fn = loss_ops.sparse_softmax_cross_entropy input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope( parent_scope + "/input_from_feature_columns", values=features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=scope) hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( parent_scope + "/hiddenlayer_%d" % layer_id, values=[net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=scope) if dropout is not None and mode == estimator.ModeKeys.TRAIN: net = layers.dropout( net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, scope.name) with variable_scope.variable_scope( parent_scope + "/logits", values=[net], partitioner=hidden_layer_partitioner) as scope: logits = layers.fully_connected( net, num_label_columns, activation_fn=None, variables_collections=[parent_scope], scope=scope) _add_hidden_layer_summary(logits, scope.name) if enable_centered_bias: logits = nn.bias_add(logits, 
_centered_bias(num_label_columns)) if mode == estimator.ModeKeys.TRAIN: targets = _reshape_targets(targets) weight = _get_weight_tensor(features, weight_column_name) training_loss = loss_fn(logits, targets, weight=weight) loss = _rescale_eval_loss(training_loss, weight) train_ops = [optimizers.optimize_loss( loss=training_loss, global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging the training_loss. summaries=[])] if enable_centered_bias: train_ops.append(_centered_bias_step(targets, loss_fn, num_label_columns)) logging_ops.scalar_summary("loss", loss) return None, loss, control_flow_ops.group(*train_ops) elif mode == estimator.ModeKeys.EVAL: predictions = _predictions(logits=logits, n_classes=n_classes) targets = _reshape_targets(targets) weight = _get_weight_tensor(features, weight_column_name) training_loss = loss_fn(logits, targets, weight=weight) loss = _rescale_eval_loss(training_loss, weight) return predictions, loss, [] else: # mode == estimator.ModeKeys.INFER: predictions = _predictions(logits=logits, n_classes=n_classes) return predictions, None, []
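# The _centered_bias helpers referenced above are not shown. A rough sketch of
# the idea, under the assumption that the centered bias is a per-class
# variable added to the logits and fitted on its own loss, so the rest of the
# network only has to learn the residual:
import tensorflow as tf


def centered_bias(num_label_columns):
    """Create a per-class bias variable initialized to zero."""
    return tf.get_variable("centered_bias",
                           shape=[num_label_columns],
                           initializer=tf.zeros_initializer())


def centered_bias_step(labels, loss_fn, num_label_columns, bias):
    """One step that fits the bias alone to the labels (sketch)."""
    batch_size = tf.shape(labels)[0]
    # Broadcast the bias to per-example "logits" and minimize only w.r.t. it.
    bias_logits = tf.tile(tf.expand_dims(bias, 0), [batch_size, 1])
    bias_loss = tf.reduce_mean(loss_fn(bias_logits, labels))
    return tf.train.AdagradOptimizer(0.1).minimize(bias_loss, var_list=[bias])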
def _dnn_model_fn(features, labels, mode, params, config=None): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `_Head` instance. * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use the Adagrad optimizer with a default learning rate of 0.05. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. Note that a string containing the unqualified name of the op may also be provided, e.g., "relu", "tanh", or "sigmoid". * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. config: `RunConfig` object to configure the runtime settings. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ head = params["head"] hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or "Adagrad" activation_fn = nn.relu dropout = params.get("dropout") gradient_clip_norm = params.get("gradient_clip_norm") features = _get_feature_dict(features) parent_scope = "dnn" with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(features.values())) as input_layer_scope: if all([ isinstance(fc, feature_column._FeatureColumn) # pylint: disable=protected-access for fc in feature_columns ]): net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=input_layer_scope) else: net = fc_core.input_layer(features=features, feature_columns=feature_columns, weight_collections=[parent_scope]) with variable_scope.variable_scope(parent_scope, values=tuple(features.values())): for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(net, )) as hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope("logits", values=(net, )) as logits_scope: logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[parent_scope], scope=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=optimizer(), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) return head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
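# Sketch of the "core" feature-column path taken by the else-branch above:
# columns built with tf.feature_column and fed through
# tf.feature_column.input_layer instead of contrib's
# input_from_feature_columns. Feature names and sizes are assumptions.
import tensorflow as tf

features = {
    "age": tf.constant([[18.0], [25.0]]),
    "language": tf.constant([["en"], ["fr"]]),
}
language = tf.feature_column.categorical_column_with_hash_bucket(
    "language", hash_bucket_size=20)
core_columns = [
    tf.feature_column.numeric_column("age"),
    tf.feature_column.embedding_column(language, dimension=3),
]
net = tf.feature_column.input_layer(features=features,
                                    feature_columns=core_columns)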
def my_model(features, labels, mode, params): user_columns = params['user_columns'] item_columns = params['item_columns'] user_features = {} item_features = {} for name in user_columns: if name in features: user_features[name] = features[name] for name in item_columns: if name in features: item_features[name] = features[name] with tf.variable_scope("user_net") as scope1: user_net = tf.feature_column.input_layer( user_features, params['user_feature_columns']) for units in params['user_hidden_units']: user_net = tf.layers.dense(user_net, units=units, activation=tf.nn.relu) user_net = tf.layers.dense(user_net, params.get("emb_size"), activation=None) user_net_out = tf.reduce_join(tf.as_string(user_net, 8), -1, separator=',') with tf.variable_scope("item_net") as scope2: item_net = layers.input_from_feature_columns( item_features, params['item_feature_columns']) for units in params['item_hidden_units']: item_net = tf.layers.dense(item_net, units=units, activation=tf.nn.relu) item_net = tf.layers.dense(item_net, params.get("emb_size"), activation=None) item_net_out = tf.reduce_join(tf.as_string(item_net, 8), -1, separator=',') c = tf.layers.dense(tf.concat([user_net, item_net], -1), units=128, activation=tf.nn.relu) logits = layers.linear(c, 1, biases_initializer=None) # Compute predictions. predicted_classes = tf.argmax(logits, 1) if mode == tf.estimator.ModeKeys.PREDICT: predictions = { 'class_ids': predicted_classes[:, tf.newaxis], 'probabilities': tf.nn.sigmoid(logits), 'logits': logits, 'user_emb': user_net_out, 'item_emb': item_net_out, 'user_id': features['user_id'], "item_id": features['item_id'] } return tf.estimator.EstimatorSpec(mode, predictions=predictions) # Compute loss. labels = tf.reshape(tf.cast(labels, tf.float32), [-1, 1]) loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels)) auc = tf.metrics.auc(labels=labels, predictions=tf.nn.sigmoid(logits), name='auc') logging_hook = tf.train.LoggingTensorHook({ "loss": loss, "auc": auc[1] }, every_n_iter=100) metrics = {'auc': auc} tf.summary.scalar('accuracy', auc[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) # Create training op. assert mode == tf.estimator.ModeKeys.TRAIN optimizer = tf.train.AdagradOptimizer(learning_rate=0.01) train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_hooks=[logging_hook])
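# A usage sketch for my_model above. The column names, hash-bucket sizes and
# layer sizes are placeholders; params must carry at least the keys the
# model_fn reads (user_columns, item_columns, *_feature_columns,
# *_hidden_units, emb_size). Note the mix of column types: the user tower
# uses core tf.feature_column columns (tf.feature_column.input_layer), while
# the item tower uses contrib columns (layers.input_from_feature_columns).
import tensorflow as tf

user_id = tf.feature_column.categorical_column_with_hash_bucket(
    "user_id", hash_bucket_size=100000)

params = {
    "user_columns": ["user_id"],
    "item_columns": ["item_id"],
    "user_feature_columns": [tf.feature_column.embedding_column(user_id, 32)],
    "item_feature_columns": [layers.embedding_column(
        layers.sparse_column_with_hash_bucket("item_id", 100000),
        dimension=32)],
    "user_hidden_units": [128, 64],
    "item_hidden_units": [128, 64],
    "emb_size": 32,
}
estimator = tf.estimator.Estimator(model_fn=my_model, params=params,
                                   model_dir="/tmp/two_tower")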
def _process_exogenous_features(self, times, features): """Create a single vector from exogenous features. Args: times: A [batch size, window size] vector of times for this batch, primarily used to check the shape information of exogenous features. features: A dictionary of exogenous features corresponding to the columns in self._exogenous_feature_columns. Each value should have a shape prefixed by [batch size, window size]. Returns: A Tensor with shape [batch size, window size, exogenous dimension], where the size of the exogenous dimension depends on the exogenous feature columns passed to the model's constructor. Raises: ValueError: If an exogenous feature has an unknown rank. """ if self._exogenous_feature_columns: exogenous_features_single_batch_dimension = {} for name, tensor in features.items(): if tensor.get_shape().ndims is None: # input_from_feature_columns does not support completely unknown # feature shapes, so we save on a bit of logic and provide a better # error message by checking that here. raise ValueError(( "Features with unknown rank are not supported. Got shape {} for " "feature {}.").format(tensor.get_shape(), name)) tensor_shape_dynamic = array_ops.shape(tensor) tensor = array_ops.reshape( tensor, array_ops.concat( [[tensor_shape_dynamic[0] * tensor_shape_dynamic[1]], tensor_shape_dynamic[2:]], axis=0)) # Avoid shape warnings when embedding "scalar" exogenous features (those # with only batch and window dimensions); input_from_feature_columns # expects input ranks to match the embedded rank. if tensor.get_shape().ndims == 1: exogenous_features_single_batch_dimension[ name] = tensor[:, None] else: exogenous_features_single_batch_dimension[name] = tensor embedded_exogenous_features_single_batch_dimension = ( layers.input_from_feature_columns( columns_to_tensors= exogenous_features_single_batch_dimension, feature_columns=self._exogenous_feature_columns, trainable=True)) exogenous_regressors = array_ops.reshape( embedded_exogenous_features_single_batch_dimension, array_ops.concat([ array_ops.shape(times), array_ops.shape( embedded_exogenous_features_single_batch_dimension)[1:] ], axis=0)) exogenous_regressors.set_shape(times.get_shape().concatenate( embedded_exogenous_features_single_batch_dimension.get_shape() [1:])) exogenous_regressors = math_ops.cast(exogenous_regressors, dtype=self.dtype) else: # Not having any exogenous features is a special case so that models can # avoid superfluous updates, which may not be free of side effects due to # bias terms in transformations. exogenous_regressors = None return exogenous_regressors
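# Numeric illustration of the reshape trick above: exogenous features with a
# [batch, window] prefix are flattened to [batch * window, ...] so
# input_from_feature_columns sees a single batch dimension, then the embedded
# result is reshaped back to [batch, window, exogenous_dim]. Sizes are made up.
import numpy as np

batch, window, embed_dim = 4, 7, 3
flat = np.zeros([batch * window, embed_dim])      # output of the input layer
regressors = flat.reshape([batch, window, embed_dim])
assert regressors.shape == (4, 7, 3)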
def build_network_my(self, num_factor=10, num_factor_mlp=64, hidden_dimension=10, num_neg_sample=30): print("my network") self.num_neg_sample = num_neg_sample self.user_id = tf.placeholder(dtype=tf.string, shape=[None], name='user_id') self.item_id = tf.placeholder(dtype=tf.string, shape=[None], name='item_id') ##########################################################################3 self.target_item_id = tf.placeholder(dtype=tf.string, shape=[None], name='target_item_id') self.hot_item_id = tf.placeholder(dtype=tf.string, shape=[None], name='hot_item_id') self.long_item_id = tf.placeholder(dtype=tf.string, shape=[None], name='long_item_id') ########################################################################### self.y = tf.placeholder(dtype=tf.float32, shape=[None], name='y') self.par = tf.placeholder(dtype=tf.float32) ################################################################################### ################################################################################## a = {'user': self.user_id} b = {'item': self.item_id} c = {'item': self.target_item_id} d = {'user_low': self.user_id} e = {'item_low': self.item_id} f = {'item_low': self.target_item_id} h = {'item': self.hot_item_id} l = {'item': self.long_item_id} with tf.variable_scope(name_or_scope='embedding', reuse=tf.AUTO_REUSE) as scope: id_feature1 = layers.sparse_column_with_hash_bucket( column_name='user', hash_bucket_size=190000 # use_hashmap=use_hashmap ) id_feature2 = layers.sparse_column_with_hash_bucket( column_name='item', hash_bucket_size=120000 # use_hashmap=use_hashmap ) shared_embedding_columns1 = layers.embedding_column( id_feature1, dimension=64, combiner="mean") # # shared_embedding_columns2 = layers.embedding_column( id_feature2, dimension=64, combiner="mean") a1 = [] a1.append(shared_embedding_columns1) b1 = [] b1.append(shared_embedding_columns2) # mlp_user_latent_factor = layers.input_from_feature_columns( a, a1, scope='user') mlp_item_latent_factor = layers.input_from_feature_columns( b, b1, scope='item') mlp_target_item_latent_factor = layers.input_from_feature_columns( c, b1, scope='item') ######################################################################################### mlp_hot_item_latent_factor = layers.input_from_feature_columns( h, b1, scope='item') mlp_long_item_latent_factor = layers.input_from_feature_columns( l, b1, scope='item') ######################################################################################### id_feature3 = layers.sparse_column_with_hash_bucket( column_name='user_low', hash_bucket_size=190000 # use_hashmap=use_hashmap ) id_feature4 = layers.sparse_column_with_hash_bucket( column_name='item_low', hash_bucket_size=120000 # use_hashmap=use_hashmap ) shared_embedding_columns3 = layers.embedding_column( id_feature3, dimension=10, combiner="mean") # # shared_embedding_columns4 = layers.embedding_column( id_feature4, dimension=10, combiner="mean") d1 = [] d1.append(shared_embedding_columns3) e1 = [] e1.append(shared_embedding_columns4) # user_latent_factor = layers.input_from_feature_columns( d, d1, scope='user_low') item_latent_factor = layers.input_from_feature_columns( e, e1, scope='item_low') target_item_latent_factor = layers.input_from_feature_columns( f, e1, scope='item_low') ################################################################################### ################################################################################################### 
################################################################################################### GMF = tf.multiply(user_latent_factor, item_latent_factor) ##################################################################### GMF_target = tf.multiply(user_latent_factor, target_item_latent_factor) ##################################################################### user_feature = self.user_side(mlp_user_latent_factor) item_feature = self.item_side(mlp_item_latent_factor) ######################################################### target_item_feature = self.item_side(mlp_target_item_latent_factor, reuse=True) hot_item_feature = self.item_side(mlp_hot_item_latent_factor, reuse=True) long_item_feature = self.item_side(mlp_long_item_latent_factor, reuse=True) ######################################################### self.pair_loss = 0 self.resort_item = [] self.resort_label = [] for i in range(0, self.batch_size): temp1 = [] temp2 = [] temp1.append(item_feature[i * self.batch_size:(i + 1) * self.batch_size, :]) temp2.append(self.y[i * self.batch_size:(i + 1) * self.batch_size]) self.resort_item.append(temp1) self.resort_label.append(temp2) discriminative_loss = [] for i in range(0, self.batch_size): discriminative_loss.append( get_center_loss(tf.reshape(self.resort_item[i], (-1, 128)), tf.reshape(self.resort_label[i], (-1, 1)), 2)) for i in range(0, self.batch_size): self.pair_loss = self.pair_loss + discriminative_loss[ i] / self.batch_size # ######################################################### # self.userF=user_feature # self.itemF=item_feature # ######################################### # # self.pred_y = tf.nn.sigmoid( # # tf.reduce_sum( 5 * tf.multiply(user_feature, item_feature),1)) # ######################################## self.pred_y = tf.nn.sigmoid( tf.reduce_sum( tf.concat([GMF, 5 * tf.multiply(user_feature, item_feature)], axis=1), 1)) # self.pred_long=tf.nn.sigmoid(tf.reduce_sum(tf.concat([GMF,5*tf.multiply(user_feature, target_item_feature)], axis=1), 1)) avg_GMF = tf.reduce_mean(GMF) # avg_GMF=tf.stop_gradient(tf.identity(tf.reduce_mean(GMF))) self.pred_long = tf.nn.sigmoid(avg_GMF + tf.reduce_sum( 5 * tf.multiply(user_feature, target_item_feature), 1)) # self.pred_y = tf.layers.dense(inputs=tf.concat([GMF, MLP], axis=1), units=1, activation=tf.sigmoid, kernel_initializer=tf.random_normal_initializer, kernel_regularizer= tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) #Pseudo label self.p1 = tf.reshape( tf.gather( self.pred_long, tf.reshape(tf.where(tf.less(self.pred_long, 0.2)), [ -1, ])), [-1, 1]) self.p2 = tf.reshape( tf.gather( self.pred_long, tf.reshape(tf.where(tf.greater(self.pred_long, 0.8)), [ -1, ])), [-1, 1]) self.tar1 = tf.maximum( 0.0, tf.reduce_mean(-self.p1 * tf.log( tf.clip_by_value(self.p1, 0.005, 1)))) #/ self.batch_size self.tar2 = tf.maximum( 0.0, tf.reduce_mean(-self.p2 * tf.log( tf.clip_by_value(self.p2, 0.005, 1)))) #/ self.batch_size self.pseudo_loss = self.tar1 + self.tar2 # self.loss = - tf.reduce_sum( # self.y * tf.log(self.pred_y + 1e-10) + (1 - self.y) * tf.log(1 - self.pred_y + 1e-10)) self.loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=self.pred_y, labels=self.y)) self.weight_loss = 0.01 * tf.losses.get_regularization_loss( ) #+ self.reg_rate * ( # tf.nn.l2_loss(self.P) + tf.nn.l2_loss(self.Q) + tf.nn.l2_loss(self.mlp_P) + tf.nn.l2_loss(self.mlp_Q)) # self.DAloss=tf.maximum(0.0001,KMMD(hot_item_feature,long_item_feature)) self.DAloss = self.coral_loss(hot_item_feature, long_item_feature) # self.optimizer = 
tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss)
        # Total objective: classification loss + L2 regularization
        # + CORAL domain-adaptation loss + (curriculum-weighted) pseudo-label
        # and center (pair) losses.
        self.total_loss = (
            self.loss + self.weight_loss + self.A2C_weight * self.DAloss +
            self.pseudo_weight * self.par * self.pseudo_loss +
            self.center_weight * self.par * self.pair_loss)
        self.optimizer = tf.train.AdamOptimizer(0.0001).minimize(
            self.total_loss)
        return self
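# self.coral_loss is referenced above but its implementation is not shown. A
# common CORAL (CORrelation ALignment) formulation matches the second-order
# statistics of the two feature batches; this is only a sketch of that idea,
# not the author's implementation.
import tensorflow as tf


def coral_loss_sketch(source_features, target_features):
    """Squared Frobenius distance between source/target feature covariances."""
    d = tf.cast(tf.shape(source_features)[1], tf.float32)

    def _covariance(x):
        n = tf.cast(tf.shape(x)[0], tf.float32)
        mean = tf.reduce_mean(x, axis=0, keep_dims=True)
        centered = x - mean
        return tf.matmul(centered, centered, transpose_a=True) / (n - 1.0)

    cov_s = _covariance(source_features)
    cov_t = _covariance(target_features)
    return tf.reduce_sum(tf.square(cov_s - cov_t)) / (4.0 * d * d)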
def _dnn_tree_combined_model_fn( features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, tree_learner_config, num_trees, tree_examples_per_layer, config=None, dnn_optimizer="Adagrad", dnn_activation_fn=nn.relu, dnn_dropout=None, dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, predict_with_tree_only=False, tree_feature_columns=None, tree_center_bias=False, dnn_to_tree_distillation_param=None, use_core_versions=False, output_type=model.ModelBuilderOutputType.MODEL_FN_OPS, override_global_step_value=None): """DNN and GBDT combined model_fn. Args: features: `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) head: A `Head` instance. dnn_hidden_units: List of hidden units per layer. dnn_feature_columns: An iterable containing all the feature columns used by the model's DNN. tree_learner_config: A config for the tree learner. num_trees: Number of trees to grow model to after training DNN. tree_examples_per_layer: Number of examples to accumulate before growing the tree a layer. This value has a big impact on model quality and should be set equal to the number of examples in training dataset if possible. It can also be a function that computes the number of examples based on the depth of the layer that's being built. config: `RunConfig` of the estimator. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN. If `None`, will use the Adagrad optimizer with default learning rate of 0.001. dnn_activation_fn: Activation function applied to each layer of the DNN. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability to drop out a given unit in the DNN. dnn_input_layer_partitioner: Partitioner for input layer of the DNN. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. dnn_input_layer_to_tree: Whether to provide the DNN's input layer as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. predict_with_tree_only: Whether to use only the tree model output as the final prediction. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the float defines the weight of the distillation loss, and the loss_fn, for computing distillation loss, takes dnn_logits, tree_logits and weight tensor. If the entire tuple is None, no distillation will be applied. If only the loss_fn is None, we will take the sigmoid/softmax cross entropy loss be default. When distillation is applied, `predict_with_tree_only` will be set to True. use_core_versions: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. output_type: Whether to return ModelFnOps (old interface) or EstimatorSpec (new interface). override_global_step_value: If after the training is done, global step value must be reset to this value. This is particularly useful for hyper parameter tuning, which can't recognize early stopping due to the number of trees. If None, no override of global step will happen. Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. 
""" if not isinstance(features, dict): raise ValueError("features should be a dictionary of `Tensor`s. " "Given type: {}".format(type(features))) if not dnn_feature_columns: raise ValueError("dnn_feature_columns must be specified") if dnn_to_tree_distillation_param: if not predict_with_tree_only: logging.warning( "update predict_with_tree_only to True since distillation" "is specified.") predict_with_tree_only = True # Build DNN Logits. dnn_parent_scope = "dnn" dnn_partitioner = dnn_input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=config.num_ps_replicas, min_slice_size=64 << 20)) if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and not use_core_versions): raise ValueError("You must use core versions with Estimator Spec") with variable_scope.variable_scope(dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=dnn_partitioner) as input_layer_scope: if use_core_versions: input_layer = feature_column_lib.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) else: input_layer = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=input_layer_scope) previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(previous_layer, )) as hidden_layer_scope: net = layers.fully_connected( previous_layer, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( "logits", values=(previous_layer, )) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=logits_scope) _add_hidden_layer_summary(dnn_logits, logits_scope.name) def _dnn_train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=training_util.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), name=dnn_parent_scope, variables=ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) # Build Tree Logits. global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. 
name="ensemble_model") tree_features = features.copy() if dnn_input_layer_to_tree: tree_features["dnn_input_layer"] = input_layer tree_feature_columns.append( layers.real_valued_column("dnn_input_layer")) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=tree_center_bias, examples_per_layer=tree_examples_per_layer, learner_config=tree_learner_config, feature_columns=tree_feature_columns, logits_dimension=head.logits_dimension, features=tree_features, use_core_columns=use_core_versions) with ops.name_scope("gbdt"): predictions_dict = gbdt_model.predict(mode) tree_logits = predictions_dict["predictions"] def _tree_train_op_fn(loss): """Returns the op to optimize the loss.""" if dnn_to_tree_distillation_param: loss_weight, loss_fn = dnn_to_tree_distillation_param weight_tensor = head_lib._weight_tensor( # pylint: disable=protected-access features, head.weight_column_name) dnn_logits_fixed = array_ops.stop_gradient(dnn_logits) if loss_fn is None: # we create the loss_fn similar to the head loss_fn for # multi_class_head used previously as the default one. n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn( n_classes) dnn_to_tree_distillation_loss = loss_weight * loss_fn( dnn_logits_fixed, tree_logits, weight_tensor) summary.scalar("dnn_to_tree_distillation_loss", dnn_to_tree_distillation_loss) loss += dnn_to_tree_distillation_loss update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op if predict_with_tree_only: if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER: tree_train_logits = tree_logits else: tree_train_logits = control_flow_ops.cond( global_step > dnn_steps_to_train, lambda: tree_logits, lambda: dnn_logits) else: tree_train_logits = dnn_logits + tree_logits def _no_train_op_fn(loss): """Returns a no-op.""" del loss return control_flow_ops.no_op() if tree_center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS: if use_core_versions: model_fn_ops = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops( dnn_train_op).train_op tree_train_op = head.create_estimator_spec( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops( tree_train_op).train_op model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits).train_op tree_train_op = head.create_model_fn_ops( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits).train_op # Add the hooks model_fn_ops.training_hooks.extend([ 
trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train, tree_train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees, override_global_step_value) ]) return model_fn_ops elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC: fusion_spec = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_spec = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) tree_spec = head.create_estimator_spec(features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) training_hooks = [ trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train, tree_spec.train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees, override_global_step_value) ] fusion_spec = fusion_spec._replace(training_hooks=training_hooks + list(fusion_spec.training_hooks)) return fusion_spec
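# The distillation branch above adds loss_weight * loss_fn(dnn_logits_fixed,
# tree_logits, weights) to the tree loss. A sketch of a binary (n_classes == 2)
# cross-entropy distillation loss in that spirit; the real default comes from
# distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn and is not shown
# here.
import tensorflow as tf


def binary_distillation_loss(dnn_logits_fixed, tree_logits, weights=None):
    """Push tree logits toward the (frozen) DNN's predicted probabilities."""
    soft_targets = tf.sigmoid(dnn_logits_fixed)        # teacher probabilities
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=soft_targets, logits=tree_logits)
    if weights is not None:
        per_example *= weights
    return tf.reduce_mean(per_example)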
def _dnn_classifier_model_fn(features, labels, mode, params): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * n_classes: number of label classes. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * enable_centered_bias: A bool. If True, estimator will learn a centered bias variable for each class. Rest of the model structure learns the residual after centered bias. * num_ps_replicas: The number of parameter server replicas. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] n_classes = params["n_classes"] weight_column_name = params["weight_column_name"] optimizer = params["optimizer"] activation_fn = params["activation_fn"] dropout = params["dropout"] gradient_clip_norm = params["gradient_clip_norm"] enable_centered_bias = params["enable_centered_bias"] num_ps_replicas = params["num_ps_replicas"] features = _get_feature_dict(features) parent_scope = "dnn" num_label_columns = 1 if n_classes == 2 else n_classes if n_classes == 2: loss_fn = loss_ops.sigmoid_cross_entropy else: loss_fn = loss_ops.sparse_softmax_cross_entropy input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope( parent_scope + "/input_from_feature_columns", values=features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=scope) hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( parent_scope + "/hiddenlayer_%d" % layer_id, values=[net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected(net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=scope) if dropout is not None and mode == estimator.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, scope.name) with variable_scope.variable_scope( parent_scope + "/logits", values=[net], partitioner=hidden_layer_partitioner) as scope: logits = layers.fully_connected(net, num_label_columns, activation_fn=None, variables_collections=[parent_scope], scope=scope) _add_hidden_layer_summary(logits, scope.name) if enable_centered_bias: logits = nn.bias_add(logits, 
_centered_bias(num_label_columns)) if mode == estimator.ModeKeys.TRAIN: labels = _reshape_labels(labels) weights = _get_weight_tensor(features, weight_column_name) training_loss = loss_fn(logits, labels, weights=weights) loss = _rescale_eval_loss(training_loss, weights) train_ops = [ optimizers.optimize_loss( loss=training_loss, global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging the training_loss. summaries=[]) ] if enable_centered_bias: train_ops.append( _centered_bias_step(labels, loss_fn, num_label_columns)) summary.scalar("loss", loss) return None, loss, control_flow_ops.group(*train_ops) elif mode == estimator.ModeKeys.EVAL: predictions = _predictions(logits=logits, n_classes=n_classes) labels = _reshape_labels(labels) weights = _get_weight_tensor(features, weight_column_name) training_loss = loss_fn(logits, labels, weights=weights) loss = _rescale_eval_loss(training_loss, weights) return predictions, loss, [] else: # mode == estimator.ModeKeys.INFER: predictions = _predictions(logits=logits, n_classes=n_classes) return predictions, None, []
def _dnn_model_fn(features, labels, mode, params): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `_Head` instance. * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use the Adagrad optimizer with a default learning rate of 0.05. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * num_ps_replicas: The number of parameter server replicas. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ head = params["head"] hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or "Adagrad" activation_fn = params.get("activation_fn") dropout = params.get("dropout") gradient_clip_norm = params.get("gradient_clip_norm") num_ps_replicas = params.get("num_ps_replicas", 0) embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) features = _get_feature_dict(features) parent_scope = "dnn" input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) input_layer_scope = parent_scope + "/input_from_feature_columns" with variable_scope.variable_scope( input_layer_scope, values=list(six.itervalues(features)), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=scope) hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( parent_scope + "/hiddenlayer_%d" % layer_id, values=[net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected(net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, scope.name) with variable_scope.variable_scope( parent_scope + "/logits", values=[net], partitioner=hidden_layer_partitioner) as scope: logits = layers.fully_connected(net, head.logits_dimension, activation_fn=None, variables_collections=[parent_scope], scope=scope) _add_hidden_layer_summary(logits, scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), 
gradient_multipliers=( dnn_linear_combined._extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, parent_scope, input_layer_scope)), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging the training_loss. summaries=[]) return head.head_ops(features, labels, mode, _train_op_fn, logits)
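# _extract_embedding_lr_multipliers builds the gradient_multipliers dict
# consumed by optimize_loss above: variable (or variable name) -> scalar
# multiplier, so embedding weights can train at a scaled learning rate. A
# self-contained sketch of the mechanism with an assumed variable and loss:
import tensorflow as tf

embedding_var = tf.get_variable("embedding_weights", [100, 8])
loss = tf.reduce_mean(tf.square(embedding_var))
train_op = tf.contrib.layers.optimize_loss(
    loss=loss,
    global_step=tf.train.get_or_create_global_step(),
    learning_rate=0.05,
    optimizer="Adagrad",
    # Gradients of embedding_var are multiplied by 0.5 before the update,
    # halving its effective learning rate relative to other variables.
    gradient_multipliers={embedding_var: 0.5})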
def _dnn_linear_combined_model_fn(features, labels, mode, params): """Deep Neural Net and Linear combined model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * linear_feature_columns: An iterable containing all the feature columns used by the Linear model. * linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. * joint_linear_weights: If True a single (possibly partitioned) variable will be used to store the linear model weights. It's faster, but requires all columns are sparse and have the 'sum' combiner. * dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. * dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. * dnn_hidden_units: List of hidden units per DNN layer. * dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. * dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * num_ps_replicas: The number of parameter server replicas. Returns: `estimator.ModelFnOps` Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time. """ head = params["head"] linear_feature_columns = params.get("linear_feature_columns") linear_optimizer = params.get("linear_optimizer") joint_linear_weights = params.get("joint_linear_weights") dnn_feature_columns = params.get("dnn_feature_columns") dnn_optimizer = params.get("dnn_optimizer") dnn_hidden_units = params.get("dnn_hidden_units") dnn_activation_fn = params.get("dnn_activation_fn") dnn_dropout = params.get("dnn_dropout") gradient_clip_norm = params.get("gradient_clip_norm") num_ps_replicas = params["num_ps_replicas"] if not linear_feature_columns and not dnn_feature_columns: raise ValueError( "Either linear_feature_columns or dnn_feature_columns must be defined.") features = _get_feature_dict(features) # Build DNN Logits. 
dnn_parent_scope = "dnn" if not dnn_feature_columns: dnn_logits = None else: input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope( dnn_parent_scope + "/input_from_feature_columns", values=features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=scope) hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( dnn_parent_scope + "/hiddenlayer_%d" % layer_id, values=[net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=scope) if dnn_dropout is not None and mode == estimator.ModeKeys.TRAIN: net = layers.dropout( net, keep_prob=(1.0 - dnn_dropout)) # TODO(b/31209633): Consider adding summary before dropout. _add_hidden_layer_summary(net, scope.name) with variable_scope.variable_scope( dnn_parent_scope + "/logits", values=[net], partitioner=hidden_layer_partitioner) as scope: dnn_logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=scope) _add_hidden_layer_summary(dnn_logits, scope.name) # Build Linear logits. linear_parent_scope = "linear" if not linear_feature_columns: linear_logits = None else: linear_partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( linear_parent_scope, values=features.values(), partitioner=linear_partitioner) as scope: if joint_linear_weights: linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) # Combine logits and build full model. if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _make_training_op(training_loss): """Training op for the DNN linear combined model.""" train_ops = [] if dnn_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=contrib_variables.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), clip_gradients=gradient_clip_norm, variables=ops.get_collection(dnn_parent_scope), name=dnn_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[])) if linear_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=contrib_variables.get_global_step(), learning_rate=_linear_learning_rate(len(linear_feature_columns)), optimizer=_get_optimizer(linear_optimizer), clip_gradients=gradient_clip_norm, variables=ops.get_collection(linear_parent_scope), name=linear_parent_scope, # Empty summaries, because head already logs "loss" summary. 
summaries=[])) return control_flow_ops.group(*train_ops) return head.head_ops( features, labels, mode, _make_training_op, logits=logits)
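# The model_fn above is the wide-and-deep pattern: sparse columns go through
# the linear part, embeddings and dense features through the DNN, and the two
# logits are summed. The canned tf.estimator.DNNLinearCombinedClassifier wires
# up the same structure; feature names and sizes below are placeholders.
import tensorflow as tf

language = tf.feature_column.categorical_column_with_hash_bucket(
    "language", hash_bucket_size=1000)
age = tf.feature_column.numeric_column("age")

estimator = tf.estimator.DNNLinearCombinedClassifier(
    # Wide (linear) part: sparse columns, trained with FTRL by default.
    linear_feature_columns=[language],
    # Deep part: dense inputs and embeddings, trained with Adagrad by default.
    dnn_feature_columns=[age,
                         tf.feature_column.embedding_column(language, 8)],
    dnn_hidden_units=[64, 32],
    n_classes=2)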
def _dnn_model_fn(features, labels, mode, params, config=None): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `_Head` instance. * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use the Adagrad optimizer with a default learning rate of 0.05. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. * input_layer_min_slice_size: Optional. The min slice size of input layer partitions. If not provided, will use the default of 64M. config: `RunConfig` object to configure the runtime settings. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ head = params["head"] hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or "Adagrad" activation_fn = params.get("activation_fn") dropout = params.get("dropout") gradient_clip_norm = params.get("gradient_clip_norm") input_layer_min_slice_size = (params.get("input_layer_min_slice_size") or 64 << 20) num_ps_replicas = config.num_ps_replicas if config else 0 embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) features = _get_feature_dict(features) parent_scope = "dnn" # Synthetic minority over-sampling technique # to overcome the lack of B and S signals in the training data if mode == model_fn.ModeKeys.TRAIN: sm = SMOTE(ratio=0.1, k=5, kind='regular', random_state=10) sess = tf.Session(config=tf.ConfigProto(operation_timeout_in_ms=500)) with sess: features, labels = sm.fit_sample(features, labels.eval().ravel()) partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas) with variable_scope.variable_scope(parent_scope, values=tuple(six.itervalues(features)), partitioner=partitioner): input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=input_layer_min_slice_size)) with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as input_layer_scope: if all([ isinstance(fc, feature_column._FeatureColumn) # pylint: disable=protected-access for fc in feature_columns ]): net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=input_layer_scope) else: net = fc_core.input_layer(features=features, feature_columns=feature_columns, weight_collections=[parent_scope]) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 
"hiddenlayer_%d" % layer_id, values=(net, )) as hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope("logits", values=(net, )) as logits_scope: logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[parent_scope], scope=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), gradient_multipliers=( dnn_linear_combined._extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, parent_scope, input_layer_scope.name)), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) return head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
def _dnn_tree_combined_model_fn( features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, tree_learner_config, num_trees, tree_examples_per_layer, config=None, dnn_optimizer="Adagrad", dnn_activation_fn=nn.relu, dnn_dropout=None, dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, tree_feature_columns=None, tree_center_bias=True): """DNN and GBDT combined model_fn. Args: features: `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) head: A `Head` instance. dnn_hidden_units: List of hidden units per layer. dnn_feature_columns: An iterable containing all the feature columns used by the model's DNN. tree_learner_config: A config for the tree learner. num_trees: Number of trees to grow model to after training DNN. tree_examples_per_layer: Number of examples to accumulate before growing the tree a layer. This value has a big impact on model quality and should be set equal to the number of examples in training dataset if possible. It can also be a function that computes the number of examples based on the depth of the layer that's being built. config: `RunConfig` of the estimator. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN. If `None`, will use the Adagrad optimizer with default learning rate of 0.001. dnn_activation_fn: Activation function applied to each layer of the DNN. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability to drop out a given unit in the DNN. dnn_input_layer_partitioner: Partitioner for input layer of the DNN. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. dnn_input_layer_to_tree: Whether to provide the DNN's input layer as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. """ if not isinstance(features, dict): raise ValueError("features should be a dictionary of `Tensor`s. " "Given type: {}".format(type(features))) if not dnn_feature_columns: raise ValueError("dnn_feature_columns must be specified") # Build DNN Logits. 
dnn_parent_scope = "dnn" dnn_partitioner = dnn_input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=config.num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope( dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=dnn_partitioner) as input_layer_scope: input_layer = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=input_layer_scope) previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(previous_layer,)) as hidden_layer_scope: net = layers.fully_connected( previous_layer, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( "logits", values=(previous_layer,)) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=logits_scope) _add_hidden_layer_summary(dnn_logits, logits_scope.name) def _dnn_train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=training_util.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), name=dnn_parent_scope, variables=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) # Build Tree Logits. global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. 
name="ensemble_model") tree_features = features.copy() if dnn_input_layer_to_tree: tree_features["dnn_input_layer"] = input_layer tree_feature_columns.append(layers.real_valued_column("dnn_input_layer")) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=tree_center_bias, examples_per_layer=tree_examples_per_layer, learner_config=tree_learner_config, feature_columns=tree_feature_columns, logits_dimension=head.logits_dimension, features=tree_features) with ops.name_scope("gbdt"): predictions_dict = gbdt_model.predict(mode) tree_logits = predictions_dict["predictions"] def _tree_train_op_fn(loss): """Returns the op to optimize the loss.""" update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op tree_train_logits = dnn_logits + tree_logits def _no_train_op_fn(loss): """Returns a no-op.""" del loss return control_flow_ops.no_op() model_fn_ops = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits).train_op tree_train_op = head.create_model_fn_ops( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits).train_op if tree_center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() model_fn_ops.training_hooks.extend([ trainer_hooks.SwitchTrainOp( dnn_train_op, dnn_steps_to_train, tree_train_op), trainer_hooks.StopAfterNTrees( num_trees, attempted_trees, finalized_trees)]) return model_fn_ops
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns
          used by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn`
          to a `float` multiplier. Multiplier will be used to multiply with
          learning rate for the embedding variables.
      * input_layer_partitioner: Optional. Partitioner for input layer.
      * fix_global_step_increment_bug: A bool. If `True` (the default here),
          the global step is incremented exactly once per training step rather
          than once by each of the linear and DNN optimizers.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = params.get("input_layer_partitioner") or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
  fix_global_step_increment_bug = params.get(
      "fix_global_step_increment_bug", True)

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  linear_optimizer = _get_optimizer(linear_optimizer)
  _check_no_sync_replicas_optimizer(linear_optimizer)
  dnn_optimizer = _get_optimizer(dnn_optimizer)
  _check_no_sync_replicas_optimizer(dnn_optimizer)

  # Build DNN Logits.
dnn_parent_scope = "dnn" if not dnn_feature_columns: dnn_logits = None else: if not dnn_hidden_units: raise ValueError( "dnn_hidden_units must be defined when dnn_feature_columns is " "specified.") dnn_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope( dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as dnn_input_scope: if all( isinstance(fc, feature_column_lib._FeatureColumn) # pylint: disable=protected-access for fc in dnn_feature_columns ): net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=dnn_input_scope) else: net = fc_core.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(net,)) as dnn_hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=dnn_hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout( net, keep_prob=(1.0 - dnn_dropout)) # TODO(b/31209633): Consider adding summary before dropout. _add_layer_summary(net, dnn_hidden_layer_scope.name) with variable_scope.variable_scope( "logits", values=(net,)) as dnn_logits_scope: dnn_logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=dnn_logits_scope) _add_layer_summary(dnn_logits, dnn_logits_scope.name) # Build Linear logits. linear_parent_scope = "linear" if not linear_feature_columns: linear_logits = None else: linear_partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=linear_partitioner) as scope: if all(isinstance(fc, feature_column_lib._FeatureColumn) # pylint: disable=protected-access for fc in linear_feature_columns): if joint_linear_weights: linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits = fc_core.linear_model( features=features, feature_columns=linear_feature_columns, units=head.logits_dimension, weight_collections=[linear_parent_scope]) _add_layer_summary(linear_logits, scope.name) # Combine logits and build full model. 
if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _make_training_op(training_loss): """Training op for the DNN linear combined model.""" train_ops = [] global_step = training_util.get_global_step() if dnn_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=global_step, learning_rate=_DNN_LEARNING_RATE, optimizer=dnn_optimizer, gradient_multipliers=_extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, dnn_parent_scope, dnn_input_scope.name), clip_gradients=gradient_clip_norm, variables=ops.get_collection(dnn_parent_scope), name=dnn_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[], increment_global_step=not fix_global_step_increment_bug)) if linear_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=global_step, learning_rate=_linear_learning_rate(len(linear_feature_columns)), optimizer=linear_optimizer, clip_gradients=gradient_clip_norm, variables=ops.get_collection(linear_parent_scope), name=linear_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[], increment_global_step=not fix_global_step_increment_bug)) train_op = control_flow_ops.group(*train_ops) if fix_global_step_increment_bug: with ops.control_dependencies([train_op]): with ops.colocate_with(global_step): return state_ops.assign_add(global_step, 1).op return train_op return head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_make_training_op, logits=logits)
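# A minimal sketch of how this combined model_fn could be wired into a
# tf.contrib.learn.Estimator. The feature names ("query", "age"), the column
# definitions, the hidden-unit sizes and the model_dir are illustrative
# assumptions; only the params keys follow the docstring above.
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib

query = layers.sparse_column_with_hash_bucket("query", hash_bucket_size=10000)
age = layers.real_valued_column("age")

params = {
    "head": head_lib.multi_class_head(n_classes=2),
    # Wide part sees the raw sparse column; deep part sees its embedding
    # plus the numeric column.
    "linear_feature_columns": [query],
    "dnn_feature_columns": [layers.embedding_column(query, dimension=16), age],
    "dnn_hidden_units": [128, 64],
    "dnn_dropout": 0.1,
}

estimator = tf.contrib.learn.Estimator(
    model_fn=_dnn_linear_combined_model_fn,
    params=params,
    model_dir="/tmp/wide_deep_sketch")
# estimator.fit(input_fn=train_input_fn, steps=1000)  # train_input_fn assumed.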
def _dnn_tree_combined_model_fn(features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, tree_learner_config, num_trees, tree_examples_per_layer, config=None, dnn_optimizer="Adagrad", dnn_activation_fn=nn.relu, dnn_dropout=None, dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, tree_feature_columns=None, tree_center_bias=False, use_core_versions=False): """DNN and GBDT combined model_fn. Args: features: `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) head: A `Head` instance. dnn_hidden_units: List of hidden units per layer. dnn_feature_columns: An iterable containing all the feature columns used by the model's DNN. tree_learner_config: A config for the tree learner. num_trees: Number of trees to grow model to after training DNN. tree_examples_per_layer: Number of examples to accumulate before growing the tree a layer. This value has a big impact on model quality and should be set equal to the number of examples in training dataset if possible. It can also be a function that computes the number of examples based on the depth of the layer that's being built. config: `RunConfig` of the estimator. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN. If `None`, will use the Adagrad optimizer with default learning rate of 0.001. dnn_activation_fn: Activation function applied to each layer of the DNN. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability to drop out a given unit in the DNN. dnn_input_layer_partitioner: Partitioner for input layer of the DNN. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. dnn_input_layer_to_tree: Whether to provide the DNN's input layer as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. use_core_versions: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. """ if not isinstance(features, dict): raise ValueError("features should be a dictionary of `Tensor`s. " "Given type: {}".format(type(features))) if not dnn_feature_columns: raise ValueError("dnn_feature_columns must be specified") # Build DNN Logits. 
dnn_parent_scope = "dnn" dnn_partitioner = dnn_input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=config.num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope(dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=dnn_partitioner) as input_layer_scope: if use_core_versions: input_layer = feature_column_lib.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) else: input_layer = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=input_layer_scope) previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(previous_layer, )) as hidden_layer_scope: net = layers.fully_connected( previous_layer, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( "logits", values=(previous_layer, )) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=logits_scope) _add_hidden_layer_summary(dnn_logits, logits_scope.name) def _dnn_train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=training_util.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), name=dnn_parent_scope, variables=ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) # Build Tree Logits. global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. 
name="ensemble_model") tree_features = features.copy() if dnn_input_layer_to_tree: tree_features["dnn_input_layer"] = input_layer tree_feature_columns.append( layers.real_valued_column("dnn_input_layer")) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=tree_center_bias, examples_per_layer=tree_examples_per_layer, learner_config=tree_learner_config, feature_columns=tree_feature_columns, logits_dimension=head.logits_dimension, features=tree_features) with ops.name_scope("gbdt"): predictions_dict = gbdt_model.predict(mode) tree_logits = predictions_dict["predictions"] def _tree_train_op_fn(loss): """Returns the op to optimize the loss.""" update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op tree_train_logits = dnn_logits + tree_logits def _no_train_op_fn(loss): """Returns a no-op.""" del loss return control_flow_ops.no_op() if use_core_versions: model_fn_ops = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops( dnn_train_op).train_op tree_train_op = head.create_estimator_spec( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops( tree_train_op).train_op model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits).train_op tree_train_op = head.create_model_fn_ops( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits).train_op if tree_center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() model_fn_ops.training_hooks.extend([ trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train, tree_train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees) ]) return model_fn_ops