def dnn_logit_fn(features, mode):
  """Builds the DNN tower and returns its logits.

  Args:
    features: First item returned by the `input_fn` passed to `train`,
      `evaluate` and `predict`. Documented as a single `Tensor` or a `dict`
      of them; its values are read through `six.itervalues`.
    mode: Optional. Tells whether this is training, evaluation or prediction
      (see `ModeKeys`). Dropout is only added for `ModeKeys.TRAIN`.

  Returns:
    A logits `Tensor` when `units` is an `int`; otherwise a list holding one
    logits `Tensor` per entry of `units` (the MultiHead case).
  """
  feature_values = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=feature_values,
      partitioner=input_layer_partitioner):
    net = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)

  # Stack of fully connected hidden layers, each in its own variable scope.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    hidden_scope_name = 'hiddenlayer_%d' % layer_id
    with variable_scope.variable_scope(
        hidden_scope_name, values=(net,)) as hidden_layer_scope:
      net = core_layers.dense(
          net,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        net = core_layers.dropout(net, rate=dropout, training=True)
    _add_hidden_layer_summary(net, hidden_layer_scope.name)

  def _logits_layer(scope_name, logits_dimension):
    """Adds one linear projection of `net` plus its summary."""
    with variable_scope.variable_scope(
        scope_name, values=(net,)) as logits_scope:
      projected = core_layers.dense(
          net,
          units=logits_dimension,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(projected, logits_scope.name)
    return projected

  # Single head: one 'logits' layer.
  if isinstance(units, int):
    return _logits_layer('logits', units)

  # Multi head: one 'logits_head_<i>' layer per requested dimension.
  multi_head_logits = []
  for head_index, logits_dimension in enumerate(units):
    multi_head_logits.append(
        _logits_layer('logits_head_{}'.format(head_index), logits_dimension))
  return multi_head_logits
def dnn_logit_fn(features, mode):
  """Builds the DNN tower (optionally batch-normalised) and its logits.

  Args:
    features: First item returned by the `input_fn` passed to `train`,
      `evaluate` and `predict`. Documented as a single `Tensor` or a `dict`
      of them; its values are read through `six.itervalues`.
    mode: Optional. Tells whether this is training, evaluation or prediction
      (see `ModeKeys`). Dropout and the batch-norm `training` flag follow
      `mode == ModeKeys.TRAIN`.

  Returns:
    The value produced by the final `dense` layer built with `units`,
    i.e. the logits.
  """
  training_mode = mode == model_fn.ModeKeys.TRAIN
  use_dropout = dropout is not None and training_mode

  feature_values = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=feature_values,
      partitioner=input_layer_partitioner):
    net = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)

  for layer_id, num_hidden_units in enumerate(hidden_units):
    hidden_scope_name = 'hiddenlayer_%d' % layer_id
    with variable_scope.variable_scope(
        hidden_scope_name, values=(net,)) as hidden_layer_scope:
      net = core_layers.dense(
          net,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if use_dropout:
        net = core_layers.dropout(net, rate=dropout, training=True)
      if batch_norm:
        # TODO(hjm): In future, if this becomes popular, we can enable
        # customization of the batch normalization params by accepting a
        # list of `BatchNormalization` instances as `batch_norm`.
        #
        # The default momentum 0.99 actually crashes on certain problem, so
        # here we use 0.999, which is the default of
        # tf.contrib.layers.batch_norm.
        net = normalization.batch_normalization(
            net,
            momentum=0.999,
            training=training_mode,
            name='batchnorm_%d' % layer_id)
    _add_hidden_layer_summary(net, hidden_layer_scope.name)

  with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
    logits = core_layers.dense(
        net,
        units=units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        name=logits_scope)
  _add_hidden_layer_summary(logits, logits_scope.name)

  return logits
def testAttentionCellWrapperCorrectResult(self):
  """Compares one `AttentionCellWrapper` step against stored golden values.

  The same seeded inputs are fed with `state_is_tuple` False and then True;
  the second pass opens the variable scope with `reuse=True`, so both passes
  are compared against the same `expected_output` / `expected_state`.
  """
  num_units = 4
  attn_length = 6
  batch_size = 2
  # Golden values; they are tied to the seeds and to the order in which the
  # ops below are created, so do not reorder the graph construction.
  expected_output = np.array(
      [[1.068372, 0.45496, -0.678277, 0.340538],
       [1.018088, 0.378983, -0.572179, 0.268591]],
      dtype=np.float32)
  expected_state = np.array(
      [[0.74946702, 0.34681597, 0.26474735, 1.06485605, 0.38465962,
        0.11420801, 0.10272158, 0.30925757, 0.63899988, 0.7181077,
        0.47534478, 0.33715725, 0.58086717, 0.49446869, 0.7641536,
        0.12814975, 0.92231739, 0.89857256, 0.21889746, 0.38442063,
        0.53481543, 0.8876909, 0.45823169, 0.5905602, 0.78038228,
        0.56501579, 0.03971386, 0.09870267, 0.8074435, 0.66821432,
        0.99211812, 0.12295902, 1.14606023, 0.34370938, -0.79251152,
        0.51843399],
       [0.5179342, 0.48682183, -0.25426468, 0.96810579, 0.28809637,
        0.13607743, -0.11446252, 0.26792109, 0.78047138, 0.63460857,
        0.49122369, 0.52007174, 0.73000264, 0.66986895, 0.73576689,
        0.86301267, 0.87887371, 0.35185754, 0.93417215, 0.64732957,
        0.63173044, 0.66627824, 0.53644657, 0.20477486, 0.98458421,
        0.38277245, 0.03746676, 0.92510188, 0.57714164, 0.84932971,
        0.36127412, 0.12125921, 1.1362772, 0.34361625, -0.78150457,
        0.70582712]],
      dtype=np.float32)
  seed = 12345
  random_seed.set_random_seed(seed)
  for state_is_tuple in [False, True]:
    with session.Session() as sess:
      # `reuse` is False on the first pass (variables get created) and True
      # on the second pass (the same variables are fetched again).
      with variable_scope.variable_scope(
          "state_is_tuple",
          reuse=state_is_tuple,
          initializer=init_ops.glorot_uniform_initializer()):
        lstm_cell = core_rnn_cell_impl.BasicLSTMCell(
            num_units, state_is_tuple=state_is_tuple)
        cell = rnn_cell.AttentionCellWrapper(
            lstm_cell, attn_length, state_is_tuple=state_is_tuple)
        # Despite the `zeros*` names these are seeded uniform random tensors.
        zeros1 = random_ops.random_uniform(
            (batch_size, num_units), 0.0, 1.0, seed=seed + 1)
        zeros2 = random_ops.random_uniform(
            (batch_size, num_units), 0.0, 1.0, seed=seed + 2)
        zeros3 = random_ops.random_uniform(
            (batch_size, num_units), 0.0, 1.0, seed=seed + 3)
        attn_state_zeros = random_ops.random_uniform(
            (batch_size, attn_length * num_units), 0.0, 1.0, seed=seed + 4)
        zero_state = ((zeros1, zeros2), zeros3, attn_state_zeros)
        if not state_is_tuple:
          # Flat-state mode: concatenate the four pieces along axis 1.
          zero_state = array_ops.concat([
              zero_state[0][0], zero_state[0][1], zero_state[1], zero_state[2]
          ], 1)
        inputs = random_ops.random_uniform(
            (batch_size, num_units), 0.0, 1.0, seed=seed + 5)
        output, state = cell(inputs, zero_state)
        if state_is_tuple:
          # Flatten the tuple state so it can be compared with the same
          # expected array as the flat-state pass.
          state = array_ops.concat(
              [state[0][0], state[0][1], state[1], state[2]], 1)
        sess.run(variables.global_variables_initializer())
        self.assertAllClose(sess.run(output), expected_output)
        self.assertAllClose(sess.run(state), expected_state)
def test_glorot_uniform_initializer(self):
  """Checks sample mean/std of `glorot_uniform_initializer` via `_runner`."""
  shape = (5, 6, 4, 2)
  with self.cached_session():
    fan_in, fan_out = init_ops._compute_fans(shape)
    # Uniform on [-limit, limit] with limit = sqrt(6 / (fan_in + fan_out))
    # has standard deviation limit / sqrt(3) = sqrt(2 / (fan_in + fan_out)).
    expected_std = np.sqrt(2. / (fan_in + fan_out))
    initializer = init_ops.glorot_uniform_initializer(seed=123)
    self._runner(
        initializer,
        shape,
        target_mean=0.,
        target_std=expected_std)
def __init__(self,
             reference_dims,
             hypothesis_dims,
             hidden_dims,
             float_dtype,
             dropout_attn,
             training,
             name,
             attn_type='multiplicative'):
    """Stores the attention configuration and creates its parameters.

    Args:
        reference_dims: Input size handed to the keys projection.
        hypothesis_dims: Input size handed to the queries projection.
        hidden_dims: Output size of both projections and shape of the
            additive attention weight.
        float_dtype: dtype used for every parameter created here.
        dropout_attn: Stored as `self.dropout_attn`; not used in this method.
        training: Forwarded to the projection layers.
        name: Variable scope under which the parameters are created.
        attn_type: Either 'additive' or 'multiplicative'.

    Raises:
        AssertionError: If `attn_type` is not one of the supported values.
    """
    # Declare attributes
    self.name = name
    self.training = training
    self.attn_type = attn_type
    self.dropout_attn = dropout_attn
    self.float_dtype = float_dtype
    self.hidden_dims = hidden_dims
    self.hypothesis_dims = hypothesis_dims
    self.reference_dims = reference_dims

    assert attn_type in ('additive', 'multiplicative'), \
        'Attention type {:s} is not supported.'.format(attn_type)

    # Settings shared by the bias-free, activation-free projection layers
    projection_settings = dict(dropout_rate=0.,
                               activation=None,
                               use_bias=False,
                               use_layer_norm=False,
                               training=self.training)

    # Instantiate parameters
    with tf.variable_scope(self.name):
        # Only additive attention owns a query projection and a weight vector
        self.queries_projection = None
        self.attn_weight = None
        if attn_type == 'additive':
            self.queries_projection = FeedForwardLayer(self.hypothesis_dims,
                                                       self.hidden_dims,
                                                       float_dtype,
                                                       name='queries_projection',
                                                       **projection_settings)
            self.attn_weight = tf.get_variable(name='attention_weight',
                                               shape=self.hidden_dims,
                                               dtype=float_dtype,
                                               initializer=glorot_uniform_initializer(),
                                               trainable=True)

        # Keys are projected for both attention types
        self.keys_projection = FeedForwardLayer(self.reference_dims,
                                                self.hidden_dims,
                                                float_dtype,
                                                name='keys_projection',
                                                **projection_settings)
def build(self, input_shape):
  """Creates the optional `scale` weight, then defers to the parent `build`.

  Args:
    input_shape: Indexable collection of shapes; entry 1 is read to obtain
      the size of the last dimension that `scale` is created with.
  """
  value_shape = tensor_shape.TensorShape(input_shape[1])
  last_dim = value_shape[-1]
  # Unwrap `Dimension` objects into their plain value.
  if isinstance(last_dim, tensor_shape.Dimension):
    last_dim = last_dim.value

  self.scale = (
      self.add_weight(
          name='scale',
          shape=[last_dim],
          initializer=init_ops.glorot_uniform_initializer(),
          dtype=self.dtype,
          trainable=True)
      if self.use_scale else None)

  super(AdditiveAttention, self).build(input_shape)
def __init__(self,
             vocabulary_size,
             embedding_size,
             hidden_size,
             float_dtype,
             name):
    """Creates the embedding table and its transposed projection matrix.

    Args:
        vocabulary_size: Number of rows of the embedding table.
        embedding_size: Number of columns of the embedding table.
        hidden_size: Stored as `self.hidden_size`; not used in this method.
        float_dtype: dtype of the embedding table.
        name: Variable scope under which the table is created.
    """
    # Set arguments
    self.name = name
    self.float_dtype = float_dtype
    self.hidden_size = hidden_size
    self.vocabulary_size = vocabulary_size

    # Create embedding matrix and its transposes
    table_shape = [vocabulary_size, embedding_size]
    with tf.variable_scope(self.name):
        self.embedding_table = tf.get_variable(
            name='embedding_table',
            shape=table_shape,
            dtype=float_dtype,
            initializer=glorot_uniform_initializer(),
            trainable=True)
        self.projection_matrix = tf.transpose(
            self.embedding_table, name='vocab_projection_matrix')
def rnn_logit_fn(features, mode):
  """Builds the RNN over the sequence input and returns its logits.

  Args:
    features: First item returned by the `input_fn` passed to `train`,
      `evaluate` and `predict`. Documented as a single `Tensor` or a `dict`
      of them; its values are read through `six.itervalues`.
    mode: Optional. Tells whether this is training, evaluation or prediction
      (see `ModeKeys`). It is only forwarded to `rnn_cell_fn`.

  Returns:
    A `Tensor` holding the logits computed from the last activations.
  """
  feature_values = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'sequence_input_layer',
      values=feature_values,
      partitioner=input_layer_partitioner):
    sequence_input, sequence_length = seq_fc.sequence_input_layer(
        features=features, feature_columns=sequence_feature_columns)
    summary.histogram('sequence_length', sequence_length)

    # Context features, when configured, are merged into the sequence input.
    if context_feature_columns:
      context_input = feature_column_lib.input_layer(
          features=features, feature_columns=context_feature_columns)
      sequence_input = seq_fc.concatenate_context_input(
          context_input, sequence_input)

  cell = rnn_cell_fn(mode)
  # Only the per-step outputs are needed; the final state is dropped.
  rnn_result = rnn.dynamic_rnn(
      cell=cell,
      inputs=sequence_input,
      sequence_length=sequence_length,
      dtype=dtypes.float32,
      time_major=False)
  rnn_outputs = rnn_result[0]
  last_activations = _select_last_activations(rnn_outputs, sequence_length)

  with variable_scope.variable_scope('logits', values=(rnn_outputs,)):
    return core_layers.dense(
        last_activations,
        units=output_units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer())
def __init__(self,
             num_units,
             W_init=init_ops.glorot_uniform_initializer(seed=randint()),
             b_init=init_ops.constant_initializer(0.),
             rnn_mode='lstm',
             num_layers=1,
             skip_input=False,
             is_bidirectional=False,
             return_states=False,
             dropout=0.,
             **kwargs):
    """Stores the recurrent configuration of the layer.

    Args:
        num_units: Number of units; coerced with `int`.
        W_init: Weight initializer, stored unchanged.
        b_init: Bias initializer, stored unchanged.
        rnn_mode: Kind of recurrent cell; coerced with `str`.
        num_layers: Number of layers; coerced with `int`.
        skip_input: Coerced with `bool`; a warning is emitted when truthy.
        is_bidirectional: Coerced with `bool`.
        return_states: Coerced with `bool`.
        dropout: Stored unchanged.
        **kwargs: Forwarded to the parent constructor.
    """
    # NOTE(review): the default `W_init` / `b_init` objects are created once,
    # when this `def` is evaluated, so every instance relying on the defaults
    # shares the same initializer (and the same random seed).
    super(CudnnRNN, self).__init__(**kwargs)

    # ====== defaults recurrent control ====== #
    self.num_units = int(num_units)
    self.num_layers = int(num_layers)
    self.rnn_mode = str(rnn_mode)

    # boolean switches
    self.skip_input = bool(skip_input)
    self.is_bidirectional = bool(is_bidirectional)
    self.return_states = bool(return_states)

    # values kept exactly as given
    self.dropout = dropout
    self.W_init = W_init
    self.b_init = b_init

    if self.skip_input:
        wprint("`skip_input` is not supported in Tensorflow.")
def __init__(self,
             in_size,
             out_size,
             float_dtype,
             dropout_rate,
             activation,
             use_bias,
             use_layer_norm,
             training,
             name):
    """Stores the layer configuration and creates its parameters.

    Args:
        in_size: Size of the layer input.
        out_size: Size of the layer output; when `None` the weights (and
            biases) are created with shape `[in_size]` instead.
        float_dtype: dtype of the created variables.
        dropout_rate: Stored as `self.dropout_rate`; not used in this method.
        activation: Stored as `self.activation`; not used in this method.
        use_bias: Whether a bias variable is created.
        use_layer_norm: Whether a `LayerNormLayer` is instantiated.
        training: Stored as `self.training`; not used in this method.
        name: Variable scope under which the parameters are created.
    """
    # Set attributes
    self.in_size = in_size
    self.out_size = out_size
    self.dropout_rate = dropout_rate
    self.activation = activation
    self.use_bias = use_bias
    self.training = training
    self.name = name

    with tf.variable_scope(self.name):
        # Set up layer normalization
        if use_layer_norm:
            self.layer_norm_layer = LayerNormLayer(out_size)
        else:
            self.layer_norm_layer = None

        # Define parameters
        weights_shape = [in_size, out_size] if out_size is not None else [in_size]
        self.weights = tf.get_variable(name='dense_layer_weights',
                                       shape=weights_shape,
                                       dtype=float_dtype,
                                       initializer=glorot_uniform_initializer(),
                                       trainable=True)

        # Always define `biases` (mirroring `layer_norm_layer`) so the
        # attribute exists even when the layer is built without a bias.
        self.biases = None
        if use_bias:
            biases_shape = [out_size] if out_size is not None else [in_size]
            self.biases = tf.get_variable(name='dense_layer_biases',
                                          shape=biases_shape,
                                          dtype=float_dtype,
                                          initializer=tf.zeros_initializer(),
                                          trainable=True)
def __init__(self, hparams, item, cluster, controller_id=0):
  """HierarchicalController class initializer.

  Creates (or reuses, via `AUTO_REUSE`) every controller variable under the
  `hparams.name` variable scope and builds the seq2seq input placeholder.

  Args:
    hparams: All hyper-parameters.
    item: The metagraph to place.
    cluster: The cluster of hardware devices to optimize for.
    controller_id: the id of the controller in a multi-controller setup.
  """
  super(HierarchicalController, self).__init__(item, cluster)
  self.ctrl_id = controller_id
  self.hparams = hparams

  if self.hparams.num_groups is None:
    self.num_groups = min(256, 20 * self.num_devices)
  else:
    self.num_groups = self.hparams.num_groups

  # creates self.op_embeddings and self.type_dict
  self.create_op_embeddings(verbose=False)
  # TODO(azalia) clean up embedding/group_embedding_size names
  self.group_emb_size = (
      2 * self.num_groups + len(self.type_dict) +
      self.hparams.max_num_outputs * self.hparams.max_output_size)
  self.embedding_size = self.group_emb_size
  self.initializer = init_ops.glorot_uniform_initializer(
      seed=self.hparams.seed)

  hidden_size = self.hparams.hidden_size
  with variable_scope.variable_scope(
      self.hparams.name,
      initializer=self.initializer,
      reuse=variable_scope.AUTO_REUSE):
    # define parameters of feedforward
    variable_scope.get_variable("w_grouping_ff", [
        1 + self.hparams.max_num_outputs * self.hparams.max_output_size +
        self.hparams.adj_embed_dim, self.hparams.grouping_hidden_size
    ])
    variable_scope.get_variable(
        "w_grouping_softmax",
        [self.hparams.grouping_hidden_size, self.num_groups])

    # Encoder parameters are the only ones that depend on `bi_lstm`.
    if self.hparams.bi_lstm:
      # Floor division keeps the dimension an integer under true division
      # (Python 3 / `from __future__ import division`); `/` would produce a
      # float, which is not a valid dimension for odd hidden sizes.
      half_hidden_size = hidden_size // 2
      variable_scope.get_variable(
          "encoder_lstm_forward",
          [self.embedding_size + half_hidden_size, 2 * hidden_size])
      variable_scope.get_variable(
          "encoder_lstm_backward",
          [self.embedding_size + half_hidden_size, 2 * hidden_size])
    else:
      variable_scope.get_variable(
          "encoder_lstm",
          [self.embedding_size + hidden_size, 4 * hidden_size])

    # Decoder / attention parameters, identical for both encoder types. They
    # are created after the encoder variables, in the original order.
    variable_scope.get_variable(
        "device_embeddings", [self.num_devices, hidden_size])
    variable_scope.get_variable(
        "decoder_lstm", [2 * hidden_size, 4 * hidden_size])
    variable_scope.get_variable(
        "device_softmax", [2 * hidden_size, self.num_devices])
    variable_scope.get_variable("device_go_embedding", [1, hidden_size])
    variable_scope.get_variable(
        "encoder_forget_bias",
        shape=1,
        dtype=dtypes.float32,
        initializer=init_ops.constant_initializer(
            self.hparams.forget_bias_init))
    variable_scope.get_variable(
        "decoder_forget_bias",
        shape=1,
        dtype=dtypes.float32,
        initializer=init_ops.constant_initializer(
            self.hparams.forget_bias_init))
    variable_scope.get_variable("attn_w_1", [hidden_size, hidden_size])
    variable_scope.get_variable("attn_w_2", [hidden_size, hidden_size])
    variable_scope.get_variable("attn_v", [hidden_size, 1])

  # Placeholder that defaults to zeros when nothing is fed.
  seq2seq_input_layer = array_ops.placeholder_with_default(
      array_ops.zeros([1, self.num_groups, self.group_emb_size],
                      dtypes.float32),
      shape=(1, self.num_groups, self.group_emb_size))
  self.seq2seq_input_layer = seq2seq_input_layer
def _initialize(self, X):
    """Validates `X` and lazily builds the model graph and its functions.

    The graph is built only once: when `_f_train` already exists the method
    returns immediately.

    Args:
        X: Input data exposing a `shape` attribute; every dimension after
            the first is folded into the feature dimension.

    Returns:
        `self`, both when the graph is freshly built and when it already
        existed.

    Raises:
        ValueError: If `X` has no `shape` attribute.
        RuntimeError: If the feature dimension of `X` differs from the one
            recorded earlier.
    """
    # ====== check inputs dimensions ====== #
    if not hasattr(X, 'shape'):
        raise ValueError("`X` must have `shape` attribute.")
    feat_dim = np.prod(X.shape[1:])
    if self._feat_dim is None:
        self._feat_dim = feat_dim
    # validate input dimension
    if feat_dim != self._feat_dim:
        raise RuntimeError("Feature dimension mismatch %d and %d" %
                           (feat_dim, self.feat_dim))
    # check if tensorflow op initialized; return `self` here as well so both
    # exits of the method have the same return value
    if hasattr(self, '_f_train'):
        return self
    # ====== binary or multi-classes ====== #
    if self.nb_classes == 2:
        out_shape = (None, )
        fn_activation = tf.nn.sigmoid
        fn_loss = tf.losses.sigmoid_cross_entropy
        fn_acc = K.metrics.binary_accuracy
    else:
        out_shape = (None, self.nb_classes)
        fn_activation = tf.nn.softmax
        fn_loss = tf.losses.softmax_cross_entropy
        fn_acc = K.metrics.categorical_accuracy
    # ====== create model ====== #
    with tf.name_scope(self.name, 'logistic_regression'):
        # inputs
        self._X = K.placeholder(shape=(None, self.feat_dim),
                                dtype=self.dtype,
                                name='%s_input' % self.name)
        self._y = K.placeholder(shape=out_shape,
                                dtype=self.dtype,
                                name='%s_output' % self.name)
        # check the bias
        if is_number(self.fit_intercept):
            b_init = float(self.fit_intercept)
        elif self.fit_intercept is False or \
        self.fit_intercept is None:
            b_init = None
        else:
            b_init = self.fit_intercept
        # create the model and initialize
        with K.variable_dtype(dtype=self.dtype):
            self._model = N.Dense(
                num_units=self.nb_classes,
                W_init=init_ops.glorot_uniform_initializer(
                    seed=self._rand_state.randint()),
                b_init=b_init,
                activation=K.linear)
            y_logits = self._model(self._X)
        y_prob = fn_activation(y_logits)
        # applying class weights
        class_weights = tf.constant(value=self._class_weight,
                                    dtype=self.dtype,
                                    name="class_weights")
        weights = tf.gather(
            class_weights,
            tf.cast(self._y, 'int32')
            if self.nb_classes == 2 else tf.argmax(self._y, axis=-1))
        # optimizer
        params = [
            v for v in self._model.variables
            if has_roles(v, Weight) or has_roles(v, Bias)
        ]
        losses = fn_loss(self._y, y_logits, weights=weights)
        # regularisation terms are only built when their factor is positive
        l1_norm = tf.norm(self._model.get('W'), ord=1) if self.l1 > 0. else 0
        l2_norm = tf.norm(self._model.get('W'), ord=2) if self.l2 > 0. else 0
        losses = losses + self.l1 * l1_norm + self.l2 * l2_norm
        acc = fn_acc(self._y, y_prob)
        updates = self._optimizer.get_updates(losses, params)
        # create function
        metrics = [losses, acc]
        if self.confusion_matrix:
            cm = K.metrics.confusion_matrix(y_true=self._y,
                                            y_pred=y_prob,
                                            labels=self.nb_classes)
            metrics.append(cm)
        self._f_train = K.function(inputs=(self._X, self._y),
                                   outputs=metrics,
                                   updates=updates,
                                   training=True)
        self._f_score = K.function(inputs=(self._X, self._y),
                                   outputs=metrics,
                                   training=False)
        self._f_pred_prob = K.function(inputs=self._X,
                                       outputs=y_prob,
                                       training=False)
        self._f_pred_logit = K.function(inputs=self._X,
                                        outputs=y_logits,
                                        training=False)
    return self
def __init__(self, hparams, item, cluster, controller_id=0):
  """HierarchicalController class initializer.

  Creates (or reuses, via `AUTO_REUSE`) every controller variable under the
  `hparams.name` variable scope and builds the seq2seq input placeholder,
  whose leading dimension is `hparams.num_children`.

  Args:
    hparams: All hyper-parameters.
    item: The metagraph to place.
    cluster: The cluster of hardware devices to optimize for.
    controller_id: the id of the controller in a multi-controller setup.
  """
  super(HierarchicalController, self).__init__(item, cluster)
  self.ctrl_id = controller_id
  self.hparams = hparams

  if self.hparams.num_groups is None:
    self.num_groups = min(256, 20 * self.num_devices)
  else:
    self.num_groups = self.hparams.num_groups

  # creates self.op_embeddings and self.type_dict
  self.create_op_embeddings(verbose=False)
  # TODO(azalia) clean up embedding/group_embedding_size names
  self.group_emb_size = (
      2 * self.num_groups + len(self.type_dict) +
      self.hparams.max_num_outputs * self.hparams.max_output_size)
  self.embedding_size = self.group_emb_size
  self.initializer = init_ops.glorot_uniform_initializer(
      seed=self.hparams.seed)

  hidden_size = self.hparams.hidden_size
  with variable_scope.variable_scope(
      self.hparams.name,
      initializer=self.initializer,
      reuse=variable_scope.AUTO_REUSE):
    # define parameters of feedforward
    variable_scope.get_variable("w_grouping_ff", [
        1 + self.hparams.max_num_outputs * self.hparams.max_output_size +
        self.hparams.adj_embed_dim, self.hparams.grouping_hidden_size
    ])
    variable_scope.get_variable(
        "w_grouping_softmax",
        [self.hparams.grouping_hidden_size, self.num_groups])

    # Encoder parameters are the only ones that depend on `bi_lstm`.
    if self.hparams.bi_lstm:
      # Floor division keeps the dimension an integer under true division
      # (Python 3 / `from __future__ import division`); `/` would produce a
      # float, which is not a valid dimension for odd hidden sizes.
      half_hidden_size = hidden_size // 2
      variable_scope.get_variable(
          "encoder_lstm_forward",
          [self.embedding_size + half_hidden_size, 2 * hidden_size])
      variable_scope.get_variable(
          "encoder_lstm_backward",
          [self.embedding_size + half_hidden_size, 2 * hidden_size])
    else:
      variable_scope.get_variable(
          "encoder_lstm",
          [self.embedding_size + hidden_size, 4 * hidden_size])

    # Decoder / attention parameters, identical for both encoder types. They
    # are created after the encoder variables, in the original order.
    variable_scope.get_variable(
        "device_embeddings", [self.num_devices, hidden_size])
    variable_scope.get_variable(
        "decoder_lstm", [2 * hidden_size, 4 * hidden_size])
    variable_scope.get_variable(
        "device_softmax", [2 * hidden_size, self.num_devices])
    variable_scope.get_variable("device_go_embedding", [1, hidden_size])
    variable_scope.get_variable(
        "encoder_forget_bias",
        shape=1,
        dtype=dtypes.float32,
        initializer=init_ops.constant_initializer(
            self.hparams.forget_bias_init))
    variable_scope.get_variable(
        "decoder_forget_bias",
        shape=1,
        dtype=dtypes.float32,
        initializer=init_ops.constant_initializer(
            self.hparams.forget_bias_init))
    variable_scope.get_variable("attn_w_1", [hidden_size, hidden_size])
    variable_scope.get_variable("attn_w_2", [hidden_size, hidden_size])
    variable_scope.get_variable("attn_v", [hidden_size, 1])

  # Placeholder that defaults to zeros when nothing is fed.
  seq2seq_input_layer = array_ops.placeholder_with_default(
      array_ops.zeros([
          self.hparams.num_children, self.num_groups, self.group_emb_size
      ], dtypes.float32),
      shape=(self.hparams.num_children, self.num_groups,
             self.group_emb_size))
  self.seq2seq_input_layer = seq2seq_input_layer
def _dnn_model_fn(
    features, labels, mode, head, hidden_units, feature_columns,
    optimizer='Adagrad', activation_fn=nn.relu, dropout=None,
    input_layer_partitioner=None, config=None):
  """Deep Neural Net model_fn.

  Args:
    features: Dict of `Tensor` (depends on data passed to `train`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. Resolved through
      `optimizers.get_optimizer_instance` with `_LEARNING_RATE`; defaults to
      'Adagrad'.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate. Only applied in `ModeKeys.TRAIN`.
    input_layer_partitioner: Partitioner for input layer. Defaults to
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    Whatever `head.create_estimator_spec` returns for the computed logits.
  """
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  # Without a config there are no parameter servers to partition over.
  num_ps_replicas = config.num_ps_replicas if config else 0

  dnn_partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  feature_values = tuple(six.itervalues(features))
  with variable_scope.variable_scope(
      'dnn', values=feature_values, partitioner=dnn_partitioner):
    if not input_layer_partitioner:
      input_layer_partitioner = (
          partitioned_variables.min_max_variable_partitioner(
              max_partitions=num_ps_replicas,
              min_slice_size=64 << 20))

    with variable_scope.variable_scope(
        'input_from_feature_columns',
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner):
      net = feature_column_lib.input_layer(
          features=features, feature_columns=feature_columns)

    # Hidden layers, each in its own 'hiddenlayer_<i>' scope.
    for layer_id, num_hidden_units in enumerate(hidden_units):
      hidden_scope_name = 'hiddenlayer_%d' % layer_id
      with variable_scope.variable_scope(
          hidden_scope_name, values=(net,)) as hidden_layer_scope:
        net = core_layers.dense(
            net,
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = core_layers.dropout(net, rate=dropout, training=True)
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
        'logits', values=(net,)) as logits_scope:
      logits = core_layers.dense(
          net,
          units=head.logits_dimension,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      global_step = training_util.get_global_step()
      return optimizer.minimize(loss, global_step=global_step)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _weight(shape):
  """Creates a variable of `shape` initialised with seeded glorot-uniform."""

  # The initial value is handed over as a callable (rather than a tensor) to
  # allow construction inside loops.
  def _initial_value():
    initializer = init_ops.glorot_uniform_initializer(seed=0)
    return initializer(shape)

  return variables.Variable(_initial_value)
def _dnn_linear_combined_model_fn(
    features, labels, mode, head,
    linear_feature_columns=None, linear_optimizer='Ftrl',
    dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None,
    dnn_activation_fn=nn.relu, dnn_dropout=None,
    input_layer_partitioner=None, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns
      used by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to 'Ftrl'.
    dnn_feature_columns: An iterable containing all the feature columns used
      by the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to 'Adagrad'.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer.
      Defaults to `nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given
      DNN coordinate. Only applied in `ModeKeys.TRAIN`.
    input_layer_partitioner: Partitioner for input layer. When falsy, a
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20 is used.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    Whatever `head.create_estimator_spec` returns for the combined logits.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time, or if `dnn_feature_columns` is given
      without `dnn_hidden_units`.
  """
  if not (linear_feature_columns or dnn_feature_columns):
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')

  # Without a config there are no parameter servers to partition over.
  num_ps_replicas = config.num_ps_replicas if config else 0
  if not input_layer_partitioner:
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'
  dnn_logits = None
  if dnn_feature_columns:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          'input', partitioner=input_layer_partitioner):
        net = feature_column_lib.input_layer(
            features=features, feature_columns=dnn_feature_columns)

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        hidden_scope_name = 'hiddenlayer_%d' % layer_id
        with variable_scope.variable_scope(
            hidden_scope_name, values=(net,)) as dnn_hidden_layer_scope:
          net = core_layers.dense(
              net,
              units=num_hidden_units,
              activation=dnn_activation_fn,
              kernel_initializer=init_ops.glorot_uniform_initializer(),
              name=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = core_layers.dropout(net, rate=dnn_dropout, training=True)
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      with variable_scope.variable_scope(
          'logits', values=(net,)) as dnn_logits_scope:
        dnn_logits = core_layers.dense(
            net,
            units=head.logits_dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build linear logits.
  linear_parent_scope = 'linear'
  linear_logits = None
  if linear_feature_columns:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      linear_logits = feature_column_lib.linear_model(
          features=features,
          feature_columns=linear_feature_columns,
          units=head.logits_dimension)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is None:
    logits = linear_logits
  elif linear_logits is None:
    logits = dnn_logits
  else:
    logits = dnn_logits + linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    global_step = training_util.get_global_step()
    train_ops = []
    # Each tower is trained by its own optimizer on its own variables.
    if dnn_logits is not None:
      dnn_variables = ops.get_collection(
          ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope)
      train_ops.append(dnn_optimizer.minimize(loss, var_list=dnn_variables))
    if linear_logits is not None:
      linear_variables = ops.get_collection(
          ops.GraphKeys.TRAINABLE_VARIABLES, scope=linear_parent_scope)
      train_ops.append(
          linear_optimizer.minimize(loss, var_list=linear_variables))

    # The global step is incremented once, after all towers have stepped.
    train_op = control_flow_ops.group(*train_ops)
    with ops.control_dependencies([train_op]):
      with ops.colocate_with(global_step):
        return state_ops.assign_add(global_step, 1)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def build(self, input_shape):
    """Create the opaque parameter variable of the Cudnn RNN layer.

    May be invoked directly before `__call__()` or implicitly by it. Once
    the layer is marked as built, further invocations return immediately.

    Args:
      input_shape: shape of the network input, a python list or a
        `TensorShape` object with 3 dimensions.

    Raises:
      ValueError: if `input_shape` does not have 3 dimensions or its last
        dimension is unknown.
    """
    if self.built:
        return

    shape = tensor_shape.TensorShape(input_shape)
    if shape.ndims != 3:
        raise ValueError("Expecting input_shape with 3 dims, got %d" %
                         shape.ndims)
    feature_dim = shape[-1].value
    if feature_dim is None:
        raise ValueError("The last dimension of the inputs to `CudnnRNN` "
                         "should be defined. Found `None`.")
    self._input_size = feature_dim
    self.input_spec = input_spec.InputSpec(
        ndim=3, axes={-1: self._input_size})

    self._set_scope(None)

    # The base class `add_variable()` is deliberately bypassed: it calls
    # `tf.get_variable()` with a callable initializer, whereas a tensor
    # initializer is needed here for forward-compatibility with Cudnn.
    with vs.variable_scope(
            self._scope,
            reuse=self.built,
            custom_getter=self._update_trainable_weights):
        if self._kernel_initializer is None:
            self._kernel_initializer = init_ops.glorot_uniform_initializer(
                seed=self._seed, dtype=self._plain_dtype)
        if self._bias_initializer is None:
            self._bias_initializer = init_ops.constant_initializer(
                0.0, dtype=self._plain_dtype)

        # One initial-value tensor per canonical weight / bias shape.
        kernel_tensors = []
        for kernel_shape in self.canonical_weight_shapes:
            kernel_tensors.append(
                self._kernel_initializer(
                    kernel_shape, dtype=self._plain_dtype))
        bias_tensors = []
        for bias_shape in self.canonical_bias_shapes:
            bias_tensors.append(
                self._bias_initializer(bias_shape, dtype=self._plain_dtype))
        opaque_init = self._canonical_to_opaque(kernel_tensors, bias_tensors)

        if vs.get_variable_scope().partitioner is not None:
            logging.warn(
                "Partitioner is not supported for Cudnn RNN layer variables, using "
                "it will create forward-compatibility issues with future "
                "CUDA/CuDNN generations.")

        # The initial value is a tensor of unknown shape, hence
        # `validate_shape=False` instead of `self.add_variable(...)`.
        self.kernel = vs.get_variable(
            "opaque_kernel",
            dtype=self._plain_dtype,
            initializer=opaque_init,
            validate_shape=False)

    # The saveable is created in the scope outside the cudnn subgraph so
    # that an alternative subgraph built from platform-independent rnn cells
    # can load the checkpoints directly.
    if not self.built and vs.get_variable_scope().reuse is not True:
        self._create_saveable()
    self.built = True
def __init__(self, cell_size):
    """Store the cell size and pick the initializers for this object.

    Args:
      cell_size: value stored unchanged as `self.cell_size`.
    """
    self.cell_size = cell_size

    # Use the initializer of the current variable scope when it has a
    # truthy one, otherwise fall back to Glorot uniform.
    scope_initializer = tf.get_variable_scope().initializer
    if scope_initializer:
        self.default_initializer = scope_initializer
    else:
        self.default_initializer = init_ops.glorot_uniform_initializer()

    self.initializer = tf.orthogonal_initializer()
def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  hidden_units,
                  feature_columns,
                  optimizer='Adagrad',
                  activation_fn=nn.relu,
                  dropout=None,
                  input_layer_partitioner=None,
                  config=None):
    """Model function of a feed-forward deep neural network.

    Args:
      features: dict of `Tensor`s; its values are passed to the variable
        scopes and it is fed to the input layer.
      labels: labels forwarded unchanged to `head.create_estimator_spec`.
      mode: one of the `ModeKeys`; dropout is only applied when it equals
        `ModeKeys.TRAIN`.
      head: object providing `logits_dimension` and `create_estimator_spec`.
      hidden_units: iterable with the number of units of each hidden layer.
      feature_columns: feature columns consumed by the input layer.
      optimizer: anything accepted by `optimizers.get_optimizer_instance`;
        resolved with learning rate `_LEARNING_RATE`.
      activation_fn: activation applied to every hidden layer.
      dropout: when not `None`, the dropout rate used while training.
      input_layer_partitioner: partitioner for the input layer. When falsy, a
        `min_max_variable_partitioner` with `min_slice_size` 64 << 20 is used.
      config: optional object exposing `num_ps_replicas`; a falsy value means
        0 parameter-server replicas.

    Returns:
      Whatever `head.create_estimator_spec` returns for the computed logits.
    """
    optimizer = optimizers.get_optimizer_instance(
        optimizer, learning_rate=_LEARNING_RATE)

    if config:
        ps_replica_count = config.num_ps_replicas
    else:
        ps_replica_count = 0
    dnn_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=ps_replica_count)

    with variable_scope.variable_scope(
            'dnn',
            values=tuple(six.itervalues(features)),
            partitioner=dnn_partitioner):
        if not input_layer_partitioner:
            input_layer_partitioner = (
                partitioned_variables.min_max_variable_partitioner(
                    max_partitions=ps_replica_count,
                    min_slice_size=64 << 20))

        with variable_scope.variable_scope(
                'input_from_feature_columns',
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner):
            activations = feature_column_lib.input_layer(
                features=features, feature_columns=feature_columns)

        # Stack the hidden layers, each one in its own variable scope.
        for depth, layer_width in enumerate(hidden_units):
            with variable_scope.variable_scope(
                    'hiddenlayer_%d' % depth,
                    values=(activations,)) as hidden_layer_scope:
                activations = core_layers.dense(
                    activations,
                    units=layer_width,
                    activation=activation_fn,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name=hidden_layer_scope)
                if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                    activations = core_layers.dropout(
                        activations, rate=dropout, training=True)
            _add_hidden_layer_summary(activations, hidden_layer_scope.name)

        # Final linear projection onto the head's logits dimension.
        with variable_scope.variable_scope(
                'logits', values=(activations,)) as logits_scope:
            logits = core_layers.dense(
                activations,
                units=head.logits_dimension,
                activation=None,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=logits_scope)
        _add_hidden_layer_summary(logits, logits_scope.name)

        def _train_op_fn(loss):
            """Returns the op to optimize the loss."""
            step = training_util.get_global_step()
            return optimizer.minimize(loss, global_step=step)

        return head.create_estimator_spec(
            features=features,
            mode=mode,
            labels=labels,
            train_op_fn=_train_op_fn,
            logits=logits)
def test_cudnn_rnn(self):
    """Cross-check the `K` / `N` Cudnn RNN helpers against TensorFlow's ops.

    Returns immediately when `get_ngpu()` reports no GPU. For every
    combination of RNN mode, layer count, weight initializer, bias
    initializer and direction, the test checks:

      * the opaque parameter count against `cudnn_rnn_opaque_params_size`;
      * the canonical weight / bias shapes against the `cudnn_rnn` layers;
      * `K.params_to_cudnn` against
        `cudnn_rnn_canonical_to_opaque_params`;
      * that `K.cudnn_rnn` creates as many weights and biases, with the same
        names, as `K.init_rnn`;
      * that the `N.CudnnRNN` wrapper initialized from those variables gives
        the same output sums as `K.cudnn_rnn`.
    """
    import itertools

    if get_ngpu() == 0:
        return
    print()
    batch_size = 2
    time_steps = 5
    input_dim = 12
    hidden_dim = 8
    X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                   dtype='float32', name='X')

    # `itertools.product` iterates in the same order as the equivalent
    # nested loops (right-most factor varies fastest).
    configurations = itertools.product(
        ('lstm', 'rnn_relu', 'gru'),
        [1, 2],
        [init_ops.glorot_uniform_initializer(seed=1234),
         init_ops.random_normal_initializer(seed=1234)],
        [0, 1],
        (True, False),
        (False,))
    for (rnn_mode, num_layers, W_init, b_init,
         bidirectional, skip_input) in configurations:
        print('RNNmode:%s' % rnn_mode, "#Layers:%d" % num_layers,
              'Bidirectional:%s' % bidirectional,
              'SkipInput:%s' % skip_input)
        input_mode = 'skip_input' if skip_input else 'linear_input'
        direction = 'bidirectional' if bidirectional else 'unidirectional'

        weights, biases = K.init_rnn(
            input_dim=input_dim, hidden_dim=hidden_dim,
            num_gates=rnn_mode, num_layers=num_layers,
            W_init=W_init, b_init=b_init,
            skip_input=skip_input, cudnn_vector=False,
            is_bidirectional=bidirectional, name=None)

        # ====== check number of params ====== #
        params1 = K.params_to_cudnn(weights, biases)
        n = params1.shape[0].value
        nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
            rnn_mode=rnn_mode, num_layers=num_layers,
            num_units=hidden_dim, input_size=input_dim,
            input_mode=input_mode, direction=direction)
        nb_params = K.eval(nb_params)
        assert n == nb_params

        # ====== check canonical shape match ====== #
        kwargs = {
            'num_layers': num_layers,
            'num_units': hidden_dim,
            'input_mode': input_mode,
            'direction': direction,
        }
        if rnn_mode == 'lstm':
            rnn = cudnn_rnn.CudnnLSTM(**kwargs)
        elif rnn_mode == 'gru':
            rnn = cudnn_rnn.CudnnGRU(**kwargs)
        elif rnn_mode == 'rnn_relu':
            rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
        elif rnn_mode == 'rnn_tanh':
            rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
        rnn.build(input_shape=(None, None, input_dim))
        assert len(weights) == len(rnn.canonical_weight_shapes)
        assert len(biases) == len(rnn.canonical_bias_shapes)
        for w, s in zip(weights, rnn.canonical_weight_shapes):
            assert tuple(w.shape.as_list()) == s

        # ====== check params conversion ====== #
        K.initialize_all_variables()
        params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
            rnn_mode=rnn_mode, num_layers=num_layers,
            num_units=hidden_dim, input_size=input_dim,
            input_mode=input_mode, direction=direction,
            weights=weights, biases=biases)
        assert np.all(K.eval(params1) == K.eval(params2))

        # ====== odin cudnn implementation ====== #
        name = 'TEST' + uuid(length=25)
        outputs = K.cudnn_rnn(
            X=X, num_units=hidden_dim, rnn_mode=rnn_mode,
            num_layers=num_layers, parameters=None,
            skip_input=skip_input, is_bidirectional=bidirectional,
            dropout=0.1, name=name)
        K.initialize_all_variables()
        s0 = K.eval(outputs[0]).sum()
        s1 = K.eval(outputs[1]).sum()
        all_variables = K.get_all_variables(scope=name)
        new_weights = [
            i for i in all_variables
            if K.role.has_roles(i, roles=K.role.Weight)
        ]
        new_biases = [
            i for i in all_variables
            if K.role.has_roles(i, roles=K.role.Bias)
        ]
        new_weights, new_biases = K.sort_cudnn_params(
            new_weights, new_biases, rnn_mode=rnn_mode)
        # Compare against the variables recovered from `K.cudnn_rnn`; without
        # this the `zip` below would silently truncate on a length mismatch.
        assert len(weights) == len(new_weights)
        assert len(biases) == len(new_biases)
        for i, j in zip(weights + biases, new_weights + new_biases):
            assert i.name.split('/')[-1] == j.name.split('/')[-1]

        # ====== CudnnRNN wrapper ====== #
        rnn = N.CudnnRNN(
            num_units=hidden_dim,
            W_init=new_weights, b_init=new_biases,
            rnn_mode=rnn_mode, num_layers=num_layers,
            skip_input=skip_input, is_bidirectional=bidirectional,
            return_states=True, dropout=0.)
        outputs = rnn(X)
        K.initialize_all_variables()
        y0 = K.eval(outputs[0]).sum()
        y1 = K.eval(outputs[1]).sum()
        assert y0 == s0
        assert y1 == s1
def dnn_logit_fn(features, mode):
    """Deep Neural Network logit_fn with an optional attention-LSTM branch.

    When `rnn_feature_columns` is not `None`, those columns are embedded,
    reshaped to `[-1, FLAGS.rnn_length, FLAGS.rnn_input_size]`, run through
    an attention-wrapped `BasicLSTMCell`, flattened and concatenated to the
    regular input layer before the hidden layers.

    Args:
      features: This is the first item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. Its values are read with
        `six.itervalues`, so it is expected to be a `dict` of `Tensor`s.
      mode: Optional. Specifies if this training, evaluation or prediction.
        See `ModeKeys`. Dropout is only applied in `ModeKeys.TRAIN`.

    Returns:
      A `Tensor` representing the logits.
    """
    with variable_scope.variable_scope(
            'input_from_feature_columns',
            values=tuple(six.itervalues(features)),
            partitioner=input_layer_partitioner):
        net = feature_column_lib.input_layer(
            features=features, feature_columns=feature_columns)
        # NOTE(review): the RNN branch is built inside the input scope here;
        # confirm this matches the layout used by existing checkpoints.
        if rnn_feature_columns is not None:
            rnn_features_embedding = feature_column_lib.input_layer(
                features=features, feature_columns=rnn_feature_columns)
            rnn_features_embedding = tf.reshape(
                rnn_features_embedding,
                [-1, FLAGS.rnn_length, FLAGS.rnn_input_size])
            cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.rnn_hidden_size)
            att_wrapper = tf.contrib.rnn.AttentionCellWrapper(
                cell=cell, attn_length=10)
            outputs, _ = tf.nn.dynamic_rnn(
                att_wrapper, rnn_features_embedding, dtype=tf.float32)
            # Flatten the per-step outputs into one feature vector per row.
            outputs = tf.reshape(
                outputs, [-1, FLAGS.rnn_length * FLAGS.rnn_hidden_size])
            net = array_ops.concat([net, outputs], 1)

    for layer_id, num_hidden_units in enumerate(hidden_units):
        with variable_scope.variable_scope(
                'hiddenlayer_%d' % layer_id,
                values=(net,)) as hidden_layer_scope:
            net = core_layers.dense(
                net,
                units=num_hidden_units,
                activation=activation_fn,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=hidden_layer_scope)
            if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                net = core_layers.dropout(net, rate=dropout, training=True)
        _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
            'logits', values=(net,)) as logits_scope:
        logits = core_layers.dense(
            net,
            units=units,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    return logits
def build(self, input_shape):
    """Create the opaque parameter variable of the Cudnn RNN layer.

    Callable by hand ahead of `__call__()`, or reached automatically through
    `__call__()`. A layer already marked as built is left untouched.

    Args:
      input_shape: shape of the network input, a python list or a
        `TensorShape` object with 3 dimensions.

    Raises:
      ValueError: if `input_shape` does not have 3 dimensions or its last
        dimension is unknown.
    """
    if self.built:
        return

    full_shape = tensor_shape.TensorShape(input_shape)
    if full_shape.ndims != 3:
        raise ValueError("Expecting input_shape with 3 dims, got %d" %
                         full_shape.ndims)
    last_dim = full_shape[-1].value
    if last_dim is None:
        raise ValueError("The last dimension of the inputs to `CudnnRNN` "
                         "should be defined. Found `None`.")
    self._input_size = last_dim
    self.input_spec = base_layer.InputSpec(
        ndim=3, axes={-1: self._input_size})

    self._set_scope(None)

    # `add_variable()` of the base class is not used because it hands
    # `tf.get_variable()` a callable initializer, while a tensor is required
    # here to remain forward-compatible with Cudnn.
    with vs.variable_scope(
            self._variable_namespace,
            reuse=AUTO_REUSE,
            custom_getter=self._update_trainable_weights):
        if self._kernel_initializer is None:
            self._kernel_initializer = init_ops.glorot_uniform_initializer(
                seed=self._seed, dtype=self._plain_dtype)
        if self._bias_initializer is None:
            self._bias_initializer = init_ops.constant_initializer(
                0.0, dtype=self._plain_dtype)

        # Initial values in canonical layout, converted to the opaque blob.
        canonical_weights = []
        for weight_shape in self.canonical_weight_shapes:
            canonical_weights.append(
                self._kernel_initializer(
                    weight_shape, dtype=self._plain_dtype))
        canonical_biases = []
        for bias_shape in self.canonical_bias_shapes:
            canonical_biases.append(
                self._bias_initializer(bias_shape, dtype=self._plain_dtype))
        opaque_init = self._canonical_to_opaque(
            canonical_weights, canonical_biases)

        if vs.get_variable_scope().partitioner is not None:
            logging.warn(
                "Partitioner is not supported for Cudnn RNN layer variables, using "
                "it will create forward-compatibility issues with future "
                "CUDA/CuDNN generations.")

        # The opaque parameters are initialized from a tensor.
        self.kernel = vs.get_variable(
            "opaque_kernel",
            initializer=opaque_init,
            validate_shape=False)

    # The saveable lives in the scope outside the cudnn subgraph so that an
    # alternative subgraph built from platform-independent rnn cells can
    # load the checkpoints directly.
    if not self.built and vs.get_variable_scope().reuse is not True:
        self._create_saveable()
    self.built = True
def wide_and_deep(features=None, params=None):
    """Build the logits, head and train-op factory of a wide & deep model.

    The wide (linear) part is created under the variable scope 'linear' and
    the deep part under 'Deep_model'; each part is trained by its own
    optimizer restricted to the trainable variables of its scope.

    Args:
      features: mapping from column name to the input tensor of that column.
      params: dict of model settings. Keys read here:
        'WIDE', 'DEEP': truthy to build the wide / deep part.
        'WIDE_CATE_COLS': iterable of `(col, size)`.
        'WIDE_CROSS_COLS': iterable of `(col1, col2, size)`.
        'CONTINUOUS_COLS': iterable of column names.
        'DEEP_EMBEDDING_COLS': iterable of
          `(col, vals, embedding_size, col_type)`; `vals` is either an int
          bucket count or a vocabulary.
        'DEEP_SHARED_EMBEDDING_COLS': iterable of
          `(cols, vals, embedding_size, col_type, shared_flag)`.
        '_HIDDEN_UNITS': iterable of hidden layer sizes.
        '_LINEAR_LEARNING_RATE', '_DNN_LEARNING_RATE': learning rates.

    Returns:
      A tuple `(head, logits, train_op_fn)`. `logits` is the sum of the deep
      and wide logits when both parts are built, the single built part
      otherwise, and `None` when neither is built. `train_op_fn(loss)` returns
      the op that runs the optimizers and then increments the global step.
    """
    wide_cate_cols = params['WIDE_CATE_COLS']
    continuous_cols = params['CONTINUOUS_COLS']
    deep_embedding_cols = params['DEEP_EMBEDDING_COLS']
    wide_cross_cols = params['WIDE_CROSS_COLS']
    deep_shared_embedding_cols = params['DEEP_SHARED_EMBEDDING_COLS']
    hidden_units = params['_HIDDEN_UNITS']
    linear_learning_rate = params['_LINEAR_LEARNING_RATE']
    dnn_learning_rate = params['_DNN_LEARNING_RATE']

    # ---------------------------- wide part ---------------------------- #
    wide_logits = None
    linear_absolute_scope = None
    if params['WIDE']:
        wide_sum = []
        with variable_scope.variable_scope(
                'linear', values=tuple(six.itervalues(features))) as scope:
            linear_absolute_scope = scope.name

            # One weight per hash bucket of every categorical column.
            for col, size in wide_cate_cols:
                w_wide = tf.get_variable(
                    shape=[size, 1],
                    initializer=init_ops.zeros_initializer,
                    trainable=True,
                    name="Wide_Part_Weights_Cate" + col)
                indices = string_ops.string_to_hash_bucket_fast(
                    features[col], size, name="wide_hash_" + col)
                wide_sum.append(
                    tf.nn.embedding_lookup(
                        w_wide, indices, name="wide_cat_lookup_" + col))

            # Crossed columns: hash the "_"-joined pair of string features.
            for col1, col2, size in wide_cross_cols:
                cross_name = col1 + '_' + col2
                w_wide = tf.get_variable(
                    shape=[size, 1],
                    initializer=init_ops.zeros_initializer,
                    trainable=True,
                    name="Wide_Part_Weights_Cross" + cross_name)
                cross_input = tf.string_join(
                    [features[col1], features[col2]], separator="_")
                indices = string_ops.string_to_hash_bucket_fast(
                    cross_input, size, name="wide_hash_" + cross_name)
                wide_sum.append(
                    tf.nn.embedding_lookup(
                        w_wide, indices,
                        name="wide_cross_lookup_" + cross_name))

            # Continuous columns: a plain linear layer with a bias.
            w_wide = tf.get_variable(
                shape=[len(continuous_cols), 1],
                initializer=init_ops.zeros_initializer,
                trainable=True,
                name="Wide_Part_Weights_Continus")
            bias = tf.get_variable(
                shape=[1],
                initializer=init_ops.zeros_initializer,
                trainable=True,
                name="Wide_Part_Bias")
            x = tf.concat(
                [tf.expand_dims(tf.to_float(features[col]), -1)
                 for col in continuous_cols],
                1, name='continus_concat')
            continue_logits = tf.matmul(x, w_wide) + bias

            wide_logits = tf.reduce_sum(wide_sum, 0)
            wide_logits += continue_logits

    # ---------------------------- deep part ---------------------------- #
    deep_logits = None
    dnn_absolute_scope = None
    if params['DEEP']:
        with variable_scope.variable_scope(
                'Deep_model',
                values=tuple(six.itervalues(features))) as scope:
            dnn_absolute_scope = scope.name
            deep_sum = []

            # Per-column embeddings.
            for col, vals, embedding_size, col_type in deep_embedding_cols:
                bucket_size = vals if isinstance(vals, int) else len(vals)
                embeddings = tf.get_variable(
                    shape=[bucket_size, embedding_size],
                    initializer=init_ops.glorot_uniform_initializer(),
                    name="deep_embedding_" + col)
                if col_type != 'int':
                    indices = string_ops.string_to_hash_bucket_fast(
                        features[col], bucket_size, name="deep_hash_" + col)
                else:
                    table = tf.contrib.lookup.index_table_from_tensor(vals)
                    indices = table.lookup(features[col])
                seq_emb = tf.nn.embedding_lookup(
                    embeddings, indices, name="deep_lookup_" + col)
                if col_type == 'seq':
                    print("test my seq:", col)
                    seq_emb = tf.reduce_mean(seq_emb, 1)
                deep_sum.append(seq_emb)

            # Embeddings shared by a group of columns.
            for (cols, vals, embedding_size, col_type,
                 shared_flag) in deep_shared_embedding_cols:
                flag_suffix = str(shared_flag)
                bucket_size = vals if isinstance(vals, int) else len(vals)
                embeddings = tf.get_variable(
                    shape=[bucket_size, embedding_size],
                    initializer=init_ops.glorot_uniform_initializer(),
                    name=("deep_shared_embedding_" + '_'.join(cols) +
                          flag_suffix))
                for col in cols:
                    if col_type != 'int':
                        indices = string_ops.string_to_hash_bucket_fast(
                            features[col], bucket_size,
                            name="deep_shared_hash_" + col + flag_suffix)
                    else:
                        # The lookup table is built from the vocabulary
                        # `vals`, as for the non-shared embeddings above (it
                        # was previously built from `embedding_size`).
                        table = tf.contrib.lookup.index_table_from_tensor(
                            vals)
                        indices = table.lookup(features[col])
                    seq_emb = tf.nn.embedding_lookup(
                        embeddings, indices,
                        name="deep_shared_lookup_" + col + flag_suffix)
                    if col.endswith('seq'):
                        seq_emb = tf.reduce_mean(seq_emb, 1)
                    deep_sum.append(seq_emb)

            for col in continuous_cols:
                deep_sum.append(
                    tf.expand_dims(
                        tf.to_float(features[col]), -1,
                        name='continuous_' + col))

            curr_layer = tf.concat(deep_sum, 1, name="deep_inputs_layer")

            # Build the DNN.
            for index, layer_size in enumerate(hidden_units):
                curr_layer = tf.layers.dense(
                    curr_layer,
                    layer_size,
                    activation=tf.nn.relu,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name="deep_hidden_layer" + str(index))
            deep_logits = tf.layers.dense(
                curr_layer, units=1, name="deep_logits")

    # ----------------------- head, logits, training ----------------------- #
    my_head = head._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
        loss_reduction=losses.Reduction.SUM)
    print(my_head.logits_dimension)

    if deep_logits is not None and wide_logits is not None:
        logits = deep_logits + wide_logits
    elif deep_logits is not None:
        logits = deep_logits
    else:
        logits = wide_logits

    dnn_optimizer = optimizers.get_optimizer_instance(
        'Adagrad', learning_rate=dnn_learning_rate)

    def _linear_learning_rate(num_linear_feature_columns):
        """Cap the configured linear rate at 1 / sqrt(number of columns)."""
        if not num_linear_feature_columns:
            # No columns: 1 / sqrt(0) is unbounded, so the configured rate is
            # the minimum (and the division by zero is avoided).
            return linear_learning_rate
        default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
        return min(linear_learning_rate, default_learning_rate)

    linear_optimizer = optimizers.get_optimizer_instance(
        'Ftrl', learning_rate=_linear_learning_rate(len(wide_cate_cols)))

    def _train_op_fn(loss):
        """Returns the op that optimizes `loss` and bumps the global step."""
        train_ops = []
        global_step = training_util.get_global_step()
        if deep_logits is not None:
            train_ops.append(
                dnn_optimizer.minimize(
                    loss,
                    var_list=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=dnn_absolute_scope)))
        if wide_logits is not None:
            train_ops.append(
                linear_optimizer.minimize(
                    loss,
                    var_list=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=linear_absolute_scope)))
        train_op = control_flow_ops.group(*train_ops)
        # The global step is incremented once, after both optimizers ran.
        with ops.control_dependencies([train_op]):
            return state_ops.assign_add(global_step, 1).op

    return my_head, logits, _train_op_fn
def __init__(self,
             units,
             hidden_units,
             feature_columns,
             activation_fn,
             dropout,
             input_layer_partitioner,
             batch_norm,
             shared_state_manager,
             name=None,
             **kwargs):
    """Create and register all layers of the DNN model.

    Args:
      units: number of units of the final logits layer.
      hidden_units: iterable with the number of units of each hidden layer.
      feature_columns: feature columns consumed by the input layer; a
        `FeatureLayer` is used when `is_feature_column_v2` accepts them, an
        `InputLayer` otherwise.
      activation_fn: activation of every hidden layer.
      dropout: when not `None`, rate of the `Dropout` layer added after each
        hidden layer.
      input_layer_partitioner: stored as `self._input_layer_partitioner`.
      batch_norm: when truthy, a `BatchNormalization` layer is added after
        each hidden layer.
      shared_state_manager: forwarded to the v2 `FeatureLayer`.
      name: forwarded to the base class constructor.
      **kwargs: forwarded to the base class constructor.
    """
    super(_DNNModel, self).__init__(name=name, **kwargs)

    uses_v2_columns = feature_column_v2.is_feature_column_v2(feature_columns)
    if not uses_v2_columns:
        self._input_layer = feature_column.InputLayer(
            feature_columns=feature_columns,
            name='input_layer',
            create_scope_now=False)
    else:
        self._input_layer = feature_column_v2.FeatureLayer(
            feature_columns=feature_columns,
            name='input_layer',
            shared_state_manager=shared_state_manager)
    self._add_layer(self._input_layer, 'input_layer')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, layer_width in enumerate(hidden_units):
        with variable_scope.variable_scope(
                'hiddenlayer_%d' % layer_id) as hidden_layer_scope:
            dense_layer = core_layers.Dense(
                units=layer_width,
                activation=activation_fn,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=hidden_layer_scope,
                _scope=hidden_layer_scope)
            self._add_layer(dense_layer, hidden_layer_scope.name)
            self._hidden_layer_scope_names.append(hidden_layer_scope.name)
            self._hidden_layers.append(dense_layer)

            if self._dropout is not None:
                drop = core_layers.Dropout(rate=self._dropout)
                self._add_layer(drop, drop.name)
                self._dropout_layers.append(drop)

            if self._batch_norm:
                bn_name = 'batchnorm_%d' % layer_id
                bn = normalization.BatchNormalization(
                    # The default momentum 0.99 actually crashes on certain
                    # problem, so here we use 0.999, which is the default of
                    # tf.contrib.layers.batch_norm.
                    momentum=0.999,
                    trainable=True,
                    name=bn_name,
                    _scope=bn_name)
                self._add_layer(bn, bn.name)
                self._batch_norm_layers.append(bn)

    with variable_scope.variable_scope('logits') as logits_scope:
        self._logits_layer = core_layers.Dense(
            units=units,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=logits_scope,
            _scope=logits_scope)
        self._add_layer(self._logits_layer, logits_scope.name)
        self._logits_scope_name = logits_scope.name

    self._input_layer_partitioner = input_layer_partitioner
def glorot_uniform():
    """Return a new Glorot uniform initializer built with default arguments."""
    initializer = init_ops.glorot_uniform_initializer()
    return initializer
def testAttentionCellWrapperCorrectResult(self):
    """Checks `AttentionCellWrapper` output and state against fixed values.

    The same cell is built and run once with `state_is_tuple=False` and once
    with `state_is_tuple=True`; both runs must produce `expected_output` and
    `expected_state`.
    """
    num_units = 4
    attn_length = 6
    batch_size = 2
    # Hard-coded reference values. NOTE(review): they presumably depend on the
    # seeds below and on the order in which ops are created -- do not reorder
    # the statements of this test without regenerating them.
    expected_output = np.array([[1.068372, 0.45496, -0.678277, 0.340538],
                                [1.018088, 0.378983, -0.572179, 0.268591]],
                               dtype=np.float32)
    expected_state = np.array(
        [[
            0.74946702, 0.34681597, 0.26474735, 1.06485605, 0.38465962,
            0.11420801, 0.10272158, 0.30925757, 0.63899988, 0.7181077,
            0.47534478, 0.33715725, 0.58086717, 0.49446869, 0.7641536,
            0.12814975, 0.92231739, 0.89857256, 0.21889746, 0.38442063,
            0.53481543, 0.8876909, 0.45823169, 0.5905602, 0.78038228,
            0.56501579, 0.03971386, 0.09870267, 0.8074435, 0.66821432,
            0.99211812, 0.12295902, 1.14606023, 0.34370938, -0.79251152,
            0.51843399
        ], [
            0.5179342, 0.48682183, -0.25426468, 0.96810579, 0.28809637,
            0.13607743, -0.11446252, 0.26792109, 0.78047138, 0.63460857,
            0.49122369, 0.52007174, 0.73000264, 0.66986895, 0.73576689,
            0.86301267, 0.87887371, 0.35185754, 0.93417215, 0.64732957,
            0.63173044, 0.66627824, 0.53644657, 0.20477486, 0.98458421,
            0.38277245, 0.03746676, 0.92510188, 0.57714164, 0.84932971,
            0.36127412, 0.12125921, 1.1362772, 0.34361625, -0.78150457,
            0.70582712
        ]],
        dtype=np.float32)
    seed = 12345
    random_seed.set_random_seed(seed)
    for state_is_tuple in [False, True]:
        with session.Session() as sess:
            # `reuse` is False on the first pass and True on the second, so
            # the second pass opens the same "state_is_tuple" scope in reuse
            # mode.
            with variable_scope.variable_scope(
                    "state_is_tuple", reuse=state_is_tuple,
                    initializer=init_ops.glorot_uniform_initializer()):
                lstm_cell = core_rnn_cell_impl.BasicLSTMCell(
                    num_units, state_is_tuple=state_is_tuple)
                cell = rnn_cell.AttentionCellWrapper(
                    lstm_cell, attn_length, state_is_tuple=state_is_tuple)
                # Despite the `zeros*` names, the initial state pieces are
                # seeded random-uniform tensors, not zeros.
                zeros1 = random_ops.random_uniform((batch_size, num_units), 0.0, 1.0, seed=seed + 1)
                zeros2 = random_ops.random_uniform((batch_size, num_units), 0.0, 1.0, seed=seed + 2)
                zeros3 = random_ops.random_uniform((batch_size, num_units), 0.0, 1.0, seed=seed + 3)
                attn_state_zeros = random_ops.random_uniform(
                    (batch_size, attn_length * num_units), 0.0, 1.0, seed=seed + 4)
                zero_state = ((zeros1, zeros2), zeros3, attn_state_zeros)
                if not state_is_tuple:
                    # Flat-state mode: concatenate the four pieces along
                    # axis 1 into a single tensor.
                    zero_state = array_ops.concat([
                        zero_state[0][0], zero_state[0][1], zero_state[1],
                        zero_state[2]
                    ], 1)
                inputs = random_ops.random_uniform((batch_size, num_units), 0.0, 1.0, seed=seed + 5)
                output, state = cell(inputs, zero_state)
                if state_is_tuple:
                    # Flatten the tuple state the same way so that both modes
                    # are compared against the same `expected_state`.
                    state = array_ops.concat(
                        [state[0][0], state[0][1], state[1], state[2]], 1)
                sess.run(variables.global_variables_initializer())
                self.assertAllClose(sess.run(output), expected_output)
                self.assertAllClose(sess.run(state), expected_state)
def _weight(shape):
    """Creates a variable of `shape`, Glorot-uniform initialized with seed 0."""

    def _initial_value():
        return init_ops.glorot_uniform_initializer(seed=0)(shape)

    # A callable initial value (rather than a tensor) is what allows the
    # variable to be constructed inside loops.
    return variables.Variable(_initial_value)
def _initialize(self, X): # ====== check inputs dimensions ====== # if not hasattr(X, 'shape'): raise ValueError("`X` must have `shape` attribute.") feat_dim = np.prod(X.shape[1:]) if self._feat_dim is None: self._feat_dim = feat_dim # validate input dimension if feat_dim != self._feat_dim: raise RuntimeError("Feature dimension mismatch %d and %d" % (feat_dim, self.feat_dim)) # check if tensorflow op initalized if hasattr(self, '_f_train'): return # ====== binary or multi-classes ====== # if self.nb_classes == 2: out_shape = (None,) fn_activation = tf.nn.sigmoid fn_loss = tf.losses.sigmoid_cross_entropy fn_acc = K.metrics.binary_accuracy else: out_shape = (None, self.nb_classes) fn_activation = tf.nn.softmax fn_loss = tf.losses.softmax_cross_entropy fn_acc = K.metrics.categorical_accuracy # ====== create model ====== # with tf.name_scope(self.name, 'logistic_regression'): # inputs self._X = K.placeholder(shape=(None, self.feat_dim), dtype=self.dtype, name='%s_input' % self.name) self._y = K.placeholder(shape=out_shape, dtype=self.dtype, name='%s_output' % self.name) # check the bias if is_number(self.fit_intercept): b_init = float(self.fit_intercept) elif self.fit_intercept is False or \ self.fit_intercept is None: b_init = None else: b_init = self.fit_intercept # create the model and initialize with K.variable_dtype(dtype=self.dtype): self._model = N.Dense(num_units=self.nb_classes, W_init=init_ops.glorot_uniform_initializer(seed=self._rand_state.randint()), b_init=b_init, activation=K.linear) y_logits = self._model(self._X) y_prob = fn_activation(y_logits) # applying class weights class_weights = tf.constant(value=self._class_weight, dtype=self.dtype, name="class_weights") weights = tf.gather(class_weights, tf.cast(self._y, 'int32') if self.nb_classes == 2 else tf.argmax(self._y, axis=-1)) # optimizer params = [v for v in self._model.variables if has_roles(v, Weight) or has_roles(v, Bias)] losses = fn_loss(self._y, y_logits, weights=weights) l1_norm = 
tf.norm(self._model.get('W'), ord=1) if self.l1 > 0. else 0 l2_norm = tf.norm(self._model.get('W'), ord=2) if self.l2 > 0. else 0 losses = losses + self.l1 * l1_norm + self.l2 * l2_norm acc = fn_acc(self._y, y_prob) updates = self._optimizer.get_updates(losses, params) # create function if self.confusion_matrix: cm = K.metrics.confusion_matrix(y_true=self._y, y_pred=y_prob, labels=self.nb_classes) metrics = [losses, acc, cm] if self.confusion_matrix else [losses, acc] self._f_train = K.function(inputs=(self._X, self._y), outputs=metrics, updates=updates, training=True) self._f_score = K.function(inputs=(self._X, self._y), outputs=metrics, training=False) self._f_pred_prob = K.function(inputs=self._X, outputs=y_prob, training=False) self._f_pred_logit = K.function(inputs=self._X, outputs=y_logits, training=False) return self