def __init__(self, num_hidden_layers, dropout=True, dropout_rate=0.2,
             activation=None, kernel_initializer=None,
             bias_initializer=tf.zeros_initializer(), kernel_regularizer=None,
             name=None, reuse=False):
    self.layers = []
    self.num_hidden_layers = num_hidden_layers
    self._name = name if name else 'MLP'
    self._reuse = reuse
    for zb_layer_num, num_hidden_layer in enumerate(num_hidden_layers):
        layer_num = zb_layer_num + 1
        if layer_num < len(num_hidden_layers):
            # Intermediate layer: optional dropout, then the requested activation.
            if dropout:
                self.layers.append(layers_core.Dropout(rate=dropout_rate))
            layer_activation = activation
        else:
            # Final layer: linear output, no activation.
            layer_activation = None
        self.layers.append(
            layers_core.Dense(
                num_hidden_layer,  # Dense output dim; the input dim is inferred from the previous layer.
                activation=layer_activation,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                trainable=True,
                name='{}/dense_{}'.format(self._name, layer_num),
                _reuse=reuse))
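# Illustrative usage sketch (not from the source): how the layer list built by the
# constructor above might be applied to an input tensor. The class name `MLP`, the
# input shape, and the apply loop are assumptions made only for this example.
import tensorflow as tf
from tensorflow.python.layers import core as layers_core

mlp = MLP(num_hidden_layers=[128, 64, 1], dropout=True, dropout_rate=0.2,
          activation=tf.nn.relu, name='score_mlp')
net = tf.placeholder(tf.float32, [None, 32])
for layer in mlp.layers:
    if isinstance(layer, layers_core.Dropout):
        # Dropout only takes effect when training=True; at inference it is a no-op.
        net = layer(net, training=True)
    else:
        net = layer(net)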
def call(self, inputs, state):
    if not isinstance(state, tf.contrib.seq2seq.AttentionWrapperState):
        raise TypeError("Expected state to be instance of AttentionWrapperState. "
                        "Received type %s instead." % type(state))
    cell_inputs = self._cell_input_fn(inputs, state.attention)
    cell_state = state.cell_state
    cell_output, next_cell_state = self._cell(cell_inputs, cell_state)
    cell_batch_size = (
        cell_output.shape[0].value or tf.shape(cell_output)[0])
    error_message = 'custom error msg:1'
    with tf.control_dependencies(
        self._batch_size_checks(cell_batch_size, error_message)):
        cell_output = tf.identity(cell_output, name="checked_cell_output")
    if self._is_multi:
        previous_attention_state = state.attention_state
        previous_alignment_history = state.alignment_history
    else:
        previous_attention_state = [state.attention_state]
        previous_alignment_history = [state.alignment_history]
    all_alignments = []
    all_attentions = []
    all_attention_states = []
    maybe_all_histories = []
    for i, attention_mechanism in enumerate(self._attention_mechanisms):
        attention, alignments, next_attention_state = _compute_attention(
            attention_mechanism, cell_output, previous_attention_state[i],
            self._attention_layers[i] if self._attention_layers else None,
            self._attention_dropout_layers[i] if self._attention_dropout_layers else None,
            self.training)
        alignment_history = previous_alignment_history[i].write(
            state.time, alignments) if self._alignment_history else ()
        all_attention_states.append(next_attention_state)
        all_alignments.append(alignments)
        all_attentions.append(attention)
        maybe_all_histories.append(alignment_history)
    attention = tf.concat(all_attentions, 1)
    next_state = tf.contrib.seq2seq.AttentionWrapperState(
        time=state.time + 1,
        cell_state=next_cell_state,
        attention=attention,
        attention_state=self._item_or_tuple(all_attention_states),
        alignments=self._item_or_tuple(all_alignments),
        alignment_history=self._item_or_tuple(maybe_all_histories))
    # Project the concatenated cell output and attention, then apply dropout at
    # the configured keep probability.
    outputs = layers_core.Dropout(rate=1 - self.keep_prob)(
        self.attention_layer(tf.concat([cell_output, attention], 1)),
        training=self.training)
    if self._output_attention:
        return outputs, next_state
    else:
        raise NotImplementedError()
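# Illustrative sketch (not from the source): one way the modified _compute_attention
# helper referenced above might look, assuming it mirrors the stock
# tf.contrib.seq2seq helper with an extra dropout layer applied after the attention
# projection. The dropout placement and signature are assumptions for this example.
import tensorflow as tf

def _compute_attention(attention_mechanism, cell_output, attention_state,
                       attention_layer, attention_dropout_layer, training):
    alignments, next_attention_state = attention_mechanism(
        cell_output, state=attention_state)
    # Context vector: alignment-weighted sum of the memory values.
    expanded_alignments = tf.expand_dims(alignments, 1)
    context = tf.matmul(expanded_alignments, attention_mechanism.values)
    context = tf.squeeze(context, [1])
    if attention_layer is not None:
        attention = attention_layer(tf.concat([cell_output, context], 1))
        if attention_dropout_layer is not None:
            attention = attention_dropout_layer(attention, training=training)
    else:
        attention = context
    return attention, alignments, next_attention_state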
def testCustomNoiseShape(self):
    inputs = array_ops.ones((5, 3, 2))
    noise_shape = [5, 1, 2]
    dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
    dropped = dp.apply(inputs, training=True)
    self.evaluate(variables.global_variables_initializer())
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])
def testCustomNoiseShape(self):
    with self.test_session() as sess:
        inputs = tf.ones((5, 3, 2))
        noise_shape = [5, 1, 2]
        dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
        dropped = dp.apply(inputs, training=True)
        sess.run(tf.global_variables_initializer())
        np_output = sess.run(dropped)
        self.assertAlmostEqual(0., np_output.min())
        self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])
def testDynamicRate(self):
    with self.cached_session() as sess:
        rate = array_ops.placeholder(dtype='float32', name='rate')
        dp = core_layers.Dropout(rate, name='dropout')
        inputs = array_ops.ones((5, 5))
        dropped = dp.apply(inputs, training=True)
        self.evaluate(variables.global_variables_initializer())
        np_output = sess.run(dropped, feed_dict={rate: 0.5})
        self.assertAlmostEqual(0., np_output.min())
        np_output = sess.run(dropped, feed_dict={rate: 0.0})
        self.assertAllClose(np.ones((5, 5)), np_output)
def testDynamicLearningPhase(self):
    with self.cached_session() as sess:
        dp = core_layers.Dropout(0.5, seed=1)
        inputs = array_ops.ones((5, 5))
        training = array_ops.placeholder(dtype='bool')
        dropped = dp.apply(inputs, training=training)
        self.evaluate(variables.global_variables_initializer())
        np_output = sess.run(dropped, feed_dict={training: True})
        self.assertAlmostEqual(0., np_output.min())
        np_output = sess.run(dropped, feed_dict={training: False})
        self.assertAllClose(np.ones((5, 5)), np_output)
def testBooleanLearningPhase(self):
    dp = core_layers.Dropout(0.5)
    inputs = array_ops.ones((5, 3))
    dropped = dp.apply(inputs, training=True)
    if not context.executing_eagerly():
        self.evaluate(variables.global_variables_initializer())
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    dropped = dp.apply(inputs, training=False)
    np_output = self.evaluate(dropped)
    self.assertAllClose(np.ones((5, 3)), np_output)
def testBooleanLearningPhase(self):
    with self.test_session() as sess:
        dp = core_layers.Dropout(0.5)
        inputs = array_ops.ones((5, 3))
        dropped = dp.apply(inputs, training=True)
        sess.run(variables.global_variables_initializer())
        np_output = sess.run(dropped)
        self.assertAlmostEqual(0., np_output.min())
        dropped = dp.apply(inputs, training=False)
        np_output = sess.run(dropped)
        self.assertAllClose(np.ones((5, 3)), np_output)
def testTrainingLayer(self):
    net = network.Sequential([core.Dropout(0.99999)])
    two = constant_op.constant(2.0)
    self.assertEqual(2.0, net(two).numpy())
    self.assertEqual(2.0, net(two, training=False).numpy())
    for _ in range(20):
        # Kept values are scaled by 1 / (1 - rate) = 100000, so 2.0 becomes 200000.
        with_dropout = net(two, training=True).numpy()
        self.assertIn(with_dropout, [0.0, 200000.0])
        if with_dropout == 0.0:
            return
    # Should only fail spuriously 1 in 10^100 runs.
    self.fail("Didn't see dropout happen after 20 tries.")
def dropout(keep_prob=0.5, noise_shape=None, name=None):
    """Returns a dropout layer configured with the given keep probability.

    With probability `keep_prob`, the layer outputs the input element scaled up
    by `1 / keep_prob`, otherwise it outputs `0`, so the expected sum is
    unchanged. Dropout is only applied when the returned layer is called with
    `training=True`; otherwise the input is passed through unchanged.

    Args:
      keep_prob: A scalar `Tensor` with the same type as the input. The
        probability that each element is kept.
      noise_shape: A 1-D `Tensor` of type `int32`, representing the shape for
        randomly generated keep/drop flags.
      name: Optional name for the layer and its scope.

    Returns:
      A `Dropout` layer object (the layer itself, not an output tensor).
    """
    layer = core_layers.Dropout(rate=1 - keep_prob,
                                noise_shape=noise_shape,
                                name=name,
                                _scope=name)
    return layer
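# Illustrative usage sketch (not from the source): the wrapper above converts a
# keep probability into the drop rate expected by the Dropout layer
# (rate = 1 - keep_prob) and returns the layer object; applying it to a tensor is
# left to the caller. Tensor shapes and names here are invented for this example.
import tensorflow as tf

x = tf.ones((4, 10))
layer = dropout(keep_prob=0.8, name='drop1')   # equivalent to Dropout(rate=0.2)
y_train = layer.apply(x, training=True)        # kept elements scaled by 1 / 0.8
y_infer = layer.apply(x, training=False)       # identity at inference time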
def testDropoutProperties(self):
    dp = core_layers.Dropout(0.5, name='dropout')
    self.assertEqual(dp.rate, 0.5)
    self.assertEqual(dp.noise_shape, None)
    dp.apply(array_ops.ones(()))
    self.assertEqual(dp.name, 'dropout')
def __init__(self, units, hidden_units, feature_columns, activation_fn,
             dropout, input_layer_partitioner, batch_norm, name=None, **kwargs):
    super(_DNNModel, self).__init__(name=name, **kwargs)
    if feature_column_lib.is_feature_column_v2(feature_columns):
        self._input_layer = feature_column_lib.DenseFeatures(
            feature_columns=feature_columns, name='input_layer')
    else:
        self._input_layer = feature_column.InputLayer(
            feature_columns=feature_columns,
            name='input_layer',
            create_scope_now=False)
    self._add_layer(self._input_layer, 'input_layer')
    self._dropout = dropout
    self._batch_norm = batch_norm
    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
        with variable_scope.variable_scope(
            'hiddenlayer_%d' % layer_id) as hidden_layer_scope:
            hidden_layer = core_layers.Dense(
                units=num_hidden_units,
                activation=activation_fn,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=hidden_layer_scope,
                _scope=hidden_layer_scope)
            self._add_layer(hidden_layer, hidden_layer_scope.name)
            self._hidden_layer_scope_names.append(hidden_layer_scope.name)
            self._hidden_layers.append(hidden_layer)
            if self._dropout is not None:
                dropout_layer = core_layers.Dropout(rate=self._dropout)
                self._add_layer(dropout_layer, dropout_layer.name)
                self._dropout_layers.append(dropout_layer)
            if self._batch_norm:
                batch_norm_layer = normalization.BatchNormalization(
                    # The default momentum 0.99 actually crashes on certain
                    # problems, so here we use 0.999, which is the default of
                    # tf.contrib.layers.batch_norm.
                    momentum=0.999,
                    trainable=True,
                    name='batchnorm_%d' % layer_id,
                    _scope='batchnorm_%d' % layer_id)
                self._add_layer(batch_norm_layer, batch_norm_layer.name)
                self._batch_norm_layers.append(batch_norm_layer)
    with variable_scope.variable_scope('logits') as logits_scope:
        self._logits_layer = core_layers.Dense(
            units=units,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=logits_scope,
            _scope=logits_scope)
        self._add_layer(self._logits_layer, logits_scope.name)
        self._logits_scope_name = logits_scope.name
    self._input_layer_partitioner = input_layer_partitioner
def testDropoutProperties(self):
    dp = core_layers.Dropout(0.5)
    self.assertEqual(dp.rate, 0.5)
    self.assertEqual(dp.name, 'dropout')
    self.assertEqual(dp.noise_shape, None)
def __init__(self, units, hidden_units, feature_columns, activation_fn,
             dropout, batch_norm, name=None, **kwargs):
    super(_DNNModelV2, self).__init__(name=name, **kwargs)
    # Add this name_scope for backward compatibility, as previously it was used
    # in variable_scope.
    with ops.name_scope(
        'input_from_feature_columns') as input_feature_column_scope:
        layer_name = input_feature_column_scope + 'input_layer'
        if feature_column_lib.is_feature_column_v2(feature_columns):
            self._input_layer = feature_column_lib.DenseFeatures(
                feature_columns=feature_columns, name=layer_name)
        else:
            self._input_layer = feature_column.InputLayer(
                feature_columns=feature_columns,
                name=layer_name,
                create_scope_now=False)
    self._add_layer(self._input_layer, self._input_layer.name)
    self._dropout = dropout
    self._batch_norm = batch_norm
    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
        with ops.name_scope('hiddenlayer_%d' % layer_id) as hidden_layer_scope:
            # Get scope name without the trailing slash.
            hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
            hidden_layer = core_layers.Dense(
                units=num_hidden_units,
                activation=activation_fn,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=hidden_shared_name)
            self._add_layer(hidden_layer, hidden_shared_name)
            self._hidden_layer_scope_names.append(hidden_shared_name)
            self._hidden_layers.append(hidden_layer)
            if self._dropout is not None:
                dropout_layer = core_layers.Dropout(rate=self._dropout)
                self._add_layer(dropout_layer, dropout_layer.name)
                self._dropout_layers.append(dropout_layer)
            if self._batch_norm:
                batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
                batch_norm_layer = normalization.BatchNormalization(
                    # The default momentum 0.99 actually crashes on certain
                    # problems, so here we use 0.999, which is the default of
                    # tf.contrib.layers.batch_norm.
                    momentum=0.999,
                    trainable=True,
                    name=batch_norm_name)
                self._add_layer(batch_norm_layer, batch_norm_name)
                self._batch_norm_layers.append(batch_norm_layer)
    with ops.name_scope('logits') as logits_scope:
        logits_shared_name = _name_from_scope_name(logits_scope)
        self._logits_layer = core_layers.Dense(
            units=units,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=logits_shared_name)
        self._add_layer(self._logits_layer, logits_shared_name)
        self._logits_scope_name = logits_shared_name
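# Illustrative sketch (not from the source): how the layers built by the two DNN
# constructors above are typically chained in the forward pass
# (input layer -> Dense -> optional Dropout -> optional BatchNorm -> logits).
# The function name and exact ordering are assumptions for this example, not the
# actual _DNNModel / _DNNModelV2 call().
def _sketch_dnn_forward(model, features, training):
    net = model._input_layer(features)
    for i, hidden_layer in enumerate(model._hidden_layers):
        net = hidden_layer(net)
        if model._dropout is not None and training:
            net = model._dropout_layers[i](net, training=training)
        if model._batch_norm:
            net = model._batch_norm_layers[i](net, training=training)
    return model._logits_layer(net)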
def __init__(self, cell, attention_mechanism, keep_prob, training,
             attention_layer_size=None, alignment_history=False,
             cell_input_fn=None, output_attention=True,
             initial_cell_state=None, name=None):
    super(MyAttentionWrapper, self).__init__(
        cell, attention_mechanism, attention_layer_size, alignment_history,
        cell_input_fn, output_attention, initial_cell_state, name)
    self.keep_prob = keep_prob
    self.training = training
    # Re-run the base initialization with custom attention layers that include
    # dropout; the body below mirrors tf.contrib.seq2seq.AttentionWrapper.__init__.
    super(tf.contrib.seq2seq.AttentionWrapper, self).__init__(name=name)
    if not rnn_cell_impl._like_rnncell(cell):  # pylint: disable=protected-access
        raise TypeError("cell must be an RNNCell, saw type: %s" %
                        type(cell).__name__)
    if isinstance(attention_mechanism, (list, tuple)):
        self._is_multi = True
        attention_mechanisms = attention_mechanism
        for attention_mechanism in attention_mechanisms:
            if not isinstance(attention_mechanism,
                              tf.contrib.seq2seq.AttentionMechanism):
                raise TypeError(
                    "attention_mechanism must contain only instances of "
                    "AttentionMechanism, saw type: %s" %
                    type(attention_mechanism).__name__)
    else:
        self._is_multi = False
        if not isinstance(attention_mechanism,
                          tf.contrib.seq2seq.AttentionMechanism):
            raise TypeError(
                "attention_mechanism must be an AttentionMechanism or list of "
                "multiple AttentionMechanism instances, saw type: %s" %
                type(attention_mechanism).__name__)
        attention_mechanisms = (attention_mechanism,)
    if cell_input_fn is None:
        cell_input_fn = (
            lambda inputs, attention: tf.concat([inputs, attention], -1))
    else:
        if not callable(cell_input_fn):
            raise TypeError("cell_input_fn must be callable, saw type: %s" %
                            type(cell_input_fn).__name__)
    if attention_layer_size is not None:
        attention_layer_sizes = tuple(
            attention_layer_size
            if isinstance(attention_layer_size, (list, tuple))
            else (attention_layer_size,))
        if len(attention_layer_sizes) != len(attention_mechanisms):
            raise ValueError(
                "If provided, attention_layer_size must contain exactly one "
                "integer per attention_mechanism, saw: %d vs %d" %
                (len(attention_layer_sizes), len(attention_mechanisms)))
        self._attention_layers = tuple(
            layers_core.Dense(attention_layer_size,
                              name="attention_layer",
                              use_bias=True,
                              activation=tf.tanh)
            for attention_layer_size in attention_layer_sizes)
        # One dropout layer per attention layer, with rate derived from keep_prob.
        self._attention_dropout_layers = tuple(
            layers_core.Dropout(rate=1 - self.keep_prob,
                                name="attention_dropout_layer")
            for attention_layer_size in attention_layer_sizes)
        self._attention_layer_size = sum(attention_layer_sizes)
    else:
        self._attention_layers = None
        self._attention_dropout_layers = None
        self._attention_layer_size = sum(
            attention_mechanism.values.get_shape()[-1].value
            for attention_mechanism in attention_mechanisms)
    self._cell = cell
    self._attention_mechanisms = attention_mechanisms
    self._cell_input_fn = cell_input_fn
    self._output_attention = output_attention
    self._alignment_history = alignment_history
    with tf.name_scope(name, "AttentionWrapperInit"):
        if initial_cell_state is None:
            self._initial_cell_state = None
        else:
            final_state_tensor = nest.flatten(initial_cell_state)[-1]
            state_batch_size = (
                final_state_tensor.shape[0].value or
                tf.shape(final_state_tensor)[0])
            error_message = 'custom error msg:0'
            with tf.control_dependencies(
                self._batch_size_checks(state_batch_size, error_message)):
                self._initial_cell_state = nest.map_structure(
                    lambda s: tf.identity(s, name="check_initial_cell_state"),
                    initial_cell_state)
    self.attention_layer = layers_core.Dense(512, activation=tf.tanh)
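# Illustrative construction sketch (not from the source): wiring MyAttentionWrapper
# with a single Luong attention mechanism. The encoder outputs, sequence lengths,
# and layer sizes are placeholders invented for this example.
import tensorflow as tf

encoder_outputs = tf.placeholder(tf.float32, [None, None, 512])  # [batch, time, depth]
source_lengths = tf.placeholder(tf.int32, [None])
attention_mechanism = tf.contrib.seq2seq.LuongAttention(
    num_units=512, memory=encoder_outputs,
    memory_sequence_length=source_lengths)
decoder_cell = MyAttentionWrapper(
    cell=tf.nn.rnn_cell.BasicLSTMCell(512),
    attention_mechanism=attention_mechanism,
    keep_prob=0.8,            # drives the rate of the attention dropout layers
    training=True,
    attention_layer_size=512,
    alignment_history=False)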