def testFunctionalDropout(self):
  with self.test_session():
    inputs = array_ops.ones((5, 5))
    dropped = core_layers.dropout(inputs, 0.5, training=True, seed=1)
    variables.global_variables_initializer().run()
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    dropped = core_layers.dropout(inputs, 0.5, training=False, seed=1)
    np_output = self.evaluate(dropped)
    self.assertAllClose(np.ones((5, 5)), np_output)
def testFunctionalDropout(self):
  with self.cached_session():
    inputs = array_ops.ones((5, 5))
    dropped = core_layers.dropout(inputs, 0.5, training=True, seed=1)
    variables.global_variables_initializer().run()
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    dropped = core_layers.dropout(inputs, 0.5, training=False, seed=1)
    np_output = self.evaluate(dropped)
    self.assertAllClose(np.ones((5, 5)), np_output)
def testFunctionalDropout(self):
  inputs = array_ops.ones((5, 5))
  dropped = core_layers.dropout(inputs, 0.5, training=True, seed=1)
  if context.in_graph_mode():
    self.evaluate(variables.global_variables_initializer())
  np_output = self.evaluate(dropped)
  self.assertAlmostEqual(0., np_output.min())
  dropped = core_layers.dropout(inputs, 0.5, training=False, seed=1)
  np_output = self.evaluate(dropped)
  self.assertAllClose(np.ones((5, 5)), np_output)
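# A minimal runnable sketch (TF 1.x graph mode, public tf.layers API) of the
# behavior the tests above assert: with training=True, dropout zeroes some
# units and scales the survivors by 1/(1 - rate); with training=False it is
# the identity. Exactly which units are dropped depends on the seed.
import numpy as np
import tensorflow as tf

x = tf.ones((5, 5))
train_out = tf.layers.dropout(x, rate=0.5, training=True, seed=1)
eval_out = tf.layers.dropout(x, rate=0.5, training=False, seed=1)
with tf.Session() as sess:
  train_np, eval_np = sess.run([train_out, eval_out])
  print(train_np.min())                         # 0.0 with very high probability
  print(np.allclose(eval_np, np.ones((5, 5))))  # True: identity at eval time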
def dnn_logit_fn(features, mode):
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    net = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
      net = core_layers.dense(
          net,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        net = core_layers.dropout(net, rate=dropout, training=True)
    _add_hidden_layer_summary(net, hidden_layer_scope.name)

  with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
    logits = core_layers.dense(
        net,
        units=units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        name=logits_scope)
  _add_hidden_layer_summary(logits, logits_scope.name)
  return logits
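# The dnn_logit_fn variants in this section reference names such as
# hidden_units, feature_columns, dropout and units that are not parameters:
# they are closures produced by a builder (tf.estimator's internal one is
# called _dnn_logit_fn_builder). A hedged, self-contained sketch of the
# pattern using only the public tf.layers API:
import tensorflow as tf

def dnn_logit_fn_builder(units, hidden_units, dropout):
  """Returns a logit_fn that closes over the configuration above."""
  def dnn_logit_fn(features, mode):
    net = features['x']
    for layer_id, num_hidden_units in enumerate(hidden_units):
      net = tf.layers.dense(net, num_hidden_units, activation=tf.nn.relu,
                            name='hiddenlayer_%d' % layer_id)
      if dropout is not None and mode == tf.estimator.ModeKeys.TRAIN:
        net = tf.layers.dropout(net, rate=dropout, training=True)
    return tf.layers.dense(net, units, name='logits')
  return dnn_logit_fn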
def dropout(self, keep_prob=0.5, input_layer=None):
  if input_layer is None:
    input_layer = self.top_layer
  else:
    self.top_size = None
  name = 'dropout' + str(self.counts['dropout'])
  with tf.variable_scope(name):
    if not self.phase_train:
      keep_prob = 1.0
    if self.use_tf_layers:
      dropout = core_layers.dropout(input_layer, 1. - keep_prob)
    else:
      dropout = tf.nn.dropout(input_layer, keep_prob)
    if self.record_dropout:
      _num = 1
      for i in dropout.shape.as_list():
        _num = _num * int(i)
      if self.limit_print:
        _num = min(_num, self._max_print)
      if self.record_all:
        dropout = tf.Print(dropout, [dropout],
                           message="dropout%d=" % self.counts['dropout'],
                           first_n=self.first_n, summarize=_num)
      else:
        # Record only the first dropout.
        if self.counts['dropout'] == 1:
          dropout = tf.Print(dropout, [dropout],
                             message="dropout%d=" % self.counts['dropout'],
                             first_n=self.first_n, summarize=_num)
    self.top_layer = dropout
    return dropout
def _upsample(self, inputs, k):
  x = inputs
  for i in reversed(range(0, k)):
    x = conv2d_transpose(inputs=x, filters=self.num_classes * 2 ** i,
                         kernel_size=4, strides=2, padding='same')
    # Note: without a `training` argument, tf.layers.dropout defaults to
    # training=False, so this call is an identity unless a flag is passed.
    x = dropout(x, rate=self.dropout_prob)
    x = self._add_common_layers(x)
  return x
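# Shape sanity sketch for _upsample: each stride-2 conv2d_transpose with
# padding='same' doubles the spatial dimensions, so k steps scale H and W by
# 2**k (the filter counts here are illustrative only).
import tensorflow as tf

x = tf.zeros([1, 8, 8, 16])
y = tf.layers.conv2d_transpose(x, filters=4, kernel_size=4, strides=2,
                               padding='same')
print(y.shape)  # (1, 16, 16, 4)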
def dnn_logit_fn(features, mode): """Deep Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A `Tensor` representing the logits, or a list of `Tensor`'s representing multiple logits in the MultiHead case. """ is_training = mode == model_fn.ModeKeys.TRAIN with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net, )) as hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and is_training: net = core_layers.dropout(net, rate=dropout, training=True) if batch_norm: # TODO(hjm): In future, if this becomes popular, we can enable # customization of the batch normalization params by accepting a # list of `BatchNormalization` instances as `batch_norm`. net = normalization.batch_normalization( net, # The default momentum 0.99 actually crashes on certain # problem, so here we use 0.999, which is the default of # tf.contrib.layers.batch_norm. momentum=0.999, training=is_training, name='batchnorm_%d' % layer_id) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope('logits', values=(net, )) as logits_scope: logits = core_layers.dense( net, units=units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) return logits
def dnn_logit_fn(features, mode): """Deep Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A `Tensor` representing the logits, or a list of `Tensor`'s representing multiple logits in the MultiHead case. """ with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = core_layers.dropout(net, rate=dropout, training=True) _add_hidden_layer_summary(net, hidden_layer_scope.name) if isinstance(units, int): with variable_scope.variable_scope( 'logits', values=(net,)) as logits_scope: logits = core_layers.dense( net, units=units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) else: logits = [] for head_index, logits_dimension in enumerate(units): with variable_scope.variable_scope( 'logits_head_{}'.format(head_index), values=(net,)) as logits_scope: these_logits = core_layers.dense( net, units=logits_dimension, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(these_logits, logits_scope.name) logits.append(these_logits) return logits
def dnn_logit_fn(features, mode): """Deep Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A `Tensor` representing the logits, or a list of `Tensor`'s representing multiple logits in the MultiHead case. """ with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = core_layers.dropout(net, rate=dropout, training=True) _add_hidden_layer_summary(net, hidden_layer_scope.name) if isinstance(units, int): with variable_scope.variable_scope( 'logits', values=(net,)) as logits_scope: logits = core_layers.dense( net, units=units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) else: logits = [] for head_index, logits_dimension in enumerate(units): with variable_scope.variable_scope( 'logits_head_{}'.format(head_index), values=(net,)) as logits_scope: these_logits = core_layers.dense( net, units=logits_dimension, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(these_logits, logits_scope.name) logits.append(these_logits) return logits
def dnn_logit_fn(features, mode): """Deep Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A `Tensor` representing the logits, or a list of `Tensor`'s representing multiple logits in the MultiHead case. """ is_training = mode == model_fn.ModeKeys.TRAIN with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and is_training: net = core_layers.dropout(net, rate=dropout, training=True) if batch_norm: # TODO(hjm): In future, if this becomes popular, we can enable # customization of the batch normalization params by accepting a # list of `BatchNormalization` instances as `batch_norm`. net = normalization.batch_normalization( net, # The default momentum 0.99 actually crashes on certain # problem, so here we use 0.999, which is the default of # tf.contrib.layers.batch_norm. momentum=0.999, training=is_training, name='batchnorm_%d' % layer_id) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope('logits', values=(net,)) as logits_scope: logits = core_layers.dense( net, units=units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) return logits
def dnn_logit_fn(features, mode):
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    inputs = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)
  dense = inputs
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'dense_layer_%d' % layer_id, values=(dense,)) as hidden_layer_scope:
      dense = core_layers.dense(
          dense,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        dense = core_layers.dropout(dense, rate=dropout, training=True)
    _add_hidden_layer_summary(dense, hidden_layer_scope.name)

  with variable_scope.variable_scope(
      'fm_layer', values=(inputs,)) as cross_layer_scope:
    builder = feature_column_lib._LazyBuilder(features)
    fm_outputs = []
    for col_pair in fm_feature_columns:
      column1, column2 = col_pair
      tensor1 = column1._get_dense_tensor(builder, trainable=True)
      num_elements = column1._variable_shape.num_elements()
      batch_size = array_ops.shape(tensor1)[0]
      tensor2 = column2._get_dense_tensor(builder, trainable=True)
      tensor1 = array_ops.reshape(tensor1, shape=(batch_size, num_elements))
      tensor2 = array_ops.reshape(tensor2, shape=(batch_size, num_elements))
      # Pairwise FM interaction: a per-example inner product of the two
      # embeddings, shape (batch_size, 1). (The original called
      # matmul(tensor1, tensor2) on two (batch, num_elements) tensors,
      # which does not type-check.)
      fm_outputs.append(
          tf.reduce_sum(tensor1 * tensor2, axis=1, keepdims=True))
    fm_outputs = concat(fm_outputs, axis=1)
    _add_hidden_layer_summary(fm_outputs, cross_layer_scope.name)

  with variable_scope.variable_scope(
      'logits', values=(dense, fm_outputs)) as logits_scope:
    dense_cross = concat([dense, fm_outputs], axis=1)
    logits = core_layers.dense(
        dense_cross,
        units=1,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        name=logits_scope)
  _add_hidden_layer_summary(logits, logits_scope.name)
  return logits
def dropout(self, keep_prob=0.5, input_layer=None):
  if input_layer is None:
    input_layer = self.top_layer
  else:
    self.top_size = None
  name = 'dropout' + str(self.counts['dropout'])
  with tf.variable_scope(name):
    if not self.phase_train:
      keep_prob = 1.0
    # core_layers.dropout takes a drop *rate*, not a keep probability; the
    # original passed keep_prob straight through, inverting the strength.
    dropout = core_layers.dropout(input_layer, 1. - keep_prob)
    self.top_layer = dropout
    return dropout
def testFunctionalDropout(self):
  with self.test_session() as sess:
    inputs = array_ops.ones((5, 5))
    training = array_ops.placeholder(dtype='bool')
    dropped = core_layers.dropout(inputs, 0.5, training=training, seed=1)
    self.assertEqual(dropped.op.name, 'dropout/cond/Merge')
    sess.run(variables.global_variables_initializer())
    np_output = sess.run(dropped, feed_dict={training: True})
    self.assertAlmostEqual(0., np_output.min())
    np_output = sess.run(dropped, feed_dict={training: False})
    self.assertAllClose(np.ones((5, 5)), np_output)
def dropout(self, keep_prob=0.5, input_layer=None):
  if input_layer is None:
    input_layer = self.top_layer
  else:
    self.top_size = None
  name = 'dropout' + str(self.counts['dropout'])
  with tf.variable_scope(name):
    if not self.phase_train:
      keep_prob = 1.0
    # Convert the keep probability to a drop rate; the original passed the
    # keep probability tensor directly as the rate argument.
    rate_tensor = tf.constant(1. - keep_prob, dtype=self.data_type)
    dropout = core_layers.dropout(input_layer, rate_tensor)
    self.top_layer = dropout
    return dropout
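# The two variants above originally passed keep_prob straight through as the
# rate argument (fixed with comments above). For reference, the two dropout
# APIs are complementary: tf.nn.dropout takes the probability of *keeping* a
# unit, while tf.layers.dropout (core_layers.dropout) takes the probability
# of *dropping* one. A minimal sketch of the conversion:
import tensorflow as tf

x = tf.ones((4, 4))
keep_prob = 0.8
a = tf.nn.dropout(x, keep_prob=keep_prob)                     # keeps ~80%
b = tf.layers.dropout(x, rate=1. - keep_prob, training=True)  # same strength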
def cnn_2d(images, is_training):
  """Build the model for 2D-CNN.

  Inputs:
    -- images: Images placeholder
    -- is_training: bool placeholder, training or not

  Output:
    -- logits: Return the output of the model
  """
  # Build the CNN model.
  l_conv1 = conv2d(images, CONV1_FILTERS, KERNEL_SIZE1, strides=STRIDE_CONV1,
                   activation=relu, name='Conv1')
  l_maxpool1 = max_pooling2d(l_conv1, POOL_SIZE1, POOL_SIZE1, padding='same',
                             name='Maxpool1')
  l_conv2 = conv2d(l_maxpool1, CONV2_FILTERS, KERNEL_SIZE2,
                   strides=STRIDE_CONV2, activation=relu, name='Conv2')
  l_maxpool2 = max_pooling2d(l_conv2, POOL_SIZE2, POOL_SIZE2, padding='same',
                             name='Maxpool2')
  l_flatten = flatten(l_maxpool2, scope='Flatten')
  l_fc1 = dense(l_flatten, FC1, activation=relu, name='Fc1')
  l_drop = dropout(l_fc1, DROP_RATE, training=is_training, name='Dropout')
  l_fc2 = dense(l_drop, FC2, activation=relu, name='Fc2')
  logits = dense(l_fc2, NUM_CLASSES, name='Output')
  return logits
def dropout(self, keep_prob=0.5, input_layer=None):
  if input_layer is None:
    input_layer = self.top_layer
  else:
    self.top_size = None
  name = 'dropout' + str(self.counts['dropout'])
  with tf.variable_scope(name):
    if not self.phase_train:
      keep_prob = 1.0
    if self.use_tf_layers:
      dropout = core_layers.dropout(input_layer, 1. - keep_prob, seed=1)
    else:
      dropout = tf.nn.dropout(input_layer, keep_prob, seed=1)
    self.top_layer = dropout
    return dropout
def dropout(self, keep_prob=0.5, input_layer=None):
  if input_layer is None:
    input_layer = self.top_layer
  else:
    self.top_size = None
  name = "dropout" + str(self.counts["dropout"])
  with tf.variable_scope(name):
    if not self.phase_train:
      keep_prob = 1.0
    if self.use_tf_layers:
      dropout = core_layers.dropout(input_layer, 1. - keep_prob)
    else:
      dropout = tf.nn.dropout(input_layer, keep_prob)
    self.top_layer = dropout
    return dropout
def dnn_logit_fn(features, mode):
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    inputs = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)
  dense = inputs
  cross = inputs
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'dense_layer_%d' % layer_id, values=(dense,)) as hidden_layer_scope:
      dense = core_layers.dense(
          dense,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        dense = core_layers.dropout(dense, rate=dropout, training=True)
    _add_hidden_layer_summary(dense, hidden_layer_scope.name)

  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'cross_layer_%d' % layer_id, values=(cross,)) as cross_layer_scope:
      cross = cross_layer(cross, layer_id, inputs, name=cross_layer_scope)
    _add_hidden_layer_summary(cross, cross_layer_scope.name)

  with variable_scope.variable_scope(
      'logits', values=(dense, cross)) as logits_scope:
    dense_cross = concat([dense, cross], axis=1)
    # Feed the concatenated deep and cross towers into the final layer; the
    # original passed `cross` alone, leaving dense_cross unused.
    logits = core_layers.dense(
        dense_cross,
        units=1,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        name=logits_scope)
  _add_hidden_layer_summary(logits, logits_scope.name)
  return logits
def dropout(self, keep_prob=0.5, input_layer=None):
  """Add a dropout layer on top of cnn."""
  if input_layer is None:
    input_layer = self.top_layer
  else:
    self.top_size = None
  name = 'dropout' + str(self.counts['dropout'])
  with tf.variable_scope(name):
    if not self.phase_train:
      keep_prob = 1.0
    if self.use_tf_layers:
      dropout = core_layers.dropout(input_layer, 1. - keep_prob,
                                    training=self.phase_train)
    else:
      dropout = tf.nn.dropout(input_layer, keep_prob)
    self.top_layer = dropout
    return dropout
def _mlp(input, is_training, hidden_layer_sizes=(512, 1024, 1024),
         activations=('relu', 'relu', 'relu'), dropouts=(.5, .1, .1),
         bns=(False, False, False)):
  layers = [input]
  for hl, act, dp, bn in zip(hidden_layer_sizes, activations, dropouts, bns):
    layers.append(dense(layers[-1], hl, activation=act_funcs[act]))
    # Apply dropout for any nonzero rate; the original tested `dp == 1.`,
    # which never fires for the default rates and would drop everything if
    # it did.
    if dp > 0.:
      layers.append(dropout(layers[-1], rate=dp, training=is_training))
    if bn:
      layers.append(batch_normalization(layers[-1], training=is_training))
  return layers
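# Hypothetical usage sketch for _mlp. The bindings for the bare names it
# uses (dense, dropout, batch_normalization, act_funcs), the feature width,
# and the 10-way task head are assumptions, not part of the original snippet.
import tensorflow as tf

dense = tf.layers.dense
dropout = tf.layers.dropout
batch_normalization = tf.layers.batch_normalization
act_funcs = {'relu': tf.nn.relu}

x = tf.placeholder(tf.float32, [None, 128])
is_training = tf.placeholder(tf.bool, [])
hidden = _mlp(x, is_training)[-1]     # last layer of the stack
logits = tf.layers.dense(hidden, 10)  # assumed classification head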
def dropout(self, keep_prob=0.5, input_layer=None):
  if input_layer is None:
    input_layer = self.top_layer
  else:
    self.top_size = None
  name = 'dropout' + str(self.counts['dropout'])
  with tf.variable_scope(name):
    if not self.phase_train or not self.trainable:
      keep_prob = 1.0
    if self.use_tf_layers:
      dropout = core_layers.dropout(
          input_layer, 1. - keep_prob,
          training=self.phase_train and self.trainable)
    else:
      dropout = tf.nn.dropout(input_layer, keep_prob)
    self.top_layer = dropout
    return dropout
def dnn_logit_fn(inputs, mode):
  is_training = mode == ModeKeys.TRAIN
  with variable_scope.variable_scope('input_from_feature_columns'):
    dnn_inputs = []
    for c in column_names:
      dnn_inputs.append(inputs[c])
    net = array_ops.concat(dnn_inputs, axis=1)
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
      net = core_layers.dense(
          net,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and is_training:
        net = core_layers.dropout(net, rate=dropout, training=True)
      if batch_norm:
        net = normalization.batch_normalization(
            net,
            momentum=0.999,
            training=is_training,
            name='batchnorm_%d' % layer_id)
    _add_hidden_layer_summary(net, hidden_layer_scope.name)

  with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
    logits = core_layers.dense(
        net,
        units=units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        name=logits_scope)
  _add_hidden_layer_summary(logits, logits_scope.name)
  return logits
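# Reminder sketch (TF 1.x): batch_normalization keeps its moving-average
# updates in the UPDATE_OPS collection. When a train op is built by hand
# (rather than by a canned estimator head), those updates must be attached
# explicitly or the moving statistics never change.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 8])
is_training = tf.placeholder(tf.bool, [])
net = tf.layers.batch_normalization(x, momentum=0.999, training=is_training)
loss = tf.reduce_mean(tf.square(net))
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  train_op = tf.train.AdagradOptimizer(0.05).minimize(loss)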
def cnn_3d(images, is_training):
  """Build the model for 3D-CNN.

  Inputs:
    -- images: Images placeholder
    -- is_training: bool placeholder, training or not

  Output:
    -- logits: Return the output of the model
  """
  # Size of images & labels.
  height = int(images.shape[1])
  width = int(images.shape[2])
  depth = int(images.shape[3])
  images = tf.reshape(images, [-1, height, width, depth, 1])

  # Build the model.
  with tf.name_scope('CONV1'):
    l_conv1 = conv3d(images, filters=4, kernel_size=[3, 3, 10],
                     strides=[1, 1, 5], activation=relu,
                     kernel_regularizer=l2_regularizer(REG_lambda))
    # l_conv1 = rrelu(l_conv1, is_training)
  l_maxpool1 = max_pooling3d(l_conv1, pool_size=[3, 3, 3], strides=[1, 1, 1],
                             padding='same', name='Maxpool1')
  with tf.name_scope('CONV2'):
    l_conv2 = conv3d(
        l_maxpool1, filters=16, kernel_size=[3, 3, 10], strides=[1, 1, 2],
        # activation=relu,
        kernel_regularizer=l2_regularizer(REG_lambda))
    l_conv2 = rrelu(l_conv2, is_training)
  l_maxpool2 = max_pooling3d(l_conv2, pool_size=[3, 3, 3], strides=[1, 1, 1],
                             padding='same', name='Maxpool2')
  l_flatten = flatten(l_maxpool2, scope='Flatten')
  with tf.name_scope('FC1'):
    l_fc1 = dense(
        l_flatten, 200,
        kernel_regularizer=l2_regularizer(REG_lambda),
        # activation=relu
    )
    l_fc1 = rrelu(l_fc1, is_training)
  l_drop1 = dropout(l_fc1, Utils.drop, training=is_training, name='Dropout1')
  with tf.name_scope('FC2'):
    l_fc2 = dense(
        l_drop1, 200,
        kernel_regularizer=l2_regularizer(REG_lambda),
        # activation=relu
    )
    l_fc2 = rrelu(l_fc2, is_training)
  l_drop2 = dropout(l_fc2, Utils.drop, training=is_training, name='Dropout2')
  logits = dense(l_drop2, NUM_CLASSES, name='Output')
  return logits
def dnn_logit_fn(features, mode): """Deep Neural Network logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. mode: Optional. Specifies if this training, evaluation or prediction. See `ModeKeys`. Returns: A `Tensor` representing the logits, or a list of `Tensor`'s representing multiple logits in the MultiHead case. """ with variable_scope.variable_scope( 'input_from_feature_columns', values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner): net = feature_column_lib.input_layer( features=features, feature_columns=feature_columns) if rnn_feature_columns != None: rnn_features_embedding = feature_column_lib.input_layer( features=features, feature_columns=rnn_feature_columns) rnn_features_embedding = tf.reshape( rnn_features_embedding, [-1, FLAGS.rnn_length, FLAGS.rnn_input_size]) cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.rnn_hidden_size) att_wrapper = tf.contrib.rnn.AttentionCellWrapper( cell=cell, attn_length=10) outputs, _ = tf.nn.dynamic_rnn(att_wrapper, rnn_features_embedding, dtype=tf.float32) outputs = tf.reshape( outputs, [-1, FLAGS.rnn_length * FLAGS.rnn_hidden_size]) net = array_ops.concat([net, outputs], 1) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( 'hiddenlayer_%d' % layer_id, values=(net, )) as hidden_layer_scope: net = core_layers.dense( net, units=num_hidden_units, activation=activation_fn, kernel_initializer=init_ops.glorot_uniform_initializer(), name=hidden_layer_scope) if dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = core_layers.dropout(net, rate=dropout, training=True) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope('logits', values=(net, )) as logits_scope: logits = core_layers.dense( net, units=units, activation=None, kernel_initializer=init_ops.glorot_uniform_initializer(), name=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) return logits
def _dnn_linear_combined_model_fn(
    features, labels, mode, head,
    linear_feature_columns=None, linear_optimizer='Ftrl',
    dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None,
    dnn_activation_fn=nn.relu, dnn_dropout=None,
    input_layer_partitioner=None, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns
      used by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used
      by the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing.
  """
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'
  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          'input', partitioner=input_layer_partitioner):
        net = feature_column_lib.input_layer(
            features=features, feature_columns=dnn_feature_columns)
      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            'hiddenlayer_%d' % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = core_layers.dense(
              net,
              units=num_hidden_units,
              activation=dnn_activation_fn,
              kernel_initializer=init_ops.glorot_uniform_initializer(),
              name=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = core_layers.dropout(net, rate=dnn_dropout, training=True)
        _add_layer_summary(net, dnn_hidden_layer_scope.name)
      with variable_scope.variable_scope(
          'logits', values=(net,)) as dnn_logits_scope:
        dnn_logits = core_layers.dense(
            net,
            units=head.logits_dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  linear_parent_scope = 'linear'
  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      linear_logits = feature_column_lib.linear_model(
          features=features,
          feature_columns=linear_feature_columns,
          units=head.logits_dimension)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_parent_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))
    train_op = control_flow_ops.group(*train_ops)
    with ops.control_dependencies([train_op]):
      with ops.colocate_with(global_step):
        return state_ops.assign_add(global_step, 1)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
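# Hedged, self-contained sketch of the joint-training pattern inside
# _train_op_fn above: one optimizer per variable scope, grouped into a single
# train op that then bumps the global step. The toy variables, losses, and
# learning rates here are illustrative assumptions.
import tensorflow as tf

with tf.variable_scope('dnn'):
  w_dnn = tf.get_variable('w', shape=[], initializer=tf.zeros_initializer())
with tf.variable_scope('linear'):
  w_lin = tf.get_variable('w', shape=[], initializer=tf.zeros_initializer())
loss = tf.square(w_dnn - 1.) + tf.square(w_lin + 1.)
global_step = tf.train.get_or_create_global_step()

train_ops = [
    tf.train.AdagradOptimizer(0.05).minimize(
        loss, var_list=tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='dnn')),
    tf.train.FtrlOptimizer(0.05).minimize(
        loss, var_list=tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='linear')),
]
train_op = tf.group(*train_ops)
with tf.control_dependencies([train_op]):
  train_op = tf.assign_add(global_step, 1)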
def _build_graph(self):
  """Build computational graph."""

  def get_num_params():
    """Count the number of trainable parameters."""
    num_params = 0
    for variable in tf.compat.v1.trainable_variables():
      shape = variable.get_shape()
      var_num_params = 1
      for dimension in shape:
        var_num_params *= dimension
      num_params += var_num_params
    return num_params

  _LOGGER.debug('Building a computational graph...')
  graph = tf.Graph()
  nodes = dict()
  with graph.as_default():
    with tf.compat.v1.name_scope('inputs'):
      # Placeholders for the inputs.
      nodes['X'] = tf.compat.v1.placeholder(tf.int32, [None, None, None],
                                            name='X')
      nodes['y'] = tf.compat.v1.placeholder(tf.float32, [None], name='y')
      nodes['word_lens'] = tf.compat.v1.placeholder(tf.int32, [None],
                                                    name='word_lens')
      nodes['char_lens'] = tf.compat.v1.placeholder(tf.int32, [None],
                                                    name='char_lens')
      nodes['is_train'] = tf.compat.v1.placeholder(tf.bool, shape=[],
                                                   name='is_train')
      # Get the shape of the input.
      X_shape = tf.shape(input=nodes['X'])
      batch_size = X_shape[0]
      max_word_len = X_shape[1]
      max_char_len = X_shape[2]

    with tf.compat.v1.name_scope('embedding_layer'):
      nodes['embeddings'] = tf.Variable(
          tf.random.uniform([self._vocab_size, self._embedding_size],
                            -1.0, 1.0),
          trainable=True, name='embeddings')
      embedded = tf.nn.embedding_lookup(params=nodes['embeddings'],
                                        ids=nodes['X'])
      embedded = dropout(embedded, rate=self._embedding_dropout,
                         training=nodes['is_train'])

    with tf.compat.v1.name_scope('char_rnn_layer') as scope:
      # Reshape the embedded matrix in order to pass it to dynamic_rnn.
      embedded = tf.reshape(
          embedded,
          [batch_size * max_word_len, max_char_len, self._embedding_size])
      char_rnn_fw_cell = LSTMCell(num_units=self._char_rnn_size)
      char_rnn_bw_cell = LSTMCell(num_units=self._char_rnn_size)
      (char_output_fw,
       char_output_bw), states = tf.compat.v1.nn.bidirectional_dynamic_rnn(
           char_rnn_fw_cell, char_rnn_bw_cell, embedded, dtype=tf.float32,
           sequence_length=nodes['char_lens'],
           scope='{}bidirectional_rnn'.format(scope))
      char_rnn_outputs = tf.concat([char_output_fw, char_output_bw], axis=2)

    with tf.compat.v1.name_scope('char_pooling_layer'):
      char_rnn_outputs = self._mean_pool(char_rnn_outputs, batch_size,
                                         max_char_len, max_word_len,
                                         nodes['char_lens'])
      char_rnn_outputs = dropout(char_rnn_outputs,
                                 rate=self._char_rnn_dropout,
                                 training=nodes['is_train'])

    with tf.compat.v1.name_scope('word_rnn_layer') as scope:
      word_rnn_fw_cell = LSTMCell(num_units=self._word_rnn_size)
      word_rnn_bw_cell = LSTMCell(num_units=self._word_rnn_size)
      # The original reused the char_output_* names here; renamed for clarity.
      (word_output_fw,
       word_output_bw), states = tf.compat.v1.nn.bidirectional_dynamic_rnn(
           word_rnn_fw_cell, word_rnn_bw_cell, char_rnn_outputs,
           dtype=tf.float32, sequence_length=nodes['word_lens'],
           scope='{}bidirectional_rnn'.format(scope))
      word_rnn_outputs = tf.concat([word_output_fw, word_output_bw], axis=2)

    with tf.compat.v1.name_scope('word_pooling_layer'):
      word_rnn_outputs, nodes['attentions'] = self._attention_pool(
          word_rnn_outputs)
      word_rnn_outputs = dropout(word_rnn_outputs,
                                 rate=self._word_rnn_dropout,
                                 training=nodes['is_train'])

    with tf.compat.v1.variable_scope('softmax_layer'):
      nodes['W_s'] = tf.Variable(
          tf.random.normal([self._word_rnn_size * 2, 1]), name='weight')
      nodes['b_s'] = tf.Variable(tf.random.normal([1]), name='bias')
      logits = tf.squeeze(
          tf.matmul(word_rnn_outputs, nodes['W_s']) + nodes['b_s'])
      nodes['y_pred'] = tf.nn.sigmoid(logits)

    with tf.compat.v1.variable_scope('optimizer'):
      nodes['loss'] = tf.reduce_mean(
          input_tensor=tf.nn.sigmoid_cross_entropy_with_logits(
              logits=logits, labels=nodes['y']))
      nodes['optimizer'] = tf.compat.v1.train.AdamOptimizer(
          self._learning_rate).minimize(nodes['loss'])

    # Initialize the variables.
    nodes['init'] = tf.compat.v1.global_variables_initializer()

    # Count the number of parameters.
    self._num_params = get_num_params()
    # _LOGGER.debug('Total number of parameters = {:,}'.format(self._num_params))

    # Generate summaries.
    for variable in tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES):
      # Having ":" in the name is illegal, so replace it with "/".
      tf.compat.v1.summary.histogram(variable.name.replace(':', '/'),
                                     variable)
    nodes['summaries'] = tf.compat.v1.summary.merge_all()

    # Save the model to checkpoint.
    nodes['saver'] = tf.compat.v1.train.Saver()

  self._graph = graph
  self._nodes = nodes
def MNIST_model(features, labels, mode):
  # Input features.
  input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
  if mode == tf.estimator.ModeKeys.EVAL:
    # This variant evaluates on inverted images.
    input_data = tf.constant(1.0) - input_layer
  else:
    input_data = input_layer
  # Convolution 1.
  conv1 = conv2d(inputs=input_data, filters=32, kernel_size=[5, 5],
                 padding="same", activation=relu, use_bias=True)
  # Max pooling 1.
  pool1 = max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
  # Convolution 2.
  conv2 = conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                 padding="same", activation=relu, use_bias=True)
  # Max pooling 2.
  pool2 = max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
  # Fully connected.
  pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
  dense_out = dense(inputs=pool2_flat, units=hidden_size, activation=relu)
  dropout_out = dropout(inputs=dense_out, rate=drop_rate,
                        training=mode == tf.estimator.ModeKeys.TRAIN)
  logits = dense(inputs=dropout_out, units=10)
  # A dictionary of prediction operators.
  predictions = {
      "logits": tf.multiply(logits, tf.constant(1.0), name="logit_out"),
      # Class prediction.
      "classes": tf.argmax(input=logits, axis=1),
      # Probability prediction.
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
  }
  # Prediction mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
  # Calculate loss (for both TRAIN and EVAL modes).
  onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
  loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                         logits=logits)
  # Train mode.
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train_op = optimizer.minimize(loss=loss,
                                  global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
  # Evaluation metrics (for EVAL mode).
  eval_metric_ops = {
      "accuracy": tf.metrics.accuracy(labels=labels,
                                      predictions=predictions["classes"])
  }
  return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                    eval_metric_ops=eval_metric_ops)
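# Hedged wiring sketch (TF 1.x): how a model_fn like MNIST_model is handed to
# tf.estimator. The random stand-in data, model_dir, and the assumption that
# hidden_size and drop_rate are defined in scope are all illustrative.
import numpy as np
import tensorflow as tf

train_x = np.random.rand(256, 784).astype(np.float32)  # stand-in images
train_y = np.random.randint(0, 10, size=(256,))        # stand-in labels

estimator = tf.estimator.Estimator(model_fn=MNIST_model,
                                   model_dir='/tmp/mnist_model')
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": train_x}, y=train_y, batch_size=32, num_epochs=None, shuffle=True)
estimator.train(input_fn=input_fn, steps=100)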
def __init__(self, in_shape, classes, lr=0.001):
  '''
  classes: List of class names or integers corresponding to each class being
    classified by the network. ie: ['left', 'straight', 'right'] or [0, 1, 2]
  '''
  # Define classes.
  self.num_bins = len(classes)
  self.classes = np.array(classes, np.float32)
  self.class_lookup = [c for c in classes]

  # Define model.
  tf.reset_default_graph()
  self.x = tf.placeholder(tf.float32, shape=[None] + in_shape, name="input")
  self.y_steering = tf.placeholder(tf.int32, shape=(None,))
  self.y_throttle = tf.placeholder(tf.float32, shape=(None,))
  self._training = tf.placeholder(tf.bool)
  self.training = tf.get_variable("training", dtype=tf.bool,
                                  initializer=True, trainable=False)
  self.set_training = self.training.assign(self._training)

  relu = tf.nn.relu
  sigmoid = tf.nn.sigmoid
  with tf.name_scope("donkey"):
    #             input   num  conv    stride  pad
    conv = conv2d(self.x, 24, (5, 5), (2, 2), "same", activation=relu,
                  kernel_initializer=xavier(), name="conv1")
    conv = conv2d(conv, 32, (5, 5), (2, 2), "same", activation=relu,
                  kernel_initializer=xavier(), name="conv2")
    conv = conv2d(conv, 64, (5, 5), (2, 2), "same", activation=relu,
                  kernel_initializer=xavier(), name="conv3")
    conv = conv2d(conv, 64, (3, 3), (2, 2), "same", activation=relu,
                  kernel_initializer=xavier(), name="conv4")
    conv = conv2d(conv, 64, (3, 3), (1, 1), "same", activation=relu,
                  kernel_initializer=xavier(), name="conv5")
    conv = flatten(conv)
    #            in    num
    conv = dense(conv, 100, activation=relu, kernel_initializer=xavier(),
                 name="fc1")
    conv = dropout(conv, rate=0.1, training=self.training)
    conv = dense(conv, 50, activation=relu, kernel_initializer=xavier(),
                 name="fc2")
    conv = dropout(conv, rate=0.1, training=self.training)

    # Steering.
    self.logits = dense(conv, self.num_bins, activation=None,
                        kernel_initializer=xavier(), name="logits")
    self.steering_probs = tf.nn.softmax(self.logits, name="steering_probs")
    self.steering_prediction = tf.reduce_sum(
        tf.multiply(self.steering_probs, self.classes), axis=1,
        name="steering_prediction")

    # Throttle.
    self.throttle = dense(conv, 1, sigmoid, kernel_initializer=xavier(),
                          name="throttle")

  # Keep tensor names for easy freezing/loading later.
  self._TENSOR_DICT = {
      common._IMAGE_INPUT: self.x.name,
      common._STEERING_PREDICTION: self.steering_prediction.name,
      common._STEERING_PROBS: self.steering_probs.name,
      common._THROTTLE_PREDICTION: self.throttle.name
  }

  with tf.name_scope("loss"):
    self.loss_steering = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.y_steering, logits=self.logits)
    self.loss_steering = tf.reduce_mean(self.loss_steering)
    self.loss_throttle = tf.reduce_mean((self.throttle - self.y_throttle)**2)
    self.loss = 0.9 * self.loss_steering + 0.001 * self.loss_throttle
  tf.summary.scalar("weighted_loss", self.loss)
  tf.summary.scalar("steering_loss", self.loss_steering)
  tf.summary.scalar("throttle_loss", self.loss_throttle)

  optimizer = tf.train.AdamOptimizer(learning_rate=lr)
  self.train_step = optimizer.minimize(self.loss)
  self.init_vars = tf.global_variables_initializer()
  self.saver = tf.train.Saver()
def MNIST_model(features, labels, mode):
  # Input features.
  input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
  if mode == tf.estimator.ModeKeys.EVAL:
    # input_data = tf.constant(1.0) - input_layer
    input_data = input_layer
  else:
    input_data = input_layer
  # Convolution 1.
  conv1 = conv2d(inputs=input_data, filters=32, kernel_size=[5, 5],
                 padding="same", activation=relu, use_bias=True)
  # Max pooling 1.
  pool1 = max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
  # Convolution 2.
  conv2 = conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                 padding="same", activation=relu, use_bias=True)
  # Max pooling 2.
  pool2 = max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
  # Fully connected.
  pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
  dense_out = dense(inputs=pool2_flat, units=hidden_size, activation=relu)
  dropout_out = dropout(inputs=dense_out, rate=drop_rate,
                        training=mode == tf.estimator.ModeKeys.TRAIN)
  # Generate a [28 * 28, 10] matrix as context.
  w_a_1 = weight_variable(name="w_a_1", shape=[hidden_size, 28 * 28 * 10])
  b_a_1 = bias_variable(name="b_a_1", shape=[28 * 28 * 10])
  context = tf.add(tf.matmul(dropout_out, w_a_1), b_a_1)
  context_matrix = tf.reshape(context, [-1, 28 * 28, 10])
  # Dot-product layer.
  input_data_flat = tf.reshape(input_data, [-1, 28 * 28, 1])
  input_data_tiled = tf.tile(input=input_data_flat, multiples=[1, 1, 10])
  weighted_context = tf.multiply(input_data_tiled, context_matrix)
  # Generate softmax result.
  logits = tf.reduce_sum(weighted_context, axis=[1])
  # A dictionary of prediction operators.
  predictions = {
      "logits": tf.multiply(logits, tf.constant(1.0), name="logit_out"),
      # Class prediction.
      "classes": tf.argmax(input=logits, axis=1),
      # Probability prediction.
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
  }
  # Prediction mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
  # Regularization.
  l1_regularizer = tf.contrib.layers.l1_regularizer(scale=reg_scale)
  regularization_cost = tf.contrib.layers.apply_regularization(
      l1_regularizer, [context_matrix])
  # Calculate loss (for both TRAIN and EVAL modes).
  onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
  error_cost = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                               logits=logits)
  # Total loss.
  loss = regularization_cost + error_cost
  # Train mode.
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    train_op = optimizer.minimize(loss=loss,
                                  global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
  # Evaluation metrics (for EVAL mode).
  eval_metric_ops = {
      "accuracy": tf.metrics.accuracy(labels=labels,
                                      predictions=predictions["classes"])
  }
  return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                    eval_metric_ops=eval_metric_ops)
def _dnn_model_fn(features, labels, mode, head, hidden_units, feature_columns,
                  optimizer='Adagrad', activation_fn=nn.relu, dropout=None,
                  input_layer_partitioner=None, config=None):
  """Deep Neural Net model_fn.

  Args:
    features: Dict of `Tensor` (depends on data passed to `train`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults to
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'dnn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        'input_from_feature_columns',
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner):
      net = feature_column_lib.input_layer(
          features=features, feature_columns=feature_columns)

    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
        net = core_layers.dense(
            net,
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = core_layers.dropout(net, rate=dropout, training=True)
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
      logits = core_layers.dense(
          net,
          units=head.logits_dimension,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss, global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _dropout(self, bottom, drop_rate):
  return dropout(bottom, rate=drop_rate, training=self.training)
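# Minimal sketch of the pattern _dropout relies on: the owning model exposes
# a boolean `training` tensor that is fed (or defaulted) per run. The class
# name and placeholder_with_default here are assumptions for illustration.
import tensorflow as tf

class Net(object):
  def __init__(self):
    # False by default, fed as True during training steps.
    self.training = tf.placeholder_with_default(False, shape=[],
                                                name='training')

  def _dropout(self, bottom, drop_rate):
    return tf.layers.dropout(bottom, rate=drop_rate, training=self.training)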
def _dnn_linear_combined_model_fn(features, labels, mode, head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns
      used by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used
      by the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing,
      or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'
  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          'input', partitioner=input_layer_partitioner):
        net = feature_column_lib.input_layer(
            features=features, feature_columns=dnn_feature_columns)
      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            'hiddenlayer_%d' % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = core_layers.dense(
              net,
              units=num_hidden_units,
              activation=dnn_activation_fn,
              kernel_initializer=init_ops.glorot_uniform_initializer(),
              name=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = core_layers.dropout(net, rate=dnn_dropout, training=True)
        _add_layer_summary(net, dnn_hidden_layer_scope.name)
      with variable_scope.variable_scope(
          'logits', values=(net,)) as dnn_logits_scope:
        dnn_logits = core_layers.dense(
            net,
            units=head.logits_dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  linear_parent_scope = 'linear'
  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      linear_logits = feature_column_lib.linear_model(
          features=features,
          feature_columns=linear_feature_columns,
          units=head.logits_dimension)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_parent_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))
    train_op = control_flow_ops.group(*train_ops)
    with ops.control_dependencies([train_op]):
      with ops.colocate_with(global_step):
        return state_ops.assign_add(global_step, 1)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def _create_model(self):
    self.input_image = tf.placeholder(tf.float32,
                                      shape=(None, None, None, self.img_input_channels),
                                      name='input_image_placeholder')
    self.gt_image = tf.placeholder(tf.int32,
                                   shape=(None, None, None, self.num_classes),
                                   name='gt_image_placeholder')
    self.gt_contours = tf.placeholder(tf.int32,
                                      shape=(None, None, None, self.num_classes),
                                      name='gt_contours_placeholder')
    self.dropout_prob = tf.placeholder(dtype=tf.float32, shape=None,
                                       name='dropout_prob_placeholder')
    self.lr = tf.placeholder(dtype=tf.float32, shape=None,
                             name='learning_rate_placeholder')
    scale_nc = self.hps.get('scale_nc')

    with tf.variable_scope("encoder"):
        with tf.variable_scope("block_1"):
            conv1 = self._add_common_layers(
                conv2d(self.input_image, filters=32 * scale_nc, kernel_size=3, padding='same'))
            conv2 = self._add_common_layers(
                conv2d(conv1, filters=32 * scale_nc, kernel_size=3, padding='same'))
        with tf.variable_scope("block_2"):
            mp2 = max_pooling2d(conv2, pool_size=2, strides=2, padding='same')
            bn1 = self._add_common_layers(self._bottleneck(mp2, size=64 * scale_nc))
            bn2 = self._add_common_layers(self._bottleneck(bn1, size=64 * scale_nc))
        with tf.variable_scope("block_3"):
            mp3 = max_pooling2d(bn2, pool_size=2, strides=2, padding='same')
            bn3 = self._add_common_layers(self._bottleneck(mp3, size=128 * scale_nc))
            bn4 = self._add_common_layers(self._bottleneck(bn3, size=128 * scale_nc))
        with tf.variable_scope("block_4"):
            mp4 = max_pooling2d(bn4, pool_size=2, strides=2, padding='same')
            bn5 = self._add_common_layers(self._bottleneck(mp4, size=256 * scale_nc))
            bn6 = self._add_common_layers(self._bottleneck(bn5, size=256 * scale_nc))
            # training=True so the fed rate takes effect; tf.layers.dropout
            # defaults to training=False, which silently disables the layer.
            # Feed dropout_prob=0. at eval time to turn dropout off.
            d1 = dropout(bn6, rate=self.dropout_prob, training=True)
        with tf.variable_scope("block_5"):
            mp5 = max_pooling2d(d1, pool_size=2, strides=2, padding='same')
            bn7 = self._add_common_layers(self._bottleneck(mp5, size=256 * scale_nc))
            bn8 = self._add_common_layers(self._bottleneck(bn7, size=256 * scale_nc))
            d2 = dropout(bn8, rate=self.dropout_prob, training=True)
        with tf.variable_scope("block_6"):
            mp6 = max_pooling2d(d2, pool_size=2, strides=2, padding='same')
            bn9 = self._add_common_layers(self._bottleneck(mp6, size=256 * scale_nc))
            bn10 = self._add_common_layers(self._bottleneck(bn9, size=256 * scale_nc))
            d3 = dropout(bn10, rate=self.dropout_prob, training=True)

    self.img_descriptor = tf.reduce_mean(d3, axis=(1, 2))

    with tf.variable_scope("decoder_seg"):
        deconvs = []
        deconvs.append(conv2d(conv2, filters=self.num_classes, kernel_size=3, padding='same'))
        deconvs.append(self._upsample(bn2, k=1))
        deconvs.append(self._upsample(bn4, k=2))
        deconvs.append(self._upsample(d1, k=3))
        deconvs.append(self._upsample(d2, k=4))
        deconvs.append(self._upsample(d3, k=5))
        concat = tf.concat(deconvs, axis=3)
        conv3 = conv2d(concat, filters=self.num_classes, kernel_size=3, padding='same')
        ac1 = self._add_common_layers(conv3)
        conv4 = conv2d(ac1, filters=self.num_classes, kernel_size=1, padding='same')
        ac2 = self._add_common_layers(conv4)
        self.preds_seg = softmax(ac2)

    # Same skip-connected decoder architecture, with its own weights,
    # trained to predict contours instead of segment interiors.
    with tf.variable_scope("decoder_cont"):
        deconvs = []
        deconvs.append(conv2d(conv2, filters=self.num_classes, kernel_size=3, padding='same'))
        deconvs.append(self._upsample(bn2, k=1))
        deconvs.append(self._upsample(bn4, k=2))
        deconvs.append(self._upsample(d1, k=3))
        deconvs.append(self._upsample(d2, k=4))
        deconvs.append(self._upsample(d3, k=5))
        concat = tf.concat(deconvs, axis=3)
        conv3 = conv2d(concat, filters=self.num_classes, kernel_size=3, padding='same')
        ac1 = self._add_common_layers(conv3)
        conv4 = conv2d(ac1, filters=self.num_classes, kernel_size=1, padding='same')
        ac2 = self._add_common_layers(conv4)
        self.preds_cont = softmax(ac2)

    # A pixel is predicted foreground where the segmentation decoder is
    # confident and the contour decoder is not.
    cond1 = tf.greater_equal(self.preds_seg, self.threshold)
    cond2 = tf.less(self.preds_cont, self.threshold)
    conditions = tf.logical_and(cond1, cond2)
    self.preds = tf.where(conditions, tf.ones_like(conditions), tf.zeros_like(conditions))

    self._add_train_op()
    self.summaries = tf.summary.merge_all()
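# Usage sketch, not from the original source: because the dropout rate and
# learning rate are placeholders, a training step feeds both, while an eval
# step feeds dropout_prob=0. The names sess, batch, and self.train_op
# (assumed to be created by _add_train_op()) are hypothetical.
feed = {self.input_image: batch['image'],
        self.gt_image: batch['mask'],
        self.gt_contours: batch['contours'],
        self.dropout_prob: 0.5,  # feed 0. at eval time to disable dropout
        self.lr: 1e-3}
_, summaries = sess.run([self.train_op, self.summaries], feed_dict=feed)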
def __init__(self, encoder, layer_def, classes, lr=0.001):
    """
    encoder: {"path": string,               # path to frozen encoder pb file
              "input_tensor_name": string,
              "output_tensor_name": string,
              "name": string}               # name of encoder
    layer_def: node definitions for the fully connected network; a list of dicts:
        [{"neurons": int,           # number of neurons
          "activation": function,   # activation function
          "init": function,         # tf initialization function
          "name": string,           # used to identify the tensor
          "dropout": float (0-1)},  # 1 means no dropout is added (keep_prob=1)
         {...}]
    classes: list of classes the network will identify; may be a list of
        ints or a list of strings.
    """
    self.classes = classes
    self.num_bins = len(classes)
    self.class_idxs = np.array(range(len(classes)), np.float32)

    # Load frozen encoder
    self.encoder = load_graph(encoder["path"], encoder["name"])
    with self.encoder.as_default() as graph:
        # images into the network
        self.x = self.encoder.get_tensor_by_name(
            encoder["name"] + "/" + encoder["input_tensor_name"])
        # embedding from encoder
        _z = self.encoder.get_tensor_by_name(
            encoder["name"] + "/" + encoder["output_tensor_name"])
        # ensure encoder's weights are held static
        self.z = tf.stop_gradient(_z)
        # known label
        self.y = tf.placeholder(tf.int32, name="y")
        self._training = tf.placeholder(tf.bool)
        self.training = tf.get_variable("training", dtype=tf.bool,
                                        initializer=True, trainable=False)
        self.set_training = self.training.assign(self._training)

        # NOTE: the head below is hard-coded; layer_def is not consumed here.
        # training=self.training makes dropout active only while training;
        # without it tf.layers.dropout defaults to training=False (a no-op).
        lay1 = dense(self.z, 40, activation=tf.nn.relu, kernel_initializer=xavier())
        lay1 = dropout(lay1, rate=0.5, training=self.training)
        lay1 = dense(lay1, 30, activation=tf.nn.relu, kernel_initializer=xavier())
        lay1 = dropout(lay1, rate=0.5, training=self.training)
        lay1 = dense(lay1, 20, activation=tf.nn.relu, kernel_initializer=xavier())
        lay1 = dropout(lay1, rate=0.5, training=self.training)
        lay1 = dense(lay1, 15, activation=None, kernel_initializer=xavier())
        self.logits = lay1

        self.prediction = self.make_prediction()
        self.loss = self.loss_fn()
        # Tensorboard
        tf.summary.scalar("loss", self.loss)

        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        self.train_step = optimizer.minimize(self.loss)
        self.init_vars = tf.global_variables_initializer()

        update_tensor_dict(INPUTS, IMAGE_INPUT, self.x)
        update_tensor_dict(OUTPUTS, PREDICTION, self.prediction)

        self.saver = tf.train.Saver()
        self.graph = graph
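# Illustrative sketch, not part of the original class: the layer_def format
# documented in the docstring above is never consumed by the hard-coded head.
# The hypothetical helper build_head shows how such a list could drive
# construction, assuming the module's dense/dropout (tf.layers) imports,
# that "init" holds an initializer instance, and that "dropout" is a
# keep probability, so rate = 1.0 - keep_prob and a value of 1 adds no layer.
def build_head(z, layer_def, training):
    net = z
    for spec in layer_def:
        net = dense(net, spec["neurons"], activation=spec["activation"],
                    kernel_initializer=spec["init"], name=spec["name"])
        if spec["dropout"] < 1.0:  # keep_prob == 1 means no dropout layer
            net = dropout(net, rate=1.0 - spec["dropout"], training=training,
                          name=spec["name"] + "_drop")
    return net

# e.g. logits = build_head(self.z, layer_def, self.training)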
def __init__(self, in_shape, lr=0.001, embedding_dim=50, num_projections=20):
    '''
    in_shape: shape of a single input image, e.g. [120, 160, 3] given the
        layer shape comments below.
    lr: learning rate for the Adam optimizer.
    embedding_dim: number of elements in the embedding vector.
    num_projections: number of test embeddings to project in TensorBoard.
    '''
    self.embedding_dim = embedding_dim
    self.num_projections = num_projections

    # Define model
    tf.reset_default_graph()
    self.x = tf.placeholder(tf.float32, shape=[None] + in_shape, name="x")
    self.y = tf.placeholder(tf.float32, shape=[None] + in_shape, name="y")
    self._training = tf.placeholder(tf.bool)
    self.training = tf.get_variable("training", dtype=tf.bool,
                                    initializer=True, trainable=False)
    self.set_training = self.training.assign(self._training)

    # Holding variable for embeddings to be visualized with the projector.
    self._embeddings = tf.placeholder(tf.float32, shape=[None, self.embedding_dim])
    self.embeddings = tf.get_variable(
        "embeddings",
        dtype=tf.float32,
        shape=[self.num_projections, self.embedding_dim],
        initializer=tf.zeros_initializer(),
        trainable=False)
    self.update_embeddings = self.embeddings.assign(self._embeddings)

    paddings = tf.constant([[0, 0], [4, 4], [0, 0], [0, 0]])
    relu = tf.nn.relu
    sigmoid = tf.nn.sigmoid

    with tf.name_scope("encoder"):
        # Pad the input so the reconstruction comes back at the correct size.
        x_padded = tf.pad(self.x, paddings, "SYMMETRIC")
        # Encoder          in    num  shape   stride  pad
        enc1 = conv2d(x_padded, 24, (5, 5), (2, 2), "same", activation=relu,
                      kernel_initializer=xavier(), name="enc1")  # 64, 80, 24
        enc2 = conv2d(enc1, 32, (5, 5), (2, 2), "same", activation=relu,
                      kernel_initializer=xavier(), name="enc2")  # 32, 40, 32
        enc3 = conv2d(enc2, 64, (5, 5), (2, 2), "same", activation=relu,
                      kernel_initializer=xavier(), name="enc3")  # 16, 20, 64
        enc4 = conv2d(enc3, 64, (3, 3), (2, 2), "same", activation=relu,
                      kernel_initializer=xavier(), name="enc4")  #  8, 10, 64
        enc5 = conv2d(enc4, 64, (3, 3), (1, 1), "same", activation=relu,
                      kernel_initializer=xavier(), name="enc5")  #  8, 10, 64
        enc5f = flatten(enc5)
        #            in    num
        enc6 = dense(enc5f, 100, activation=relu,
                     kernel_initializer=xavier(), name="enc6")
        enc6d = dropout(enc6, rate=0.1, training=self.training, name="enc6d")

    with tf.name_scope("embedding"):
        # Autoencoder embedding
        self.z = dense(enc6d, self.embedding_dim, activation=relu,
                       kernel_initializer=xavier(), name="z")
        tf.summary.histogram("z", self.z)

    with tf.name_scope("decoder"):
        # Decoder   in   num
        dec1 = dense(self.z, 100, activation=relu,
                     kernel_initializer=xavier(), name="dec1")
        dec2 = dense(dec1, 8 * 10 * 64, activation=relu,
                     kernel_initializer=xavier(), name="dec2")
        dec2r = tf.reshape(dec2, (-1, 8, 10, 64))
        #                        in    num  shape   stride  pad
        dec3 = conv2d_transpose(dec2r, 64, (3, 3), (1, 1), "same",
                                activation=relu, name="dec3")
        dec4 = conv2d_transpose(dec3, 64, (3, 3), (2, 2), "same",
                                activation=relu, name="dec4")
        dec5 = conv2d_transpose(dec4, 32, (5, 5), (2, 2), "same",
                                activation=relu, name="dec5")
        dec6 = conv2d_transpose(dec5, 24, (5, 5), (2, 2), "same",
                                activation=relu, name="dec6")
        dec7 = conv2d_transpose(dec6, 3, (5, 5), (2, 2), "same",
                                activation=None, name="dec7")
        self.dec = relu(dec7, name="reconstruction")

    with tf.name_scope("loss"):
        # Reconstruction loss only (no KL term is used here): how far is the
        # reconstruction from the target image?
        y_padded = tf.pad(self.y, paddings, "SYMMETRIC")
        self.rec_loss = tf.reduce_sum(tf.square(y_padded - self.dec),
                                      axis=[1, 2, 3])
        self.loss = tf.reduce_mean(self.rec_loss, name='total_loss')
        tf.summary.scalar("total_loss", self.loss)

    update_tensor_dict(INPUTS, IMAGE_INPUT, self.x)
    update_tensor_dict(OUTPUTS, EMBEDDING, self.z)
    update_tensor_dict(OUTPUTS, RECONSTRUCTION, self.dec)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    self.train_step = optimizer.minimize(self.loss, name="train_step")
    self.init_vars = tf.global_variables_initializer()
    self.saver = tf.train.Saver()
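# Usage sketch, not from the original source: train on batches, then push
# test embeddings into the projector variable via update_embeddings. The
# names model, sess, x_batch, and x_test are hypothetical.
with tf.Session() as sess:
    sess.run(model.init_vars)
    sess.run(model.set_training, feed_dict={model._training: True})
    sess.run(model.train_step, feed_dict={model.x: x_batch, model.y: x_batch})
    # Project the first num_projections test embeddings in TensorBoard.
    z_test = sess.run(model.z,
                      feed_dict={model.x: x_test[:model.num_projections]})
    sess.run(model.update_embeddings, feed_dict={model._embeddings: z_test})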