def model_fn(features, labels, mode, params):
    """
    Args:
        features: Tensor with shape (batch_size, max_seq_len) with dtype int.
        labels: same as features, since we are training an autoencoder.
        params: tf.contrib.training.HParams object containing...
            embed_size, vocab_size
    """
    if mode == tf.estimator.ModeKeys.PREDICT:
        features = features['x']

    # Load and build the embedding layer, initialized with pre-trained
    # GloVe embeddings.
    embedded_features = components.glove_embed(
        features,
        embed_shape=(params.vocab_size, params.embed_size),
        vocabulary=params.vocab)

    hidden_layer = tf.layers.Dense(
        params.hidden_size, activation=tf.tanh)(embedded_features)
    logits = tf.layers.Dense(params.vocab_size)(hidden_layer)

    if mode == tf.estimator.ModeKeys.PREDICT:
        output_probs = tf.nn.softmax(logits)
        preds = tf.argmax(output_probs, -1)
        table = tf.contrib.lookup.index_to_string_table_from_tensor(params.vocab)
        preds_words = table.lookup(preds)
        return tf.estimator.EstimatorSpec(mode, predictions=preds_words)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.train.get_or_create_global_step(),
        learning_rate=0.01,
        optimizer='Adam')

    with tf.variable_scope('accuracy', values=[labels, logits]):
        flattened_logits = tf.reshape(
            logits, [params.batch_size * params.max_seq_len, -1])
        flattened_labels = tf.reshape(
            labels, [params.batch_size * params.max_seq_len])
        output_probs = tf.nn.softmax(flattened_logits)
        num_correct = tf.to_float(
            tf.nn.in_top_k(output_probs, tf.to_int64(flattened_labels), 1))
        accuracy = tf.reduce_mean(num_correct)
    tf.summary.scalar('accuracy', accuracy)

    if mode == tf.estimator.ModeKeys.TRAIN:
        logging_hook = tf.train.LoggingTensorHook(
            {'loss': loss, 'acc': accuracy}, every_n_iter=100)
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, train_op=train_op, training_hooks=[logging_hook])
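For context, here is a minimal sketch of how a model_fn like the one above can be handed to a tf.estimator.Estimator. The hyperparameter values, model_dir, and the train_input_fn mentioned in the comment are placeholders, not part of the original code; the sketch also assumes the author's components module is importable.

import tensorflow as tf

# Placeholder hyperparameters; real values depend on the dataset and
# the pre-trained GloVe embeddings being used.
hparams = tf.contrib.training.HParams(
    vocab=['<pad>', '<unk>', 'the', 'cat'],  # placeholder vocabulary
    vocab_size=4,
    embed_size=50,
    hidden_size=256,
    batch_size=32,
    max_seq_len=20)

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir='/tmp/autoencoder_model',
    params=hparams)

# `train_input_fn` is assumed to yield (features, labels) batches of integer
# token ids with shape (batch_size, max_seq_len), e.g. built with tf.data.
# estimator.train(input_fn=train_input_fn, steps=1000)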
def model_fn(features, labels, mode, params):
    print('=' * 50, '\n', 'MODEL_FN CALLED IN MODE', mode, '\n', '=' * 50)

    # Place any special preprocessing needed for PREDICT and/or serving here:
    if mode == tf.estimator.ModeKeys.PREDICT:
        input_doc = features['x']
        table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=params.vocab_path,
            vocab_size=params.vocab_size,
            default_value=1)
        features = table.lookup(input_doc)

    # Load and build the embedding layer, initialized with
    # pre-trained GloVe embeddings.
    # Has shape (batch_size, max_seq_len, embed_size)
    with tf.device('/cpu:0'):
        embedded_features = components.glove_embed(
            features,
            embed_shape=(params.vocab_size, params.embed_size),
            vocab_path=params.vocab_path,
            projector_path=params.model_dir)

    with tf.device('/{}:0'.format(args.device)):
        # Ensure that `embedded_features` has rank 3 before executing any further.
        with tf.control_dependencies([tf.assert_rank(embedded_features, 3)]):
            _, final_state = components.deep_blstm(
                inputs=embedded_features,
                mode=mode,
                state_size=params.state_size,
                num_layers=params.num_layers,
                dropout_prob=params.dropout_prob)

        if params.l2_decay:
            l2_regularize(final_state, params.l2_decay, 'final_state_l2')

        # We project the final output state to obtain
        # the logits over each of our possible classes (labels).
        logits = tf.layers.Dense(
            params.num_labels,
            kernel_regularizer=functools.partial(
                l2_regularize,
                scale=params.l2_decay,
                name='logits_kernel_l2'))(final_state)

    with tf.variable_scope('predictions'):
        output_probs = tf.nn.softmax(logits, name='output_probs')
        preds = tf.argmax(output_probs, -1, name='preds')
        if params.debug:
            preds = tf.Print(preds, [preds], 'Preds print: ',
                             first_n=10, summarize=5)
        # Create table for converting prediction index -> label.
        table = tf.contrib.lookup.index_to_string_table_from_file(
            params.labels_path)
        # Convert each prediction index to the corresponding label.
        preds_words = tf.identity(table.lookup(preds), 'preds_words')

    # For PREDICT mode, compute predicted label for each example in batch.
    if mode == tf.estimator.ModeKeys.PREDICT:
        pred_out = tf.estimator.export.PredictOutput(
            {'preds_words': preds_words})
        return tf.estimator.EstimatorSpec(
            mode,
            predictions=preds_words,
            export_outputs={'export_outputs': pred_out},
            prediction_hooks=[
                hooks.FreezingHook(
                    'preds_words',
                    os.path.join(params.model_dir, 'frozen_model.pb'))
            ],
            scaffold=tf.train.Scaffold(init_op=tf.tables_initializer()))

    with tf.device('/{}:0'.format(args.device)):
        # from tensorflow.contrib.layers import optimize_loss, l2_regularizer
        l2 = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) \
            if args.l2_decay else 0.0
        loss = tf.losses.sparse_softmax_cross_entropy(
            labels=labels, logits=logits) + l2
        train_op = optimize_loss(
            loss,
            global_step=tf.train.get_or_create_global_step(),
            learning_rate=params.learning_rate,
            clip_gradients=5.0,
            optimizer='Adam')

    accuracy = tf.metrics.accuracy(labels, preds, name='acc_op')
    tf.summary.scalar('acc', tf.identity(accuracy[1], 'acc_tensor'))
    tf.summary.scalar('loss', tf.identity(loss, 'loss_tensor'))
    tf.summary.histogram('logits', logits)
    tf.summary.histogram('output_probs', output_probs)
    tf.summary.histogram('combined_rnn_state', final_state)

    if mode == tf.estimator.ModeKeys.TRAIN:
        ops_hook = hooks.CustomOpsHook([accuracy])

        def over_fifty(datum, tensor):
            """Returns True if there exists an entry in output_probs over 0.5.

            Args:
                datum: DebugTensorDatum
                tensor: dumped tensor value, a `numpy.ndarray`
            """
            if 'predictions/output_probs' in datum.tensor_name:
                return tensor.max() > 0.5
            return False

        training_hooks = [ops_hook]
        if params.debug:
            debug_hook = tfdbg.LocalCLIDebugHook()
            debug_hook.add_tensor_filter('over_fifty', over_fifty)
            training_hooks.append(debug_hook)
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, train_op=train_op, training_hooks=training_hooks)
    elif mode == tf.estimator.ModeKeys.EVAL:
        early_stopping_hook = hooks.EarlyStoppingHook(
            metric=accuracy, max_metric=0.98, patience=20000)
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            evaluation_hooks=[early_stopping_hook],
            eval_metric_ops={'acc': accuracy})
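Since the PREDICT branch above reads raw token strings from features['x'], exporting this model needs a serving input receiver that supplies that feature. The sketch below is one plausible way to write it; the placeholder shape and export path are assumptions, not taken from the original code.

import tensorflow as tf

def serving_input_receiver_fn():
    # The PREDICT branch looks up features['x'] in a vocabulary table, so we
    # assume 'x' is a batch of already-tokenized string documents padded to
    # a common length.
    tokens = tf.placeholder(dtype=tf.string, shape=[None, None], name='x')
    return tf.estimator.export.ServingInputReceiver(
        features={'x': tokens},
        receiver_tensors={'x': tokens})

# Given an Estimator wrapping the model_fn above:
# estimator.export_savedmodel('/tmp/exported_model', serving_input_receiver_fn)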
def model_fn(features, labels, mode, params):
    with tf.variable_scope('model_fn', values=[features, labels]):
        # 20 Newsgroups dataset has 20 unique labels.
        num_classes = 20

        # Load and build the embedding layer, initialized with
        # pre-trained GloVe embeddings.
        # Has shape (batch_size, max_seq_len, embed_size)
        embedded_features, emb_hook = components.glove_embed(
            features,
            embed_shape=(params.vocab_size, params.embed_size),
            vocabulary=params.vocab)

        with tf.variable_scope('deep_blstm'):

            def deep_lstm():
                if mode == tf.estimator.ModeKeys.TRAIN:
                    return MultiRNNCell([
                        DropoutWrapper(LSTMCell(params.state_size),
                                       state_keep_prob=0.5)
                        for _ in range(params.num_layers)
                    ])
                else:
                    return MultiRNNCell([
                        LSTMCell(params.state_size)
                        for _ in range(params.num_layers)
                    ])

            cell_fw = deep_lstm()
            cell_bw = deep_lstm()

            # Use tf.nn.bidirectional_dynamic_rnn for efficient computation.
            # It utilizes TensorFlow's tf.while_loop to repeatedly
            # call cell(...) over the sequential embedded_features.
            #
            # Returns:
            #   outputs: tuple (output_fw, output_bw) containing the fw and bw
            #     rnn output Tensors, where each has shape
            #     (batch_size, max_seq_len, cell.output_size)
            #   output_states: tuple (output_state_fw, output_state_bw)
            #     containing the fw and bw final states of the bidirectional rnn.
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=embedded_features,
                dtype=tf.float32)

            # Each output_state is a tuple of length num_layers,
            # and the i'th element is an LSTMStateTuple, representing the
            # final state of the i'th layer.
            output_state_fw, output_state_bw = output_states

            def concat_lstms(lstms):
                """Merges a list of LSTMStateTuple into a single LSTMStateTuple."""
                return LSTMStateTuple(
                    c=tf.concat([lstm.c for lstm in lstms], axis=-1),
                    h=tf.concat([lstm.h for lstm in lstms], axis=-1))

            # First, concatenate each output_state LSTMStateTuple, such that the
            # result is a single LSTMStateTuple for each (instead of
            # num_layers many).
            output_state_fw = concat_lstms(output_state_fw)
            output_state_bw = concat_lstms(output_state_bw)

            # Then, combine the forward and backward output states.
            combined_final_state = tf.concat(
                [output_state_fw.h, output_state_bw.h], axis=-1)

        # We project the final output state to obtain
        # the logits over each of our possible classes (labels).
        logits = tf.layers.Dense(num_classes)(combined_final_state)

        with tf.variable_scope('predictions'):
            output_probs = tf.nn.softmax(logits)
            preds = tf.argmax(output_probs, -1)

        # For PREDICT mode, compute predicted label for each example in batch.
        if mode == tf.estimator.ModeKeys.PREDICT:
            # Create table for converting prediction index -> label.
            table = tf.contrib.lookup.index_to_string_table_from_tensor(
                params.vocab)
            # Convert each prediction index to the corresponding label.
            preds_words = table.lookup(preds)
            return tf.estimator.EstimatorSpec(mode, predictions=preds_words)

        with tf.variable_scope('train_and_eval', values=[labels, logits]):
            loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                          logits=logits)
            train_op = tf.contrib.layers.optimize_loss(
                loss=loss,
                global_step=tf.train.get_or_create_global_step(),
                learning_rate=1e-3,
                clip_gradients=5.0,
                optimizer='Adam')

        with tf.variable_scope('metrics', values=[labels, preds]):
            accuracy = tf.metrics.accuracy(labels, preds, name='acc_op')
            metrics = {'acc': accuracy}
            tf.summary.scalar('accuracy', accuracy[1])
            tf.summary.histogram('output_probs', output_probs)
            tf.summary.histogram('combined_rnn_state', combined_final_state)

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)

        if mode == tf.estimator.ModeKeys.TRAIN:
            print('PDD:', params.processed_data_dir)
            vocab_path = os.path.join(
                os.path.realpath(params.processed_data_dir),
                'vocab_{}.txt'.format(params.vocab_size))
            print('VP:', vocab_path)

            # Configure the TensorBoard embedding projector so the learned
            # embeddings are displayed with their vocabulary words.
            config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
            embz = config.embeddings.add()
            embz.tensor_name = emb_hook._embed_tensor.name
            embz.metadata_path = vocab_path
            writer = tf.summary.FileWriter(params.model_dir)
            tf.contrib.tensorboard.plugins.projector.visualize_embeddings(
                writer, config)

            def my_formatter(tag_to_tensor):
                res = ''
                for tag, tensor in tag_to_tensor.items():
                    res += ' {}={:.2f}'.format(tag, tensor)
                return res

            logging_hook = tf.train.LoggingTensorHook(
                {
                    'step': tf.train.get_global_step(),
                    'loss': loss,
                    'acc': accuracy[0]
                },
                every_n_iter=params.steps_per_print,
                formatter=my_formatter)
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op,
                training_hooks=[logging_hook])
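Because this model_fn handles both TRAIN and EVAL, it fits naturally with tf.estimator.train_and_evaluate. The sketch below shows one way to drive it; the tiny in-memory dataset, step counts, model_dir, and the hparams object (an HParams carrying the fields this model_fn reads) are all placeholders, not part of the original code.

import tensorflow as tf

def train_input_fn():
    # Placeholder data: two "documents" of token ids and their class labels.
    # A real pipeline would build this from the 20 Newsgroups data.
    token_ids = tf.constant([[2, 3, 1, 0], [3, 2, 1, 0]], dtype=tf.int64)
    labels = tf.constant([0, 1], dtype=tf.int64)
    dataset = tf.data.Dataset.from_tensor_slices((token_ids, labels))
    return dataset.repeat().batch(2)

eval_input_fn = train_input_fn  # placeholder; use a held-out split in practice

# hparams: a tf.contrib.training.HParams object (defined elsewhere) with the
# fields this model_fn reads, e.g. vocab, vocab_size, embed_size, state_size,
# num_layers, processed_data_dir, model_dir, steps_per_print.
estimator = tf.estimator.Estimator(
    model_fn=model_fn, model_dir='/tmp/newsgroups_model', params=hparams)

train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=20000)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, steps=100,
                                  throttle_secs=60)

# tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)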
def model_fn(features, labels, mode, params):
    # 20 Newsgroups dataset has 20 unique labels.
    num_classes = 20

    # Load and build the embedding layer, initialized with
    # pre-trained GloVe embeddings.
    # Has shape (batch_size, max_seq_len, embed_size)
    embedded_features = components.glove_embed(
        features,
        embed_shape=(params.vocab_size, params.embed_size),
        vocabulary=params.vocab)

    # Define LSTMCell with state size of 128.
    cell = tf.nn.rnn_cell.LSTMCell(128)

    # Use tf.nn.dynamic_rnn for efficient computation.
    # It utilizes TensorFlow's tf.while_loop to repeatedly
    # call cell(...) over the sequential embedded_features.
    #
    # Returns:
    #   the full output sequence as `outputs` tensor,
    #   which has shape (batch_size, max_seq_len, 128)
    #   the final LSTMStateTuple(c_final, h_final), where both
    #   c_final and h_final have shape (batch_size, 128)
    outputs, state = tf.nn.dynamic_rnn(
        cell=cell, inputs=embedded_features, dtype=tf.float32)

    # We project the final output state to obtain
    # the logits over each of our possible classes (labels).
    logits = tf.layers.Dense(num_classes)(state.h)

    # For PREDICT mode, compute predicted label for each example in batch.
    if mode == tf.estimator.ModeKeys.PREDICT:
        output_probs = tf.nn.softmax(logits)
        preds = tf.argmax(output_probs, -1)
        # Create table for converting prediction index -> label.
        table = tf.contrib.lookup.index_to_string_table_from_tensor(params.vocab)
        # Convert each prediction index to the corresponding label.
        preds_words = table.lookup(preds)
        return tf.estimator.EstimatorSpec(mode, predictions=preds_words)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    with tf.variable_scope('accuracy', values=[labels, logits]):
        output_probs = tf.nn.softmax(logits)
        num_correct = tf.to_float(
            tf.nn.in_top_k(output_probs, tf.to_int64(labels), 1))
        accuracy = tf.reduce_mean(num_correct)

    if mode == tf.estimator.ModeKeys.EVAL:
        preds = tf.argmax(output_probs, -1)
        eval_metric_ops = {'acc': tf.metrics.accuracy(labels, preds)}
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=eval_metric_ops)

    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.train.get_or_create_global_step(),
        learning_rate=1e-3,
        optimizer='Adam')

    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.summary.scalar('acc', accuracy)
        logging_hook = tf.train.LoggingTensorHook(
            {
                'step': tf.train.get_global_step(),
                'loss': loss,
                'acc': accuracy
            },
            every_n_iter=params.steps_per_print)
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, train_op=train_op, training_hooks=[logging_hook])
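Every model_fn above relies on a components.glove_embed helper that is not shown here. The sketch below is a hypothetical implementation with the call signature used in the simpler examples (features, embed_shape, vocabulary); it is NOT the author's implementation, and the random initial values stand in for actually loading pre-trained GloVe vectors.

import numpy as np
import tensorflow as tf

def glove_embed(features, embed_shape, vocabulary):
    """Embeds integer token ids using a variable initialized from GloVe.

    Args:
        features: int Tensor of token ids, shape (batch_size, max_seq_len).
        embed_shape: tuple (vocab_size, embed_size).
        vocabulary: list of vocabulary words, used to select GloVe rows.

    Returns:
        float Tensor of shape (batch_size, max_seq_len, embed_size).
    """
    vocab_size, embed_size = embed_shape
    # Placeholder initial values; a real helper would load the pre-trained
    # GloVe vectors corresponding to `vocabulary` from disk here.
    initial_embeddings = np.random.uniform(
        -0.05, 0.05, size=(vocab_size, embed_size)).astype(np.float32)
    embeddings = tf.get_variable(
        'glove_embeddings',
        initializer=initial_embeddings,
        trainable=True)
    return tf.nn.embedding_lookup(embeddings, features)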