Example #1
def model_fn(features, labels, mode, params):
    """
    Args:
        features: Tensor with shape (batch_size, max_seq_len) with dtype int.
        labels: same as features, since we are training an autoencoder.
        params: tf.contrib.HParams object containing...
            embed_size, vocab_size
    """
    if mode == tf.estimator.ModeKeys.PREDICT:
        features = features['x']

    # Load and build the embedding layer, initialized with pre-trained
    # GloVe embeddings.
    embedded_features = components.glove_embed(features,
                                               embed_shape=(params.vocab_size,
                                                            params.embed_size),
                                               vocabulary=params.vocab)

    hidden_layer = tf.layers.Dense(params.hidden_size,
                                   activation=tf.tanh)(embedded_features)
    logits = tf.layers.Dense(params.vocab_size)(hidden_layer)

    if mode == tf.estimator.ModeKeys.PREDICT:
        output_probs = tf.nn.softmax(logits)
        preds = tf.argmax(output_probs, -1)
        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            params.vocab)
        preds_words = table.lookup(preds)
        return tf.estimator.EstimatorSpec(mode, predictions=preds_words)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.train.get_or_create_global_step(),
        learning_rate=0.01,
        optimizer='Adam')

    with tf.variable_scope('accuracy', values=[labels, logits]):
        flattened_logits = tf.reshape(
            logits, [params.batch_size * params.max_seq_len, -1])
        flattened_labels = tf.reshape(labels, [
            params.batch_size * params.max_seq_len,
        ])
        output_probs = tf.nn.softmax(flattened_logits)
        num_correct = tf.to_float(
            tf.nn.in_top_k(output_probs, tf.to_int64(flattened_labels), 1))
        accuracy = tf.reduce_mean(num_correct)

    tf.summary.scalar('accuracy', accuracy)
    if mode == tf.estimator.ModeKeys.TRAIN:
        logging_hook = tf.train.LoggingTensorHook(
            {
                'loss': loss,
                'acc': accuracy
            }, every_n_iter=100)
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[logging_hook])
Example #2
def model_fn(features, labels, mode, params):
    print('=' * 50, '\n', 'MODEL_FN CALLED IN MODE', mode, '\n', '=' * 50)

    # Place any special preprocessing needed for PREDICT and/or serving here:
    if mode == tf.estimator.ModeKeys.PREDICT:
        input_doc = features['x']
        table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=params.vocab_path,
            vocab_size=params.vocab_size,
            default_value=1)
        features = table.lookup(input_doc)

    # Load and build the embedding layer, initialized with
    # pre-trained GloVe embeddings.
    # Has shape (batch_size, max_seq_len, embed_size)
    with tf.device('/cpu:0'):
        embedded_features = components.glove_embed(
            features,
            embed_shape=(params.vocab_size, params.embed_size),
            vocab_path=params.vocab_path,
            projector_path=params.model_dir)

    with tf.device('/{}:0'.format(args.device)):
        # Ensure that `embedded_features` has rank 3 before executing any further.
        with tf.control_dependencies([tf.assert_rank(embedded_features, 3)]):
            _, final_state = components.deep_blstm(
                inputs=embedded_features,
                mode=mode,
                state_size=params.state_size,
                num_layers=params.num_layers,
                dropout_prob=params.dropout_prob)

        if params.l2_decay:
            l2_regularize(final_state, params.l2_decay, 'final_state_l2')

        # We project the final output state to obtain
        # the logits over each of our possible classes (labels).
        logits = tf.layers.Dense(params.num_labels,
                                 kernel_regularizer=functools.partial(
                                     l2_regularize,
                                     scale=params.l2_decay,
                                     name='logits_kernel_l2'))(final_state)

        with tf.variable_scope('predictions'):
            output_probs = tf.nn.softmax(logits, name='output_probs')
            preds = tf.argmax(output_probs, -1, name='preds')
            if params.debug:
                preds = tf.Print(preds, [preds],
                                 'Preds print: ',
                                 first_n=10,
                                 summarize=5)

    # Create table for converting prediction index -> label.
    table = tf.contrib.lookup.index_to_string_table_from_file(
        params.labels_path)
    # Convert each prediction index to the corresponding label.
    preds_words = tf.identity(table.lookup(preds), 'preds_words')
    # For PREDICT mode, compute predicted label for each example in batch.
    if mode == tf.estimator.ModeKeys.PREDICT:
        pred_out = tf.estimator.export.PredictOutput(
            {'preds_words': preds_words})
        return tf.estimator.EstimatorSpec(
            mode,
            predictions=preds_words,
            export_outputs={'export_outputs': pred_out},
            prediction_hooks=[
                hooks.FreezingHook(
                    'preds_words',
                    os.path.join(params.model_dir, 'frozen_model.pb'))
            ],
            scaffold=tf.train.Scaffold(init_op=tf.tables_initializer()))

    with tf.device('/{}:0'.format(args.device)):
        # from tensorflow.contrib.layers import optimize_loss, l2_regularizer
        l2 = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) \
            if params.l2_decay else 0.0
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                      logits=logits) + l2
        train_op = optimize_loss(loss,
                                 global_step=tf.train.get_or_create_global_step(),
                                 learning_rate=params.learning_rate,
                                 clip_gradients=5.0,
                                 optimizer='Adam')

    accuracy = tf.metrics.accuracy(labels, preds, name='acc_op')
    tf.summary.scalar('acc', tf.identity(accuracy[1], 'acc_tensor'))
    tf.summary.scalar('loss', tf.identity(loss, 'loss_tensor'))
    tf.summary.histogram('logits', logits)
    tf.summary.histogram('output_probs', output_probs)
    tf.summary.histogram('combined_rnn_state', final_state)

    if mode == tf.estimator.ModeKeys.TRAIN:
        ops_hook = hooks.CustomOpsHook([accuracy])

        def over_fifty(datum, tensor):
            """Returns True if there exists an entry in output_probs over 0.5.
            Args:
                datum: DebugTensorDatum
                tensor: dumped tensor value a `numpy.ndarray`
            """
            if 'predictions/output_probs' in datum.tensor_name:
                return tensor.max() > 0.5
            return False

        training_hooks = [ops_hook]
        if params.debug:
            debug_hook = tfdbg.LocalCLIDebugHook()
            debug_hook.add_tensor_filter('over_fifty', over_fifty)
            training_hooks.append(debug_hook)

        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=training_hooks)
    elif mode == tf.estimator.ModeKeys.EVAL:
        early_stopping_hook = hooks.EarlyStoppingHook(metric=accuracy,
                                                      max_metric=0.98,
                                                      patience=20000)
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            evaluation_hooks=[early_stopping_hook],
            eval_metric_ops={'acc': accuracy})
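The PREDICT/export branch above expects `features['x']` to be a string tensor of tokenized documents (it looks the tokens up against the vocabulary file itself) and exports `preds_words` via `export_outputs`. A possible serving input function for that path, as a sketch under those assumptions rather than code from the original source:

def serving_input_receiver_fn():
    # Batch of tokenized documents; raw token strings are fine here because
    # the PREDICT branch performs the vocab lookup internally.
    tokens = tf.placeholder(dtype=tf.string, shape=[None, None], name='input_tokens')
    return tf.estimator.export.ServingInputReceiver(
        features={'x': tokens},
        receiver_tensors={'x': tokens})

# estimator.export_savedmodel(params.model_dir, serving_input_receiver_fn)
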
Example #3
def model_fn(features, labels, mode, params):

    with tf.variable_scope('model_fn', values=[features, labels]):
        # 20 Newsgroups dataset has 20 unique labels.
        num_classes = 20

        # Load and build the embedding layer, initialized with
        # pre-trained GloVe embeddings.
        # Has shape (batch_size, max_seq_len, embed_size)
        embedded_features, emb_hook = components.glove_embed(
            features,
            embed_shape=(params.vocab_size, params.embed_size),
            vocabulary=params.vocab)

        with tf.variable_scope('deep_blstm'):

            def deep_lstm():
                if mode == tf.estimator.ModeKeys.TRAIN:
                    return MultiRNNCell([
                        DropoutWrapper(LSTMCell(params.state_size),
                                       state_keep_prob=0.5)
                        for _ in range(params.num_layers)
                    ])
                else:
                    return MultiRNNCell([
                        LSTMCell(params.state_size)
                        for _ in range(params.num_layers)
                    ])

            cell_fw = deep_lstm()
            cell_bw = deep_lstm()

            # Use tf.nn.bidirectional_dynamic_rnn for efficient computation.
            # It utilizes TensorFlow's tf.while_loop to repeatedly
            # call cell(...) over the sequential embedded_features.
            #
            # Returns:
            #   outputs: tuple (output_fw, output_bw) of the fw and bw RNN output Tensors,
            #       each with shape (batch_size, max_seq_len, cell.output_size)
            #   output_states: tuple (output_state_fw, output_state_bw) containing fw and bw
            #       final states of bidirectional rnn.
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=embedded_features,
                dtype=tf.float32)

            # Each output_state is a tuple of length num_layers,
            # and the i'th element is an LSTMStateTuple, representing the
            # final state of the i'th layer.
            output_state_fw, output_state_bw = output_states

            def concat_lstms(lstms):
                """Merges list of LSTMStateTuple into a single LSTMStateTuple."""
                return LSTMStateTuple(c=tf.concat([lstm.c for lstm in lstms],
                                                  axis=-1),
                                      h=tf.concat([lstm.h for lstm in lstms],
                                                  axis=-1))

            # First, concatenate each output_state LSTMStateTuple, so that the
            # result is a single LSTMStateTuple per direction (instead of num_layers many).
            output_state_fw = concat_lstms(output_state_fw)
            output_state_bw = concat_lstms(output_state_bw)

            # Then, combine the forward and backward output states.
            combined_final_state = tf.concat(
                [output_state_fw.h, output_state_bw.h], axis=-1)
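            # With state_size units per LSTMCell, `combined_final_state` ends up
            # with shape (batch_size, 2 * num_layers * state_size): the h states
            # of every layer, concatenated across layers and both directions.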

        # We project the final output state to obtain
        # the logits over each of our possible classes (labels).
        logits = tf.layers.Dense(num_classes)(combined_final_state)

        with tf.variable_scope('predictions'):
            output_probs = tf.nn.softmax(logits)
            preds = tf.argmax(output_probs, -1)

        # For PREDICT mode, compute predicted label for each example in batch.
        if mode == tf.estimator.ModeKeys.PREDICT:
            # Create table for converting prediction index -> label.
            table = tf.contrib.lookup.index_to_string_table_from_tensor(
                params.vocab)
            # Convert each prediction index to the corresponding label.
            preds_words = table.lookup(preds)
            return tf.estimator.EstimatorSpec(mode, predictions=preds_words)

        with tf.variable_scope('train_and_eval', values=[labels, logits]):
            loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                          logits=logits)
            train_op = tf.contrib.layers.optimize_loss(
                loss=loss,
                global_step=tf.train.get_or_create_global_step(),
                learning_rate=1e-3,
                clip_gradients=5.0,
                optimizer='Adam')

        with tf.variable_scope('metrics', values=[labels, preds]):
            accuracy = tf.metrics.accuracy(labels, preds, name='acc_op')
            metrics = {'acc': accuracy}
            tf.summary.scalar('accuracy', accuracy[1])
            tf.summary.histogram('output_probs', output_probs)
            tf.summary.histogram('combined_rnn_state', combined_final_state)

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        if mode == tf.estimator.ModeKeys.TRAIN:

            print('PDD:', params.processed_data_dir)
            vocab_path = os.path.join(
                os.path.realpath(params.processed_data_dir),
                'vocab_{}.txt'.format(params.vocab_size))
            print('VP:', vocab_path)

            config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
            embz = config.embeddings.add()
            embz.tensor_name = emb_hook._embed_tensor.name
            embz.metadata_path = vocab_path
            writer = tf.summary.FileWriter(params.model_dir)
            tf.contrib.tensorboard.plugins.projector.visualize_embeddings(
                writer, config)

            def my_formatter(tag_to_tensor):
                res = ''
                for tag, tensor in tag_to_tensor.items():
                    res += '  {}={:.2f}'.format(tag, tensor)
                return res

            logging_hook = tf.train.LoggingTensorHook(
                {
                    'step': tf.train.get_global_step(),
                    'loss': loss,
                    'acc': accuracy[0]
                },
                every_n_iter=params.steps_per_print,
                formatter=my_formatter)
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op,
                                              training_hooks=[logging_hook])

Example #4
def model_fn(features, labels, mode, params):
    # 20 Newsgroups dataset has 20 unique labels.
    num_classes = 20

    # Load and build the embedding layer, initialized with
    # pre-trained GloVe embeddings.
    # Has shape (batch_size, max_seq_len, embed_size)
    embedded_features = components.glove_embed(
        features,
        embed_shape=(params.vocab_size, params.embed_size),
        vocabulary=params.vocab)

    # Define LSTMCell with state size of 128.
    cell = tf.nn.rnn_cell.LSTMCell(128)

    # Use tf.nn.dynamic_rnn for efficient computation.
    # It utilizes TensorFlow's tf.while_loop to repeatedly
    # call cell(...) over the sequential embedded_features.
    #
    # Returns:
    #   the full output sequence as `outputs` tensor,
    #       which has shape (batch_size, max_seq_len, 128)
    #   the final LSTMStateTuple(c_final, h_final), where both
    #   c_final and h_final have shape (batch_size, 128)
    outputs, state = tf.nn.dynamic_rnn(
        cell=cell, inputs=embedded_features, dtype=tf.float32)

    # We project the final output state to obtain
    # the logits over each of our possible classes (labels).
    logits = tf.layers.Dense(num_classes)(state.h)

    # For PREDICT mode, compute predicted label for each example in batch.
    if mode == tf.estimator.ModeKeys.PREDICT:
        output_probs = tf.nn.softmax(logits)
        preds = tf.argmax(output_probs, -1)
        # Create table for converting prediction index -> label.
        table = tf.contrib.lookup.index_to_string_table_from_tensor(params.vocab)
        # Convert each prediction index to the corresponding label.
        preds_words = table.lookup(preds)
        return tf.estimator.EstimatorSpec(
            mode, predictions=preds_words)

    loss = tf.losses.sparse_softmax_cross_entropy(
        labels=labels, logits=logits)

    with tf.variable_scope('accuracy', values=[labels, logits]):
        output_probs = tf.nn.softmax(logits)
        num_correct = tf.to_float(tf.nn.in_top_k(
            output_probs, tf.to_int64(labels), 1))
        accuracy = tf.reduce_mean(num_correct)

    if mode == tf.estimator.ModeKeys.EVAL:
        preds = tf.argmax(output_probs, -1)
        eval_metric_ops = {'acc': tf.metrics.accuracy(labels, preds)}
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=eval_metric_ops)

    train_op = tf.contrib.layers.optimize_loss(
        loss=loss, global_step=tf.train.get_or_create_global_step(),
        learning_rate=1e-3,
        optimizer='Adam')

    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.summary.scalar('acc', accuracy)
        logging_hook = tf.train.LoggingTensorHook({
            'step': tf.train.get_global_step(),
            'loss': loss,
            'acc': accuracy
        }, every_n_iter=params.steps_per_print)
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            train_op=train_op,
            training_hooks=[logging_hook])
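
All of these model_fns consume `features` as a (batch_size, max_seq_len) int tensor of token ids and `labels` as int class (or token) ids. A minimal tf.data input_fn sketch under those assumptions; the TFRecord layout, field names, and the `train_records` HParams field are made up for illustration:

def train_input_fn(params):
    dataset = tf.data.TFRecordDataset(params.train_records)  # hypothetical path field

    def parse(example_proto):
        # Each record is assumed to hold a fixed-length token-id sequence and a label.
        parsed = tf.parse_single_example(
            example_proto,
            features={
                'tokens': tf.FixedLenFeature([params.max_seq_len], tf.int64),
                'label': tf.FixedLenFeature([], tf.int64),
            })
        return parsed['tokens'], parsed['label']

    return (dataset.map(parse)
            .shuffle(10000)
            .repeat()
            .batch(params.batch_size))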