Example #1
 def __wordchr_embedding_conv1d(self,
                                inputs,
                                keep_prob=0.5,
                                scope='wordchr-embedding-conv1d'):
     """Compute character embeddings by masked conv1d and max-pooling
     """
     with tf.variable_scope(scope):
         with tf.device('/cpu:0'):
             chr_embeddings = tf.Variable(tf.random_uniform(
                 [self.chr_vocab_size, self.chr_dim], -1.0, 1.0),
                                          name='chr_embeddings')
             wordchr_embeddings_t = tf.nn.embedding_lookup(
                 chr_embeddings, inputs
             )  # (batch_size, sentence_length, word_length, chr_dim)
             wordchr_embeddings_t = tf.nn.dropout(wordchr_embeddings_t,
                                                  keep_prob)
         wordchr_embeddings_t = tf.reshape(
             wordchr_embeddings_t,
             [-1, self.word_length, self.chr_dim
              ])  # (batch_size*sentence_length, word_length, chr_dim)
         # masking
         t = tf.reshape(inputs,
                        [-1, self.word_length
                         ])  # (batch_size*sentence_length, word_length)
         masks = self.__compute_word_masks(
             t)  # (batch_size*sentence_length, word_length)
         filters = self.__num_filters
         kernel_size = self.__filter_sizes[0]
         wordchr_embeddings = masked_conv1d_and_max(wordchr_embeddings_t,
                                                    masks, filters,
                                                    kernel_size, tf.nn.relu)
         # (batch_size*sentence_length, filters) -> (batch_size, sentence_length, filters)
         wordchr_embeddings = tf.reshape(
             wordchr_embeddings, [-1, self.sentence_length, filters])
         return tf.nn.dropout(wordchr_embeddings, keep_prob)
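
Several of the examples on this page call a masked_conv1d_and_max helper that is not defined here. The following is a minimal sketch of what such a helper might look like, assuming the "masked conv1d + max-over-time pooling" pattern described in Example #1's docstring; the original helper's signature and internals may differ.

import tensorflow as tf

def masked_conv1d_and_max(t, mask, filters, kernel_size, activation=tf.nn.relu):
    """Conv1d over the time axis followed by masked max-over-time pooling.

    t    : float tensor (..., time, channels)
    mask : bool tensor  (..., time), True at real (non-padded) positions
    """
    shape = tf.shape(t)
    ndims = t.shape.ndims
    # Flatten all leading dimensions so tf.layers.conv1d sees a 3-D tensor
    dim1 = tf.reduce_prod(shape[:ndims - 2])
    dim2 = shape[-2]
    dim3 = shape[-1]
    mask = tf.cast(tf.reshape(mask, [dim1, dim2, 1]), tf.float32)
    t = tf.reshape(t, [dim1, dim2, dim3]) * mask      # zero out padded positions

    conv = tf.layers.conv1d(t, filters, kernel_size, padding='same',
                            activation=activation)
    conv *= mask                                      # re-mask after convolution
    # Push padded positions below every real value so they never win the max
    conv += (1. - mask) * tf.reduce_min(conv, axis=-2, keepdims=True)
    pooled = tf.reduce_max(conv, axis=-2)             # (dim1, filters)

    # Restore the original leading dimensions
    out_shape = tf.concat([shape[:ndims - 2], [filters]], axis=0)
    return tf.reshape(pooled, out_shape)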
Example #2
def model_fn(features, labels, mode, params):
    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars), add_features = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable(
        'chars', [num_chars + 1, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(
        char_embeddings, weights, params['filters'], params['kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat(
        [word_embeddings, char_embeddings,
         tf.cast(add_features, tf.float32)], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)


    # Self-attention over the Bi-LSTM output
    num_units = 2 * params['lstm_size']  # size of the concatenated Bi-LSTM output
    W1 = tf.get_variable("W1", [num_units, num_units],
                         initializer=tf.glorot_uniform_initializer(),
                         dtype=tf.float32)
    b1 = tf.get_variable("b1", [num_units], dtype=tf.float32)
    q = tf.tanh(tf.tensordot(output, W1, axes=[[2], [0]]) + b1)  # (batch, time, num_units)
    out_shape = tf.shape(output)
    # Pairwise scores between time steps; zero the diagonal so a position
    # does not attend to itself
    out = tf.matmul(q, tf.transpose(q, perm=[0, 2, 1]))  # (batch, time, time)
    out *= (1 - tf.eye(out_shape[1]))
    # Row-wise softmax gives the attention matrix
    matrix = tf.nn.softmax(
        tf.reshape(out, (out_shape[0] * out_shape[1], out_shape[1])))
    matrix = tf.reshape(matrix, (out_shape[0], out_shape[1], out_shape[1]))
    atten_out = tf.matmul(matrix, output)  # (batch, time, 2 * lstm_size)
    output = tf.concat([output, atten_out], axis=-1)
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
Example #3
def model_fn(features, labels, mode, params):
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars + 1, params['dim_chars']],
                               tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings,
                                        rate=dropout,
                                        training=training)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, weights,
                                            params['filters'],
                                            params['kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
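
The isinstance(features, dict) branch at the top of this and the following examples suggests the model is exported with a serving input function that feeds the same feature keys. A hypothetical sketch of such a function (the placeholder shapes and the export call are assumptions, not taken from the original code):

def serving_input_receiver_fn():
    # Placeholders matching the keys unpacked in model_fn (shapes are assumptions)
    words = tf.placeholder(tf.string, [None, None], name='words')
    nwords = tf.placeholder(tf.int32, [None], name='nwords')
    chars = tf.placeholder(tf.string, [None, None, None], name='chars')
    nchars = tf.placeholder(tf.int32, [None, None], name='nchars')
    receiver_tensors = {'words': words, 'nwords': nwords,
                        'chars': chars, 'nchars': nchars}
    return tf.estimator.export.ServingInputReceiver(receiver_tensors,
                                                    receiver_tensors)

# e.g. estimator.export_savedmodel('saved_model', serving_input_receiver_fn)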
Example #4
def model_fn(features, labels, mode, params):
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    # num_oov_buckets: out-of-vocabulary words are hashed to ids in
    # [vocab_size, vocab_size + num_oov_buckets - 1]; if num_oov_buckets <= 0,
    # words not in the vocabulary get default_value (default -1) instead
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        # indices holds the positive-class tag ids; 'O' is treated as the negative
        # class and is excluded from indices, which the evaluation metrics rely on
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings: learn character embedding vectors
    char_ids = vocab_chars.lookup(chars)
    # The paper initializes char_embeddings uniformly in [-sqrt(3/dim), sqrt(3/dim)];
    # with it f1 = 0.91270673 vs. f1 = 0.91264033 without, an improvement that is
    # within normal random fluctuation
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']],
                               dtype=tf.float32)
    # initializer=tf.random_uniform_initializer(-tf.sqrt(3/params['dim_chars']), tf.sqrt(3/params['dim_chars'])))
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings,
                                        rate=dropout,
                                        training=training)

    # Char 1d convolution; sequence_mask turns the integer char counts into a boolean mask
    mask = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, mask,
                                            params['filters'],
                                            params['kernel_size'])

    # Word Embeddings: the word vectors are not trained, glove.840B.300d is used directly
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # Bi-LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF: in a linear-chain CRF the maximal cliques over the output variables are pairs
    # of adjacent nodes, so each feature function depends on at most two adjacent outputs.
    # logits provides the unary (state) features, crf_params the pairwise transition features
    # Project the last dimension to num_tags via a (output.shape[-1], num_tags) matrix;
    # all leading dimensions are kept unchanged
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        mask = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, mask),
            'precision': precision(tags, pred_ids, num_tags, indices, mask),
            'recall': recall(tags, pred_ids, num_tags, indices, mask),
            'f1': f1(tags, pred_ids, num_tags, indices, mask),
        }
        # tf.metrics.accuracy returns (accuracy, update_op): accuracy reads the value
        # accumulated so far (i.e. before the current batch), while update_op updates
        # total and count with the current batch and returns the updated accuracy.
        # update_op must therefore be run; if op[0] (accuracy) were written to the
        # summary instead, total and count would never be updated and the value would
        # stay constant
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
                loss,
                global_step=tf.train.get_or_create_global_step())  # default learning rate is 1e-3
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
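
As the comments above note, the CRF combines unary scores (logits) with pairwise transition scores (crf_params). A small illustrative sketch, independent of the example above, of the sequence score that crf_log_likelihood normalizes (the helper name is hypothetical):

def crf_sequence_score(logits, transitions, tag_seq):
    """logits: [seq_len, num_tags] unary scores, transitions: [num_tags, num_tags]."""
    unary = sum(logits[t][tag] for t, tag in enumerate(tag_seq))
    binary = sum(transitions[tag_seq[t]][tag_seq[t + 1]]
                 for t in range(len(tag_seq) - 1))
    return unary + binary

# crf_log_likelihood returns score(tags) minus the log-sum-exp of the scores of all
# possible tag sequences, i.e. the log-probability of the gold sequence.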
Example #5
def model_fn(features, labels, mode, params):
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features

    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars + 1, params['dim_chars']],
                               tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    # char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
    #                                     training=training)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, weights,
                                            params['char_filters'],
                                            params['char_kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['w2v'])['embeddings']  # np.array
    print("glove shape", glove.shape)
    variable = np.vstack([glove,
                          [[0.] * params['dim']]])  # [vob_size, emb_size]
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # block_unflat_scores shape: [batch_size, max_seq_len, class_num]
    block_unflat_scores, _, l2_loss = feature_layers(embeddings, reuse=False)
    pred_ids = tf.argmax(block_unflat_scores[-1], 2)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        # Cross-entropy loss, masked so that padding positions do not contribute
        with tf.name_scope("loss"):
            loss = tf.constant(0.0)
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=block_unflat_scores[-1], labels=tags)
            input_mask = tf.cast(tf.sequence_mask(nwords), tf.float32)
            masked_losses = tf.multiply(losses, input_mask)
            loss += tf.reduce_sum(masked_losses)
            loss += params["l2_penalty"] * l2_loss

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
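
feature_layers is not defined on this page. A hypothetical sketch of a compatible function, assuming a stack of dilated 1d convolutions that returns a list of per-block score tensors plus an L2 penalty (the names, dilation rates and sizes below are assumptions, not the original implementation):

def feature_layers(embeddings, reuse=False, num_classes=9, filters=100):
    with tf.variable_scope('feature_layers', reuse=reuse):
        t = tf.layers.conv1d(embeddings, filters, 3, padding='same',
                             activation=tf.nn.relu, name='initial_conv')
        block_unflat_scores = []
        for i, rate in enumerate([1, 2, 4]):
            t = tf.layers.conv1d(t, filters, 3, padding='same',
                                 dilation_rate=rate, activation=tf.nn.relu,
                                 name='dilated_conv_%d' % i)
            # per-block scores: [batch_size, max_seq_len, num_classes]
            scores = tf.layers.dense(t, num_classes, name='scores_%d' % i)
            block_unflat_scores.append(scores)
        # L2 penalty over the variables created in this scope
        scope_name = tf.get_variable_scope().name
        l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                            if v.name.startswith(scope_name)])
        return block_unflat_scores, t, l2_loss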
Example #6
def graph_fn(features, labels, mode, params, reuse=None, getter=None):
    # Read vocabs and inputs
    num_tags = params['num_tags']
    (words, nwords), (chars, nchars) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    with tf.variable_scope('graph', reuse=reuse, custom_getter=getter):
        # Read vocabs and inputs
        dropout = params['dropout']
        (words, nwords), (chars, nchars) = features
        training = (mode == tf.estimator.ModeKeys.TRAIN)
        vocab_words = tf.contrib.lookup.index_table_from_file(
            params['words'], num_oov_buckets=params['num_oov_buckets'])
        vocab_chars = tf.contrib.lookup.index_table_from_file(
            params['chars'], num_oov_buckets=params['num_oov_buckets'])
        with Path(params['tags']).open() as f:
            indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
            num_tags = len(indices) + 1
        with Path(params['chars']).open() as f:
            num_chars = sum(1 for _ in f) + params['num_oov_buckets']

        # Char Embeddings
        char_ids = vocab_chars.lookup(chars)
        variable = tf.get_variable('chars_embeddings',
                                   [num_chars + 1, params['dim_chars']],
                                   tf.float32)
        char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
        char_embeddings = tf.layers.dropout(char_embeddings,
                                            rate=dropout,
                                            training=training)

        # Char 1d convolution
        weights = tf.sequence_mask(nchars)
        char_embeddings = masked_conv1d_and_max(char_embeddings, weights,
                                                params['filters'],
                                                params['kernel_size'])

        # Word Embeddings
        word_ids = vocab_words.lookup(words)
        glove = np.load(params['glove'])['embeddings']  # np.array
        variable = np.vstack([glove, [[0.] * params['dim']]])
        variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
        word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

        # Concatenate Word and Char Embeddings
        embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
        embeddings = tf.layers.dropout(embeddings,
                                       rate=dropout,
                                       training=training)

        # LSTM
        t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
        lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
        lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
        lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
        output_fw, _ = lstm_cell_fw(t,
                                    dtype=tf.float32,
                                    sequence_length=nwords)
        output_bw, _ = lstm_cell_bw(t,
                                    dtype=tf.float32,
                                    sequence_length=nwords)
        output = tf.concat([output_fw, output_bw], axis=-1)
        output = tf.transpose(output, perm=[1, 0, 2])
        output = tf.layers.dropout(output, rate=dropout, training=training)

        # CRF
        logits = tf.layers.dense(output, num_tags)
        crf_params = tf.get_variable("crf", [num_tags, num_tags],
                                     dtype=tf.float32)
        return logits, crf_params
Example #7
def model_fn(features, labels, mode, params):
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    (words, nwords), (chars, nchars) = features

    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)  # [[a,b],[c,z]] => [[0,1],[2,25]]
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']],
                               tf.float32)  # char embedding matrix, e.g. [86, 100]
    # each char id picks the corresponding row of `variable`, e.g.
    # [[0,1],[2,25]] => [[variable[0,:],variable[1,:]],[variable[2,:],variable[25,:]]]
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings,
                                        rate=dropout,
                                        training=training)  # drops 50% of the input

    # Char LSTM
    # char_embeddings: [batch (sentences), max words per sentence, max chars per word, dim_chars]
    dim_words = tf.shape(char_embeddings)[1]  # max number of words per sentence
    dim_chars = tf.shape(char_embeddings)[2]  # max number of chars per word (time len)

    flat = tf.reshape(char_embeddings,
                      [-1, dim_chars, params['dim_chars']])  # [batch*words, time len, dim_chars]
    t = tf.transpose(flat, perm=[1, 0, 2])  # [time len, batch*words, dim_chars], time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(
                                         nchars, [-1]))  #we take last state
    _, (_, output_bw) = lstm_cell_bw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(
                                         nchars, [-1]))  #we take last state

    output = tf.concat(
        [output_fw, output_bw],
        axis=-1)  #concat on the last D dimension of tensors 25+25

    char_embeddings_lstm = tf.reshape(
        output, [-1, params['char_lstm_size'] * 2])  # [b,t,D]
    char_embeddings_lstm = tf.expand_dims(char_embeddings_lstm, -2)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings_cnn = masked_conv1d_and_max(char_embeddings, weights,
                                                params['filters'],
                                                params['kernel_size'])
    char_embeddings_cnn = tf.reshape(char_embeddings_cnn,
                                     [-1, params['filters']])
    char_embeddings_cnn = tf.expand_dims(char_embeddings_cnn, -2)

    #concat cnn and lstm char embeddings
    char_embeddings = tf.concat([char_embeddings_cnn, char_embeddings_lstm],
                                axis=-2)

    #attention
    with tf.name_scope('Attention_layer'):
        attention_output, alphas = attention(char_embeddings,
                                             params['char_lstm_size'] * 2,
                                             time_major=False,
                                             return_alphas=True)
        tf.summary.histogram('alphas', alphas)

    char_embeddings = tf.reshape(attention_output,
                                 [-1, dim_words, params['char_lstm_size'] * 2])

    # Word Embeddings
    word_ids = vocab_words.lookup(
        words
    )  #[[b'Peter', b'Blackburn'],[b'Yac', b'Amirat']] => [[b'0', b'1'],[b'2', b'3']]
    glove = np.load(params['glove'])[
        'embeddings']  # np.array glove made of vocab words (reduces list)
    variable = np.vstack([glove, [[0.] * params['dim']]
                          ])  #concatenate on -1 axis, glove + [[0.]]
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(
        variable, word_ids
    )  #[[b'0', b'1'],[b'2', b'3']] => [[b'variable[0]', b'variable[1]'],[b'variable[2]', b'variable[3]']] [2,2,300]

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings],
                           axis=-1)  #concat on the last dimension axis 100+300
    embeddings = tf.layers.dropout(embeddings, rate=dropout,
                                   training=training)  # drops 50% of the input

    # LSTM for lstm
    t = tf.transpose(
        embeddings, perm=[1, 0, 2]
    )  # Need time-major #put the word dim as first dimension. check batch-major VS time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    #ELMO
    elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=False)
    word_embeddings = elmo(inputs={
        "tokens": words,
        "sequence_len": nwords
    },
                           signature="tokens",
                           as_dict=True)["elmo"]

    # Concatenate output LSTM1 and ELMO Embeddings, dropout
    embeddings = tf.concat([word_embeddings, output], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM 2
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm2_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm2_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(
        output, num_tags
    )  # dense over the Bi-LSTM output: same shape except the last dim becomes num_tags
    crf_params = tf.get_variable(
        "crf", [num_tags, num_tags],
        dtype=tf.float32)  #variable of crf pars matrix num_tags*num_tags
    pred_ids, _ = tf.contrib.crf.crf_decode(
        logits, crf_params, nwords
    )  #decode_tags: A [batch_size, max_seq_len] matrix, with dtype tf.int32. Contains the highest scoring tag indices.
    #potentials(logits): A [batch_size, max_seq_len, num_tags] tensor of unary potentials.

    if mode == tf.estimator.ModeKeys.PREDICT:  #prediction
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(
            tf.to_int64(pred_ids)
        )  #indices = tf.constant([1, 5], tf.int64) => ["lake", "UNKNOWN"]
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(
            params['tags'])  #get tags index from file
        tags = vocab_tags.lookup(labels)  # replace labels by their indices
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params
        )  #calculate log_likelihood given the real tags, return: A [batch_size] Tensor containing the log-likelihood of each example, given the sequence of tag indices.
        loss = tf.reduce_mean(
            -log_likelihood
        )  #Computes the mean of elements across dimensions of a tensor. x = tf.constant([[1., 1.], [2., 2.]]) tf.reduce_mean(x)  # 1.5

        # Metrics
        weights = tf.sequence_mask(
            nwords
        )  #convert the vector of size n to a matrix of bool of size n * max value in the vector v[1,2] ==> m[[true,false],[true, true]]
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(
                tags, pred_ids, num_tags, indices, weights
            ),  # ground truth, predictions, number of tags (9), indices of the positive classes
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(
                metric_name, op[1]
            )  #for tensor board#tuple of (scalar float Tensor, update_op) op[1] => update_op: An operation that increments the total and count variables appropriately and whose value matches accuracy.

        if mode == tf.estimator.ModeKeys.EVAL:  #Eval
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:  #training
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step()
            )  #adam optimizer operation to optimize the loss, global_step: Optional Variable to increment by one after the variables have been updated.
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
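
The attention helper used by this example is not shown on this page. A minimal sketch of a compatible additive-attention pooling layer, assuming the common attention(inputs, attention_size, time_major, return_alphas) signature (the real implementation may differ):

def attention(inputs, attention_size, time_major=False, return_alphas=False):
    if time_major:
        inputs = tf.transpose(inputs, perm=[1, 0, 2])  # -> [batch, time, dim]
    hidden_size = inputs.shape[-1].value
    w = tf.get_variable('att_w', [hidden_size, attention_size], dtype=tf.float32)
    b = tf.get_variable('att_b', [attention_size], dtype=tf.float32)
    u = tf.get_variable('att_u', [attention_size], dtype=tf.float32)
    v = tf.tanh(tf.tensordot(inputs, w, axes=1) + b)   # [batch, time, attention_size]
    vu = tf.tensordot(v, u, axes=1)                    # [batch, time] unnormalized scores
    alphas = tf.nn.softmax(vu)                         # attention weights over time
    # Weighted sum of the inputs over the time axis
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), axis=1)  # [batch, dim]
    return (output, alphas) if return_alphas else output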
Example #8
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = features['words'], features['nwords']

    # Read vocabs and inputs
    dropout = params['dropout']
    words, nwords = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(params['words'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['w2v'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Words 1d convolution for entity embeddings: run a conv1d over the embedded
    # words so that single-word vectors are preserved while vectors for entities
    # (spans of several words) can also be formed. Not used further below.
    weights = tf.sequence_mask(nwords)
    words_with_entity_embeddings = masked_conv1d_and_max(
        embeddings, weights, params['char_filters'], params['char_kernel_size'])

    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags) # [batch_size, max_seq_len, num_tags]
    pred_ids = tf.argmax(logits, -1)

    # crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    # pred_ids, pred_scores = tf.contrib.crf.crf_decode(logits, crf_params, nwords)
    input_mask = tf.sequence_mask(nwords, params["max_seq_len"], dtype=tf.float32)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings,
            # "scores": pred_scores
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        # log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
        #     logits, tags, nwords, crf_params)
        # loss = tf.reduce_mean(-log_likelihood)
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tags)
        masked_losses = tf.multiply(losses, input_mask)
        # loss += tf.div(tf.reduce_sum(masked_losses), tf.reduce_sum(input_mask))
        loss = tf.reduce_sum(masked_losses)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
Example #9
def model_fn(features, labels, mode, params):
    # Build the position encoder, CNN feature extractor and attention modules
    if params['elmo'] is False:
        pos = Position_Encoder(40)
    extractor = Cnn_extractor(params['lstm_size'])
    attention = Attention(params['lstm_size'], 2)

    if params['elmo'] is True:
        (words, nwords), (chars, nchars), (elmo_tokens, _) = features
    else:
        (words, nwords), (chars, nchars) = features

    # Read vocabs and inputs
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open(encoding='utf8') as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open(encoding='utf8') as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars + 1, params['dim_chars']],
                               tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings,
                                        rate=dropout,
                                        training=training)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, weights,
                                            params['filters'],
                                            params['kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    if params['elmo'] is True:
        bilm = BidirectionalLanguageModel(options_file, weight_file)
        context_embeddings_op = bilm(elmo_tokens)
        elmo_context_input = weight_layers('input',
                                           context_embeddings_op,
                                           l2_coef=0.0)
        elmo_embeddings = elmo_context_input['weighted_op']
        embeddings = tf.concat(
            [elmo_embeddings, word_embeddings, char_embeddings], axis=-1)
    else:
        embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    print(embeddings)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # Additional CNN + attention branch
    if params['elmo'] is False:
        embeddings_ = pos(embeddings, nwords)
        cnn_features = extractor(embeddings_)
    else:
        cnn_features = extractor(embeddings)
    attn_weights, cnn_logits = attention(cnn_features, nwords)
    attn_weights_ = tf.nn.softmax(attn_weights, -1)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.concat([output, cnn_logits], -1)
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings,
            'attn_weights': attn_weights_
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        attn_tags = tf.cast((tags > 0), tf.int32)
        attn_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=attn_weights, labels=attn_tags)
        attn_ids = tf.argmax(attn_weights, -1)
        attn_mask = tf.sequence_mask(nwords)
        attn_loss = tf.reduce_mean(tf.boolean_mask(attn_loss, attn_mask))
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)
        loss = loss + args.lambda_value * attn_loss

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
            'sac_acc': tf.metrics.accuracy(attn_tags, attn_ids, weights)
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
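
A hypothetical sketch of how one of the model_fn variants above would be wired into a tf.estimator.Estimator (the params keys and the train/eval input functions are assumptions based on the code above, not part of the original examples):

# Build the estimator from a model_fn defined above and run train/eval
estimator = tf.estimator.Estimator(model_fn, model_dir='results/model', params=params)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, throttle_secs=120)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)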