def build_graph(self):
    elmo_bilm = self.elmo_bilm
    context_elmo_embeddings_op = elmo_bilm(self.memory_elmo_token_ids)
    query_elmo_embeddings_op = elmo_bilm(self.query_elmo_token_ids)

    with tf.variable_scope("elmo_encodings_input"):
        elmo_context_input = weight_layers(
            'input', context_elmo_embeddings_op, l2_coef=0.0)['weighted_op']
        context_len = tf.shape(self.memory_vectors)[1]
        elmo_context_input = elmo_context_input[:, :context_len]
    with tf.variable_scope("elmo_encodings_input", reuse=True):
        elmo_query_input = weight_layers(
            'input', query_elmo_embeddings_op, l2_coef=0.0)['weighted_op']
        query_len = tf.shape(self.encoder_inputs)[1]
        elmo_query_input = elmo_query_input[:, :query_len]

    print("ELMo shapes:")
    print(elmo_context_input.get_shape().as_list())
    print(elmo_query_input.get_shape().as_list())

    with tf.device("/cpu:0"):
        with tf.variable_scope("embedding"):
            embedded_input_seq = tf.nn.embedding_lookup(
                self.emb, self.encoder_inputs)
            embedded_dec_input_seq = tf.nn.embedding_lookup(
                self.emb, self.decoder_inputs)
            embedded_dec_target_seq = tf.nn.embedding_lookup(
                self.emb, self.decoder_targets)
            embedded_memory_vectors = tf.nn.embedding_lookup(
                self.emb, self.memory_vectors)

    enc_hidden_sz = self.opt.hidden_size_encoder
    enc_num_layers = self.opt.num_layers_encoder

    # Add ELMo: concatenate the ELMo representations with the word embeddings.
    embedded_memory_vectors = tf.concat(
        [embedded_memory_vectors, elmo_context_input], -1)
    embedded_input_seq = tf.concat(
        [embedded_input_seq, elmo_query_input], -1)

    mem_rep = embedded_memory_vectors
    print(mem_rep.get_shape().as_list())

    for i in range(self.opt.num_attn_hops):
        with tf.variable_scope("attn_layer_%d" % i):
            with tf.variable_scope("mem_encoder"):
                mem_rep, _ = bi_cudnn_rnn_encoder(
                    'lstm', enc_hidden_sz, enc_num_layers,
                    self.opt.dropout_rate, mem_rep,
                    self.memory_vector_lengths, self.is_training)
            with tf.variable_scope("ques_encoder"):
                ques_inp, _ = bi_cudnn_rnn_encoder(
                    'lstm', enc_hidden_sz, enc_num_layers,
                    self.opt.dropout_rate, embedded_input_seq,
                    self.encoder_input_lengths, self.is_training)
            # Attend over the question with BiDAF-style attention.
            mem_rep = bidaf_attention(
                mem_rep, ques_inp, self.memory_vector_lengths,
                self.encoder_input_lengths, tri_linear_attention)
            print(mem_rep.get_shape().as_list())

    with tf.variable_scope("res_self_attn"):
        units = mem_rep.get_shape().as_list()[-1]
        print(units)
        mem_proj = tf.layers.dense(inputs=mem_rep, units=units,
                                   activation=tf.nn.relu,
                                   name="self_attn_input_proj")
        print(mem_proj.get_shape().as_list())
        with tf.variable_scope("input_proj"):
            self_attn_mem_input, _ = bi_cudnn_rnn_encoder(
                'lstm', enc_hidden_sz, enc_num_layers,
                self.opt.dropout_rate, mem_proj,
                self.memory_vector_lengths, self.is_training)
        self_attn_mem = self_attention_encoder(
            x=self_attn_mem_input,
            sim_func=tri_linear_attention,
            mask=self.memory_vector_lengths,
            merge_function=concat_with_product)
        print(self_attn_mem.get_shape().as_list())
        with tf.variable_scope("output_proj"):
            self_attn_output_proj, _ = bi_cudnn_rnn_encoder(
                'lstm', units / 2, enc_num_layers,
                self.opt.dropout_rate, self_attn_mem,
                self.memory_vector_lengths, self.is_training)
        # Residual connection around the self-attention block.
        mem_rep = mem_rep + self_attn_output_proj
        print(mem_rep.get_shape().as_list())

    batch_size = self.opt.batch_size
    sos_id = self.vocab.start_token_id
    eos_id = self.vocab.end_token_id
    dec_hidden_sz = self.opt.hidden_size_encoder
    dec_num_layers = self.opt.num_layers_decoder

    train_helper = tf.contrib.seq2seq.TrainingHelper(
        embedded_dec_input_seq, self.decoder_input_lengths)
    pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        self.emb,
        start_tokens=tf.fill([batch_size], sos_id),
        # XXX hack: use -1 instead of eos_id so greedy decoding runs to
        # max_decoder_length and the loss below is computed correctly.
        end_token=-1)

    def decode(helper, scope, reuse=None):
        with tf.variable_scope(scope, reuse=reuse):
            attention_over_context = tf.contrib.seq2seq.BahdanauAttention(
                num_units=self.opt.decoder_attn_size,
                memory=mem_rep,
                memory_sequence_length=self.memory_vector_lengths)
            decoder_cell = create_multi_rnn(
                'basic_lstm', dec_hidden_sz, dec_num_layers, self.keep_prob)
            projection_layer = layers_core.Dense(
                self.vocab.size(), use_bias=True, name='output_projection')
            decoder_cell = AttnPointerWrapper(
                cell=decoder_cell,
                attention_mechanism=attention_over_context,
                output_layer=projection_layer,
                max_oovs=self.opt.max_oovs,
                batch_size=self.opt.batch_size,
                memory_full_vocab=self.memory_vectors_full_vocab,
                attention_layer_size=self.opt.decoder_attn_size / 2,
                alignment_history=True,
                output_combined_distribution=True,
                unk_id=self.vocab.unk_token_id)
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=decoder_cell,
                helper=helper,
                initial_state=decoder_cell.zero_state(
                    batch_size=self.opt.batch_size, dtype=tf.float32))
            outputs = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder,
                output_time_major=False,
                maximum_iterations=self.max_decoder_length)
            return outputs

    train_outputs, train_state, self.train_final_lengths = decode(
        train_helper, 'decode')
    pred_outputs, pred_state, self.pred_final_lengths = decode(
        pred_helper, 'decode', reuse=True)

    train_logits = tf.transpose(
        train_state.final_dist_history.stack(), [1, 0, 2])
    pred_logits = tf.transpose(
        pred_state.final_dist_history.stack(), [1, 0, 2])
    self.preds = tf.argmax(pred_logits, axis=2)

    output_mask = tf.sequence_mask(self.decoder_target_lengths,
                                   dtype=tf.float32,
                                   maxlen=self.max_decoder_length)
    self.loss = tf.contrib.seq2seq.sequence_loss(
        logits=train_logits,
        targets=self.decoder_targets,
        weights=output_mask,
        softmax_loss_function=sparse_cross_entropy_with_probs)
    self.eval_loss = tf.contrib.seq2seq.sequence_loss(
        logits=pred_logits,
        targets=self.decoder_targets,
        weights=output_mask,
        softmax_loss_function=sparse_cross_entropy_with_probs)

    self.graph_built = True
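# Note on the loss above: `final_dist_history` appears to hold the decoder's
# combined vocabulary/copy *probability* distributions rather than logits,
# which is presumably why a custom `softmax_loss_function` is passed to
# `sequence_loss`. The real `sparse_cross_entropy_with_probs` is defined
# elsewhere in this project; the sketch below is only a hypothetical
# illustration of what such a loss could look like (names and exact
# numerics are assumptions, not the project's implementation).
def sparse_cross_entropy_with_probs(labels, logits):
    """Negative log-probability of each target id.

    `logits` is assumed to be a [N, vocab + max_oovs] tensor of
    probabilities (sequence_loss flattens batch and time into N).
    """
    labels = tf.cast(labels, tf.int32)
    n = tf.shape(labels)[0]
    indices = tf.stack([tf.range(n), labels], axis=1)
    target_probs = tf.gather_nd(logits, indices)
    # Clip to avoid log(0) on targets that received zero probability.
    return -tf.log(tf.clip_by_value(target_probs, 1e-10, 1.0))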
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    (words, nwords), (chars, nchars) = features
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable(
        'chars_embeddings', [num_chars, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char LSTM
    dim_words = tf.shape(char_embeddings)[1]
    dim_chars = tf.shape(char_embeddings)[2]
    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(
        t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(
        t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1]))
    output = tf.concat([output_fw, output_bw], axis=-1)
    char_embeddings = tf.reshape(
        output, [-1, dim_words, 2 * params['char_lstm_size']])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    # output = tf.layers.dropout(output, rate=dropout, training=training)

    layers = [char_embeddings, output]
    lm_embeddings = tf.concat(
        [tf.expand_dims(t, axis=1) for t in layers], axis=1)
    weights = tf.sequence_mask(nwords)
    bilm_ops = {'lm_embeddings': lm_embeddings, 'mask': weights}
    weight_sum = weight_layers('elmo_input1', bilm_ops, l2_coef=1.0,
                               do_layer_norm=True, use_top_only=False)
    output = tf.layers.dropout(weight_sum['weighted_op'], rate=dropout,
                               training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
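# A minimal wiring sketch for the model_fn above, assuming an `input_fn`
# that yields ((words, nwords), (chars, nchars)) features with tag labels.
# All file names, sizes and the 'train'/'dev' arguments below are
# illustrative assumptions, not values taken from this project.
import functools

import tensorflow as tf

params = {
    'dim': 300, 'dim_chars': 100, 'dropout': 0.5, 'num_oov_buckets': 1,
    'char_lstm_size': 25, 'lstm_size': 100,
    'words': 'vocab.words.txt', 'chars': 'vocab.chars.txt',
    'tags': 'vocab.tags.txt', 'glove': 'glove.npz',
}
estimator = tf.estimator.Estimator(model_fn, model_dir='results/model',
                                   params=params)
train_spec = tf.estimator.TrainSpec(
    input_fn=functools.partial(input_fn, 'train'))
eval_spec = tf.estimator.EvalSpec(
    input_fn=functools.partial(input_fn, 'dev'))
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)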
def _check_weighted_layer(self, l2_coef, do_layer_norm, use_top_only):
    # create the Batcher
    vocab_file = os.path.join(FIXTURES, 'vocab_test.txt')
    batcher = Batcher(vocab_file, 50)

    # load the model
    options_file = os.path.join(FIXTURES, 'options.json')
    weight_file = os.path.join(FIXTURES, 'lm_weights.hdf5')
    character_ids = tf.placeholder('int32', (None, None, 50))
    model = BidirectionalLanguageModel(
        options_file, weight_file, max_batch_size=4)
    bilm_ops = model(character_ids)

    weighted_ops = []
    for k in range(2):
        ops = weight_layers(str(k), bilm_ops, l2_coef=l2_coef,
                            do_layer_norm=do_layer_norm,
                            use_top_only=use_top_only)
        weighted_ops.append(ops)

    # initialize
    self.sess.run(tf.global_variables_initializer())

    n_expected_trainable_weights = 2 * (1 + int(not use_top_only))
    self.assertEqual(len(tf.trainable_variables()),
                     n_expected_trainable_weights)

    # and one regularizer per weighted layer
    n_expected_reg_losses = 2 * int(not use_top_only)
    self.assertEqual(
        len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)),
        n_expected_reg_losses,
    )

    # Set the variables.
    weights = [[np.array([0.1, 0.3, 0.5]), np.array([1.1])],
               [np.array([0.2, 0.4, 0.6]), np.array([0.88])]]
    for k in range(2):
        with tf.variable_scope('', reuse=True):
            if not use_top_only:
                W = tf.get_variable('{}_ELMo_W'.format(k))
                _ = self.sess.run([W.assign(weights[k][0])])
            gamma = tf.get_variable('{}_ELMo_gamma'.format(k))
            _ = self.sess.run([gamma.assign(weights[k][1])])

    # make some data
    sentences = [
        ['The', 'first', 'sentence', '.'],
        ['The', 'second'],
        ['Third']
    ]
    X_chars = batcher.batch_sentences(sentences)

    ops = model(character_ids)
    lm_embeddings, mask, weighted0, weighted1 = self.sess.run(
        [ops['lm_embeddings'], ops['mask'],
         weighted_ops[0]['weighted_op'], weighted_ops[1]['weighted_op']],
        feed_dict={character_ids: X_chars}
    )
    actual_elmo = [weighted0, weighted1]

    # check the mask first
    expected_mask = [[True, True, True, True],
                     [True, True, False, False],
                     [True, False, False, False]]
    self.assertTrue((expected_mask == mask).all())

    # Now compute the expected weighted layers
    for k in range(2):
        normed_weights = np.exp(weights[k][0] + 1.0 / 3) / np.sum(
            np.exp(weights[k][0] + 1.0 / 3))
        # masked layer normalization
        expected_elmo = np.zeros((3, 4, lm_embeddings.shape[-1]))
        if not use_top_only:
            for j in range(3):  # number of LM layers
                if do_layer_norm:
                    mean = np.mean(lm_embeddings[:, j, :, :][mask])
                    std = np.std(lm_embeddings[:, j, :, :][mask])
                    normed_lm_embed = (lm_embeddings[:, j, :, :] - mean) / (
                        std + 1E-12)
                    expected_elmo += normed_weights[j] * normed_lm_embed
                else:
                    expected_elmo += normed_weights[j] * lm_embeddings[:, j, :, :]
        else:
            expected_elmo += lm_embeddings[:, -1, :, :]

        # the scale parameter
        expected_elmo *= weights[k][1]
        self.assertTrue(
            np.allclose(expected_elmo, actual_elmo[k], atol=1e-6)
        )
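# In plain NumPy, the scalar mix that the test above verifies (ignoring the
# optional masked layer norm) is just a softmax over the per-layer weights
# followed by a gamma scale. A small illustrative sketch, with shapes and
# values chosen to mirror the test fixture:
import numpy as np

def scalar_mix(lm_embeddings, w, gamma):
    """lm_embeddings: [batch, n_layers, time, dim]; w: [n_layers]; gamma: scalar."""
    s = np.exp(w) / np.sum(np.exp(w))  # softmax-normalized layer weights
    mixed = np.einsum('l,bltd->btd', s, lm_embeddings)
    return gamma * mixed

lm = np.random.rand(3, 3, 4, 32).astype(np.float32)
out = scalar_mix(lm, np.array([0.1, 0.3, 0.5]), 1.1)
assert out.shape == (3, 4, 32)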
context_character_ids = tf.placeholder('int32', shape=(None, None, 50))
question_character_ids = tf.placeholder('int32', shape=(None, None, 50))

# Build the biLM graph.
bilm = BidirectionalLanguageModel(options_file, weight_file)

# Get ops to compute the LM embeddings.
context_embeddings_op = bilm(context_character_ids)
question_embeddings_op = bilm(question_character_ids)

# Get an op to compute ELMo (weighted average of the internal biLM layers).
# Our SQuAD model includes ELMo at both the input and output layers of the
# task GRU, so we need 4x ELMo representations: for the question and the
# context, at each of the input and output.
# We use the same ELMo weights for both the question and context
# at each of the input and output.
elmo_context_input = weight_layers('input', context_embeddings_op,
                                   l2_coef=0.0)
with tf.variable_scope('', reuse=True):
    # the reuse=True scope reuses weights from the context for the question
    elmo_question_input = weight_layers(
        'input', question_embeddings_op, l2_coef=0.0
    )

elmo_context_output = weight_layers(
    'output', context_embeddings_op, l2_coef=0.0
)
with tf.variable_scope('', reuse=True):
    # the reuse=True scope reuses weights from the context for the question
    elmo_question_output = weight_layers(
        'output', question_embeddings_op, l2_coef=0.0
    )
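# To actually compute ELMo vectors from the ops defined above, the usual
# bilm-tf pattern is to batch tokenized text with a Batcher and feed the
# character ids through a session. A short usage sketch; `vocab_file`,
# `tokenized_context` and `tokenized_question` are assumed to exist.
from bilm import Batcher

batcher = Batcher(vocab_file, 50)

with tf.Session() as sess:
    # It is necessary to initialize variables once before running inference.
    sess.run(tf.global_variables_initializer())

    # Create batches of character ids.
    context_ids = batcher.batch_sentences(tokenized_context)
    question_ids = batcher.batch_sentences(tokenized_question)

    # Compute the input-layer ELMo representations.
    elmo_context_input_, elmo_question_input_ = sess.run(
        [elmo_context_input['weighted_op'],
         elmo_question_input['weighted_op']],
        feed_dict={context_character_ids: context_ids,
                   question_character_ids: question_ids}
    )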
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    (words, nwords), (chars, nchars) = features
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    # e.g. [['a', 'b'], ['c', 'z']] => [[0, 1], [2, 25]]
    char_ids = vocab_chars.lookup(chars)
    # char embedding matrix of shape [num_chars, dim_chars], e.g. [86, 100]
    variable = tf.get_variable(
        'chars_embeddings', [num_chars, params['dim_chars']], tf.float32)
    # each char id selects a row of the embedding matrix:
    # [[0, 1], [2, 25]] => [[variable[0], variable[1]], [variable[2], variable[25]]]
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    # dropout on the input
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char LSTM
    # char_embeddings: [batch, max words, max word length (time), dim_chars]
    dim_words = tf.shape(char_embeddings)[1]  # max number of words
    dim_chars = tf.shape(char_embeddings)[2]  # max word length (time steps)
    # [batch * max words, time, dim_chars]
    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])  # time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    # keep only the last hidden state of each direction
    _, (_, output_fw) = lstm_cell_fw(
        t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(
        t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1]))
    # concat the two directions on the last dimension
    output = tf.concat([output_fw, output_bw], axis=-1)
    char_embeddings_lstm = tf.reshape(
        output, [-1, dim_words, params['char_lstm_size'] * 2])  # [b, t, D]

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings_cnn = masked_conv1d_and_max(
        char_embeddings, weights, params['filters'], params['kernel_size'])

    # Word Embeddings
    # [[b'Peter', b'Blackburn'], [b'Yac', b'Amirat']] => [[0, 1], [2, 3]]
    word_ids = vocab_words.lookup(words)
    # np.array of GloVe vectors restricted to the task vocabulary
    glove = np.load(params['glove'])['embeddings']
    # append an all-zero row for the OOV bucket
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    # [[0, 1], [2, 3]] => [[variable[0], variable[1]], [variable[2], variable[3]]]
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char-LSTM Embeddings (on the last dimension)
    embeddings = tf.concat([word_embeddings, char_embeddings_lstm], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # Word-level LSTM over the char-LSTM branch
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size_lstm'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size_lstm'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])

    # Concatenate Word and Char-CNN Embeddings (on the last dimension)
    embeddings2 = tf.concat([word_embeddings, char_embeddings_cnn], axis=-1)
    embeddings2 = tf.layers.dropout(embeddings2, rate=dropout,
                                    training=training)

    # Word-level LSTM over the char-CNN branch
    t2 = tf.transpose(embeddings2, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw2 = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size_cnn'])
    lstm_cell_bw2 = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size_cnn'])
    lstm_cell_bw2 = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw2)
    output_fw2, _ = lstm_cell_fw2(t2, dtype=tf.float32, sequence_length=nwords)
    output_bw2, _ = lstm_cell_bw2(t2, dtype=tf.float32, sequence_length=nwords)
    output2 = tf.concat([output_fw2, output_bw2], axis=-1)
    output2 = tf.transpose(output2, perm=[1, 0, 2])
    # output = tf.concat([output, output2], axis=-1)

    # ELMo-style scalar mix of the two biLSTM outputs
    layers = [output, output2]
    lm_embeddings = tf.concat(
        [tf.expand_dims(t, axis=1) for t in layers], axis=1)
    weights = tf.sequence_mask(nwords)
    bilm_ops = {'lm_embeddings': lm_embeddings, 'mask': weights}
    weight_sum = weight_layers('elmo_input', bilm_ops, l2_coef=1.0,
                               do_layer_norm=True, use_top_only=False)
    output = tf.layers.dropout(weight_sum['weighted_op'], rate=dropout,
                               training=training)

    # CRF
    # dense layer producing unary potentials of shape
    # [batch_size, max_seq_len, num_tags]
    logits = tf.layers.dense(output, num_tags)
    # CRF transition parameters, shape [num_tags, num_tags]
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    # pred_ids: [batch_size, max_seq_len] int32 matrix of the
    # highest-scoring tag indices
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions: map tag indices back to tag strings,
        # e.g. indices [1, 5] => ['lake', 'UNKNOWN']
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)  # replace labels by their indices
        # log-likelihood of the gold tag sequences, shape [batch_size]
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)  # mean over the batch

        # Metrics
        # boolean mask of shape [batch, max_seq_len];
        # e.g. nwords [1, 2] => [[True, False], [True, True]]
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            # args: ground truth, predictions, number of tags,
            # indices of the positive classes, mask
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            # each metric is a (value, update_op) pair; log the update_op
            # for TensorBoard
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
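# masked_conv1d_and_max (used in the char-CNN branch above) is defined
# elsewhere in the project. Below is a simplified, hypothetical sketch of
# the idea, assuming a 4-D input [batch, words, chars, dim_chars] and a
# boolean mask [batch, words, chars]; the actual implementation may differ.
def masked_conv1d_and_max(t, weights, filters, kernel_size):
    """1-D convolution over characters, then max over time, ignoring padding."""
    mask = tf.expand_dims(tf.to_float(weights), -1)          # [b, w, c, 1]
    shape = tf.shape(t)
    dim_chars = t.shape[-1].value                            # static channel dim
    flat = tf.reshape(t * mask, [-1, shape[2], dim_chars])   # [b*w, c, dim]
    conv = tf.layers.conv1d(flat, filters, kernel_size, padding='same')
    flat_mask = tf.reshape(mask, [-1, shape[2], 1])
    conv += (flat_mask - 1.0) * 1e9                          # padded steps -> -inf
    pooled = tf.reduce_max(conv, axis=1)                     # [b*w, filters]
    return tf.reshape(pooled, [shape[0], shape[1], filters])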