def metric_fn(label_ids, logits, trans):
    # First run Viterbi decoding on the outputs (CRF decode).
    # pred_ids and num_labels are assumed to be captured from the enclosing model_fn
    # scope (pred_ids being the CRF-decoded label ids).
    weight = tf.sequence_mask(FLAGS.max_seq_length)
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                     [2, 3, 4, 5, 6, 7], weight)
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                               [2, 3, 4, 5, 6, 7], weight)
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    # 13 is the total number of labels; the ids in the list are the labels that count.
    precision = tf_metrics.precision(label_ids, predictions, 13,
                                     [1, 2, 3, 4, 5, 6, 7, 8], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, 13,
                               [1, 2, 3, 4, 5, 6, 7, 8], average="macro")
    f = tf_metrics.f1(label_ids, predictions, 13,
                      [1, 2, 3, 4, 5, 6, 7, 8], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
    }
def metric_fn(per_example_loss, label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    precision = tf_metrics.precision(label_ids, predictions, num_labels,
                                     [1, 2], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, num_labels,
                               [1, 2], average="macro")
    f = tf_metrics.f1(label_ids, predictions, num_labels, [1, 2], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
    }
def metric_fn(label_ids, pred_ids):
    # First run Viterbi decoding on the outputs (CRF decode).
    # The indices argument tells the metrics which labels to evaluate; it corresponds to label_list.
    indices = [2, 3, 4, 5, 6, 7]
    weight = tf.sequence_mask(FLAGS.max_seq_length)
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels, indices, weight)
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels, indices, weight)
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, indices, weight)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
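# Illustrative only (assumed label ordering, not taken from any of these repos): how an
# indices list such as [2, 3, 4, 5, 6, 7] typically maps onto a label_list for tf_metrics.
label_list = ["[PAD]", "O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC"]
entity_indices = [i for i, tag in enumerate(label_list) if tag not in ("[PAD]", "O")]
# entity_indices == [2, 3, 4, 5, 6, 7]; padding and "O" are excluded from precision/recall/F1.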
def metric_fn(seq_length, max_len, label_ids, pred_ids):
    # The indices argument tells the metrics which labels to evaluate.
    indices = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    # Metrics
    weights = tf.sequence_mask(seq_length, maxlen=max_len)
    metrics = {
        'acc': tf.metrics.accuracy(label_ids, pred_ids, weights),
        'precision': precision(label_ids, pred_ids, params['num_labels'], indices, weights),
        'recall': recall(label_ids, pred_ids, params['num_labels'], indices, weights),
        'f1': f1(label_ids, pred_ids, params['num_labels'], indices, weights),
    }
    for metric_name, op in metrics.items():
        tf.summary.scalar(metric_name, op[1])
    return metrics
def metric_fn(per_example_loss, label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    precision = tf_metrics.precision(label_ids, predictions, 11,
                                     [1, 2, 4, 5, 6, 7, 8, 9], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, 11,
                               [1, 2, 4, 5, 6, 7, 8, 9], average="macro")
    f = tf_metrics.f1(label_ids, predictions, 11,
                      [1, 2, 4, 5, 6, 7, 8, 9], average="macro")
    loss = tf.metrics.mean(per_example_loss)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        "eval_loss": loss,
    }
def metric_fn(label_ids, pred_ids, per_example_loss, input_mask):
    # ['<pad>'] + ["O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-MISC", "I-MISC", "X"]
    indices = [2, 3]
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels, indices, input_mask)
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels, indices, input_mask)
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, indices, input_mask)
    accuracy = tf.metrics.accuracy(label_ids, pred_ids, input_mask)
    loss = tf.metrics.mean(per_example_loss)
    return {
        'eval_precision': precision,
        'eval_recall': recall,
        'eval_f': f,
        'eval_accuracy': accuracy,
        'eval_loss': loss,
    }
def compute_metrics(self, tags, pred_ids, num_tags, indices, nwords):
    weights = tf.sequence_mask(nwords)
    # metrics_correct_rate, golden, predict = correct_rate(tags, pred_ids)
    # metrics_correct_rate = correct_rate(tags, pred_ids, weights)
    metrics = {
        'acc': tf.metrics.accuracy(tags, pred_ids, weights),
        'precision': precision(tags, pred_ids, num_tags, indices, weights),
        'recall': recall(tags, pred_ids, num_tags, indices, weights),
        'f1': f1(tags, pred_ids, num_tags, indices, weights),
        'correct_rate': correct_rate(tags, pred_ids, weights),
        # 'golden': (golden, tf.zeros([], tf.int32)),
        # 'predict': (predict, tf.zeros([], tf.int32))
    }
    for metric_name, op in metrics.items():
        tf.summary.scalar(metric_name, op[1])
    return metrics
def metric_fn(label_ids, pred_ids, num_labels):
    # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    pos_indices = [id for id in range(2, num_labels - 3)]
    # pos_indices = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    #                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
    # pos_indices = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                     pos_indices, average="micro")
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                               pos_indices, average="micro")
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, pos_indices, average="micro")
    # hook_dict['precision'] = precision
    # hook_dict['recall'] = recall
    # hook_dict['f'] = f
    # tf.summary.scalar('precision', precision)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    if task_name == "cola":
        FN, FN_op = tf.metrics.false_negatives(labels=label_ids, predictions=predictions)
        FP, FP_op = tf.metrics.false_positives(labels=label_ids, predictions=predictions)
        TP, TP_op = tf.metrics.true_positives(labels=label_ids, predictions=predictions)
        TN, TN_op = tf.metrics.true_negatives(labels=label_ids, predictions=predictions)
        MCC = (TP * TN - FP * FN) / ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))**0.5
        MCC_op = tf.group(FN_op, TN_op, TP_op, FP_op, tf.identity(MCC, name="MCC"))
        return {"MCC": (MCC, MCC_op)}
    elif task_name == "mrpc":
        accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions)
        loss = tf.metrics.mean(values=per_example_loss)
        f1 = tf_metrics.f1(labels=label_ids, predictions=predictions,
                           num_classes=2, pos_indices=[1])
        return {
            "eval_accuracy": accuracy,
            "eval_f1": f1,
            "eval_loss": loss,
        }
    else:
        accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions)
        loss = tf.metrics.mean(values=per_example_loss)
        return {
            "eval_accuracy": accuracy,
            "eval_loss": loss,
        }
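# Sketch (not from the original code above): every tf.metrics / tf_metrics call returns a
# (value, update_op) pair, and the CoLA branch builds the same pair by hand: the MCC value
# tensor is computed from the TP/FP/TN/FN counters and the grouped op updates all four.
# A hypothetical custom metric would follow the same shape:
def mean_prediction_metric(predictions):
    value, update_op = tf.metrics.mean(tf.cast(predictions, tf.float32))
    # The Estimator runs update_op on every eval batch and reads value at the end.
    return {"mean_prediction": (value, update_op)}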
def metric_fn(per_example_loss, label_ids, logits):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    # labels = []
    # for i, x in enumerate()
    predict_labels = []
    # for i in range(1, num_labels - 4):
    #     predict_labels.append(i)
    # precision = tf_metrics.precision(label_ids, predictions, num_labels, predict_labels, average="macro")
    # recall = tf_metrics.recall(label_ids, predictions, num_labels, predict_labels, average="macro")
    # f = tf_metrics.f1(label_ids, predictions, num_labels, predict_labels, average="macro")
    precision = tf_metrics.precision(label_ids, predictions, num_labels, average="macro")
    recall = tf_metrics.recall(label_ids, predictions, num_labels, average="macro")
    f = tf_metrics.f1(label_ids, predictions, num_labels, average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits, is_real_example):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    recall = tf_metrics.recall(label_ids, predictions,
                               num_classes=num_labels, average='micro')
    precision = tf_metrics.precision(label_ids, predictions,
                                     num_classes=num_labels, average='micro')
    f1 = tf_metrics.f1(label_ids, predictions, num_classes=num_labels, average='micro')
    loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
    return {
        "eval_recall": recall,
        'eval_precision': precision,
        'eval_f1': f1,
        "eval_loss": loss,
    }
def metric_fn(label_ids, logits, trans):
    # First run Viterbi decoding on the outputs (CRF decode).
    # pred_ids and num_labels are assumed to come from the enclosing model_fn scope.
    weight = tf.sequence_mask(FLAGS.max_seq_length)
    # print(label_ids.get_shape())   # [?, 128]
    # print(pred_ids.get_shape())    # [64, 128]
    # print(num_labels)              # 17
    # print(weight.get_shape())      # [128,]
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                     [2, 3, 4, 5, 6, 7], weight)
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                               [2, 3, 4, 5, 6, 7], weight)
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def eval_phase(label_ids, pred_ids, num_labels):
    # First run Viterbi decoding on the outputs (CRF decode).
    eval_list = []
    assert os.path.exists(os.path.join(FLAGS.output_dir, "eval_ids_list.txt"))
    list_file = open(os.path.join(FLAGS.output_dir, "eval_ids_list.txt"), 'r')
    contents = list_file.readlines()
    for item in contents:
        # Remember to convert the strings back to int; the evaluation below works on a list of ints.
        eval_list.append(int(item.strip()))
    assert 0 < len(eval_list)
    print("eval_list:", eval_list)
    weight = tf.sequence_mask(FLAGS.max_seq_length)
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels, eval_list, weight)
    tf.summary.scalar("precision", precision[1])
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels, eval_list, weight)
    tf.summary.scalar("recall", recall[1])
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, eval_list, weight)
    tf.summary.scalar("f1", f[1])
    return (precision, recall, f)
def metric_fn(per_example_loss, label_ids, logits):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    n_class = [i for i in range(len(LABELS) - 2)]
    precision = tf_metrics.precision(label_ids, predictions, len(LABELS) + 1,
                                     [1, 2, 3], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, len(LABELS) + 1,
                               [1, 2, 3], average="macro")
    f = tf_metrics.f1(label_ids, predictions, len(LABELS) + 1,
                      [1, 2, 3], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    precision = tf_metrics.precision(label_ids, predictions,
                                     len(NerProcessor.get_labels()) + 1, average="macro")
    recall = tf_metrics.recall(label_ids, predictions,
                               len(NerProcessor.get_labels()) + 1, average="macro")
    f = tf_metrics.f1(label_ids, predictions,
                      len(NerProcessor.get_labels()) + 1, average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits, is_real_example):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    # print("predictions shape: " + str(predictions.get_shape().as_list()))
    # print("label_ids shape: " + str(label_ids.get_shape().as_list()))
    # print("is_real_example shape: " + str(is_real_example.get_shape().as_list()))
    precision = tf_metrics.precision(label_ids, predictions, num_labels,
                                     [2, 3, 4, 5], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, num_labels,
                               [2, 3, 4, 5], average="macro")
    f = tf_metrics.f1(label_ids, predictions, num_labels, [2, 3, 4, 5], average="macro")
    accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions,
                                   weights=is_real_example)
    loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
    # precision = tf_metrics.precision(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
    # recall = tf_metrics.recall(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
    # f = tf_metrics.f1(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        "eval_accuracy": accuracy,
        "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    print(logits)
    print(predictions)
    print(label_ids)
    # label_ids_array = label_ids.eval()
    # predictions_array = predictions.eval()
    # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    # print(predictions_array)
    accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions)
    precision = tf_metrics.precision(
        label_ids, predictions, 19,
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], average="micro")
    recall = tf_metrics.recall(
        label_ids, predictions, 19,
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], average="micro")
    f = tf_metrics.f1(
        label_ids, predictions, 19,
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], average="micro")
    # class_repo = classification_report(label_ids_array, predictions_array)
    return {
        "eval_accuracy": accuracy,
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_class_repo": class_repo,
        # "eval_loss": loss,
    }
def metric_fn(label_ids, predicted_labels, input_mask, num_labels):
    label_ids = tf.boolean_mask(label_ids, input_mask)
    predicted_labels = tf.boolean_mask(predicted_labels, input_mask)
    precision = tf_metrics.precision(label_ids, predicted_labels, num_labels,
                                     [1, 2, 3], average="macro")
    recall = tf_metrics.recall(label_ids, predicted_labels, num_labels,
                               [1, 2, 3], average="macro")
    f1 = tf_metrics.f1(label_ids, predicted_labels, num_labels,
                       [1, 2, 3], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f1
    }
def metric_fn(seq_length, max_len, label_ids, pred_ids):
    # The indices argument tells the metrics which labels to evaluate.
    indices = [1, 2]
    # Metrics
    weights = tf.sequence_mask(seq_length, maxlen=max_len)
    tf.logging.info("****shape in metrics***")
    label_ids_metric = tf.argmax(label_ids, 1)
    tf.logging.info(label_ids_metric.shape)
    tf.logging.info(pred_ids.shape)
    metrics = {
        'acc': tf.metrics.accuracy(label_ids_metric, pred_ids),
        'precision': precision(label_ids_metric, pred_ids, params['num_labels'], indices),
        'recall': recall(label_ids_metric, pred_ids, params['num_labels'], indices),
        'f1': f1(label_ids_metric, pred_ids, params['num_labels'], indices),
    }
    for metric_name, op in metrics.items():
        tf.summary.scalar(metric_name, op[1])
    return metrics
def metric_fn(per_example_loss, label_ids, logits):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    # Evaluation: computes precision, recall and F1. If the label set changes, the numbers
    # below must change as well: 10 is the total number of classes, and 1-6 are the useful
    # classes (B, I, E). See the functions in tf.metrics for details.
    precision = tf_metrics.precision(label_ids, predictions, 10,
                                     [1, 2, 3, 4, 5, 6], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, 10,
                               [1, 2, 3, 4, 5, 6], average="macro")
    f = tf_metrics.f1(label_ids, predictions, 10, [1, 2, 3, 4, 5, 6], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(label_ids, logits, trans):
    # First run Viterbi decoding on the outputs (CRF decode).
    weight = tf.sequence_mask(max_seq_length)
    # Index 0 is padding and 1 is SEP; the named-entity labels are treated as the positive
    # classes (they must be listed explicitly for multi-class metrics), and the indices
    # below correspond to label_ids.
    # precision = TP / (TP + FP)      # fraction of predicted positives that are truly positive
    # recall = TP / (TP + FN)         # fraction of true positives that are predicted positive
    # accuracy = (TP + TN) / (P + N)
    # F1-score = 2 / [(1 / precision) + (1 / recall)]
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                     [2, 3, 4, 5, 6, 7], weight)
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                               [2, 3, 4, 5, 6, 7], weight)
    f1 = tf_metrics.f1(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f1": f1,
        # "eval_loss": loss,
    }
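# Sketch, not taken from the repos above: how a metric_fn like these is typically wired
# into an Estimator during evaluation. total_loss and the closure variables (pred_ids,
# num_labels) are assumed to exist in the surrounding model_fn.
if mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = metric_fn(label_ids, logits, trans)  # {name: (value, update_op)}
    return tf.estimator.EstimatorSpec(mode=mode, loss=total_loss,
                                      eval_metric_ops=eval_metric_ops)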
def model_fn(features, labels, mode, params):
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    input_ids = features["text"]
    author_id = features["author"]
    category_ids = features["categories"]
    label_id = features["label"]
    cnn = CnnModel(params, input_ids, author_id, category_ids, training)
    squeeze_label_ids = tf.squeeze(label_id, axis=1)
    logits, predict_label_ids, loss = cnn.build_network(squeeze_label_ids)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        # words = tf.contrib.lookup.index_to_string_table_from_file(params['vocab'])
        # input_words = words.lookup(tf.to_int64(input_ids))
        predictions = {
            'true_label_ids': squeeze_label_ids,
            'predict_label_ids': predict_label_ids,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        train_op = optimization.create_optimizer(loss, params['learning_rate'],
                                                 params['train_steps'],
                                                 params['num_warmup_steps'])
        if mode == tf.estimator.ModeKeys.EVAL:
            # Metrics
            metrics = {
                'acc': tf.metrics.accuracy(squeeze_label_ids, predict_label_ids),
                # Compute P and R per class, then average over classes (macro).
                'precision': precision(squeeze_label_ids, predict_label_ids,
                                       params['label_size'], average='macro'),
                'recall': recall(squeeze_label_ids, predict_label_ids,
                                 params['label_size'], average='macro'),
                'f1': f1(squeeze_label_ids, predict_label_ids,
                         params['label_size'], average='macro'),
            }
            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params): # Read vocabs and inputs dropout = params['dropout'] (words, nwords), (chars, nchars), add_features = features add_features = add_features training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) vocab_chars = tf.contrib.lookup.index_table_from_file( params['chars'], num_oov_buckets=params['num_oov_buckets']) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len(indices) + 1 with Path(params['chars']).open() as f: num_chars = sum(1 for _ in f) + params['num_oov_buckets'] # Char Embeddings char_ids = vocab_chars.lookup(chars) variable = tf.get_variable( 'chars', [num_chars + 1, params['dim_chars']], tf.float32) char_embeddings = tf.nn.embedding_lookup(variable, char_ids) char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout, training=training) # Char LSTM weights = tf.sequence_mask(nchars) char_embeddings = masked_conv1d_and_max( char_embeddings, weights, params['filters'], params['kernel_size']) # Word Embeddings word_ids = vocab_words.lookup(words) glove = np.load(params['glove'])['embeddings'] # np.array variable = np.vstack([glove, [[0.] * params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=False) word_embeddings = tf.nn.embedding_lookup(variable, word_ids) # Concatenate Word and Char Embeddings embeddings = tf.concat([word_embeddings, char_embeddings,tf.cast(add_features, tf.float32)], axis=-1) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # LSTM t = tf.transpose(embeddings, perm=[1, 0, 2]) # Need time-major lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) #Attention #attention_output, alphas = attention(output, ATTENTION_SIZE, return_alphas=True) num_units = 200 #W1 = tf.get_variable("W1", [num_units, num_units], dtype=tf.float32) W1 = tf.get_variable("W1", [num_units, num_units], initializer=tf.glorot_uniform_initializer(),dtype=tf.float32) b1 = tf.get_variable("b1", [num_units, ], dtype=tf.float32) q = tf.tensordot(output, W1, axes=[[2], [0]]) out_shape = tf.shape(output) #b1_shuffled = self.b1.dimshuffle('x', 'x', 0) b1_shuffled = tf.expand_dims(b1, 0) b1_shuffled = tf.expand_dims(b1_shuffled, 0) #print("b shape",tf.shape(b1_shuffled)) q += b1_shuffled q = tf.tanh(q) q_trans = tf.transpose(q, perm=[0, 2, 1]) #out = tf.batched_dot(q, q.dimshuffle(0, 2, 1)) out = tf.matmul(q, q_trans) #print("out dimension",out.shape) out *= (1 - tf.eye(out_shape[1], out_shape[1])) matrix = tf.nn.softmax(tf.reshape(out,(out_shape[0] * out_shape[1], out_shape[1]))) matrix = tf.reshape(matrix,(out_shape[0] , out_shape[1], out_shape[1])) #print("new dimension",matrix.shape) atten_out = tf.matmul(matrix,output) #print("atten dimension",atten_out.shape) #print("output dimension",output.shape) output = tf.concat([output, atten_out], axis=-1) output = tf.layers.dropout(output, rate=dropout, training=training) # CRF logits = tf.layers.dense(output, num_tags) 
crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) if mode == tf.estimator.ModeKeys.PREDICT: # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = { 'pred_ids': pred_ids, 'tags': pred_strings } return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) tags = vocab_tags.lookup(labels) log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) # Metrics weights = tf.sequence_mask(nwords) metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision(tags, pred_ids, num_tags, indices, weights), 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec( mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec( mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params): if isinstance(features, dict): features = features['words'], features['nwords'] dropout = params['dropout'] words, nwords = features training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len(indices) + 1 word_ids = vocab_words.lookup(words) glove = np.load(params['glove'])['embeddings'] # np.array variable = np.vstack([glove, [[0.] * params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=False) embeddings = tf.nn.embedding_lookup(variable, word_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) t = tf.transpose(embeddings, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) dense_layer = tf.layers.Dense(num_tags) logits = dense_layer(output) crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) if mode == tf.estimator.ModeKeys.PREDICT: reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = {'pred_ids': pred_ids, 'tags': pred_strings} return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) tags = vocab_tags.lookup(labels) log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) ########## epsilon = 5 perturbed = _add_perturbation(embeddings, loss, epsilon) t = tf.transpose(perturbed, perm=[1, 0, 2]) output_fw1, _1 = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw1, _1 = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output1 = tf.concat([output_fw1, output_bw1], axis=-1) output1 = tf.transpose(output1, perm=[1, 0, 2]) output1 = tf.layers.dropout(output1, rate=dropout, training=training) logits1 = dense_layer(output1) log_likelihood1, _1 = tf.contrib.crf.crf_log_likelihood( logits1, tags, nwords, crf_params) adv_loss = tf.reduce_mean(-log_likelihood1) loss += adv_loss ########## weights = tf.sequence_mask(nwords) metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision(tags, pred_ids, num_tags, indices, weights), 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params): # For serving features are a bit different if isinstance(features, dict): features = ((features['words'], features['nwords']), (features['chars'], features['nchars']), (features['jasos'], features['njasos'])) # Read vocabs and inputs (words, nwords), (chars, nchars), (jasos, njasos) = features dropout = params['dropout'] training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) vocab_chars = tf.contrib.lookup.index_table_from_file( params['chars'], num_oov_buckets=params['num_oov_buckets']) vocab_jasos = tf.contrib.lookup.index_table_from_file( params['jasos'], num_oov_buckets=params['num_oov_buckets']) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len(indices) + 1 with Path(params['chars']).open(encoding="utf8") as f: num_chars = sum(1 for _ in f) + params['num_oov_buckets'] with Path(params['jasos']).open(encoding="utf8") as f: num_jasos = sum(1 for _ in f) + params['num_oov_buckets'] # jaos embedding jaso_ids = vocab_jasos.lookup(jasos) variable = tf.get_variable('jasos_embeddings', [num_jasos, params['dim_chars']], tf.float32) jaso_embeddings = tf.nn.embedding_lookup(variable, jaso_ids) jaso_embeddings = tf.layers.dropout(jaso_embeddings, rate=dropout, training=training) # Char LSTM dim_words = tf.shape(jaso_embeddings)[1] dim_chars = tf.shape(jaso_embeddings)[2] flat = tf.reshape(jaso_embeddings, [-1, dim_chars, params['dim_chars']]) t = tf.transpose(flat, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1])) _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1])) output = tf.concat([output_fw, output_bw], axis=-1) jaso_embeddings = tf.reshape(output, [-1, dim_words, 50]) # Char Embeddings char_ids = vocab_chars.lookup(chars) variable = tf.get_variable('chars_embeddings', [num_chars, params['dim_chars']], tf.float32) char_embeddings = tf.nn.embedding_lookup(variable, char_ids) char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout, training=training) # Char LSTM dim_words = tf.shape(char_embeddings)[1] dim_chars = tf.shape(char_embeddings)[2] flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']]) t = tf.transpose(flat, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1])) _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1])) output = tf.concat([output_fw, output_bw], axis=-1) char_embeddings = tf.reshape(output, [-1, dim_words, 50]) # Word Embeddings word_ids = vocab_words.lookup(words) fasttext = np.load(params['fasttext'])['embeddings'] # np.array variable = np.vstack([fasttext, [[0.] 
* params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32) #, trainable=False) word_embeddings = tf.nn.embedding_lookup(variable, word_ids) # Concatenate Word and Char Embeddings embeddings = tf.concat([word_embeddings, char_embeddings, jaso_embeddings], axis=-1) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # LSTM t = tf.transpose(embeddings, perm=[1, 0, 2]) # Need time-major lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) # CRF logits = tf.layers.dense(output, num_tags) crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) if mode == tf.estimator.ModeKeys.PREDICT: # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = {'pred_ids': pred_ids, 'tags': pred_strings} return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) tags = vocab_tags.lookup(labels) log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) # Metrics weights = tf.sequence_mask(nwords) metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision(tags, pred_ids, num_tags, indices, weights), 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params): # For serving, features are a bit different #if isinstance(features, dict): # features = features['words'], features['nwords'] # Read vocabs and inputs #import ipdb #ipdb.set_trace() dropout = args.dropout #input_ids = features["input_ids"] #mask = features["mask"] #segment_ids = features["segment_ids"] #label_ids = features["label_ids"] ##words, nwords = features #tf.print(' '.join(words[4]), output_stream=sys.stderr) training = (mode == tf.estimator.ModeKeys.TRAIN) #vocab_words = tf.contrib.lookup.index_table_from_file( # #args.vocab_words) # args.vocab_words, num_oov_buckets=args.num_oov_buckets) #with Path(args.vocab_tags).open() as f: # indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] # num_tags = len(indices) + 1 ##word_ids = vocab_words.lookup(words) if args.embedding == 'word2id': # word2id with Path(args.vocab_words).open(encoding='utf-8') as f: vocab_words_1 = f.readlines() vocab_length = len(vocab_words_1) input_ids = features["input_ids"] label_ids = features["label_ids"] mask = features["mask"] embeddings = embedding(input_ids, vocab_length, args) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) pass elif args.embedding == 'bert': from my_model.embeddings.embedding import get_bert_embedding input_ids = features["input_ids"] mask = features["mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] embeddings = get_bert_embedding(args.bert_config_file, training, input_ids, mask, segment_ids, use_one_hot_embeddings=False) else: # Word Embeddings # deafult input_ids = features["input_ids"] label_ids = features["label_ids"] mask = features["mask"] glove = np.load(args.glove)['embeddings'] # np.array variable = np.vstack([glove, [[0.] 
* args.dim]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=False) embeddings = tf.nn.embedding_lookup(variable, input_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) pass (total_loss, logits, predicts) = create_model(embeddings, label_ids, mask, training, self.num_labels, use_one_hot_embeddings=False) tvars = tf.trainable_variables() initialized_variable_names = None scaffold_fn = None if args.init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, args.init_checkpoint) tf.train.init_from_checkpoint(args.init_checkpoint, assignment_map) self.logging.debug("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" self.logging.debug(" name = %s, shape = %s%s", var.name, var.shape, init_string) if mode == tf.estimator.ModeKeys.TRAIN: warmup_steps = args.warmup_steps step = tf.to_float(tf.train.get_global_step()) if args.learning_rate_decay == 'sqrt': lr_warmup = args.learning_rate_peak * tf.minimum( 1.0, step / warmup_steps) lr_decay = args.learning_rate_peak * tf.minimum( 1.0, tf.sqrt(warmup_steps / step)) lr = tf.where(step < warmup_steps, lr_warmup, lr_decay) elif args.learning_rate_decay == 'exp': lr = tf.train.exponential_decay( args.learning_rate_peak, global_step=step, decay_steps=args.decay_steps, decay_rate=args.decay_rate) elif args.learning_rate_decay == 'bert': num_train_steps = int(self.len_train_examples / args.batch_size * args.epochs) #num_warmup_steps = int(num_train_steps * args.warmup_steps) num_warmup_steps = int(num_train_steps * 0.1) train_op = optimization.create_optimizer( total_loss, args.learning_rate, num_train_steps, num_warmup_steps, use_tpu=False) output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, #scaffold_fn=scaffold_fn ) return output_spec else: self.logging.info( 'learning rate decay strategy not supported') sys.exit() tf.print(lr) train_op = tf.train.AdamOptimizer(lr).minimize( total_loss, global_step=tf.train.get_or_create_global_step()) #return tf.estimator.EstimatorSpec( # mode, loss=loss, train_op=train_op) #output_spec = tf.contrib.tpu.TPUEstimatorSpec( output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, #scaffold_fn=scaffold_fn ) elif mode == tf.estimator.ModeKeys.EVAL: #def metric_fn(label_ids, logits,num_labels,mask): # predictions = tf.math.argmax(logits, axis=-1, output_type=tf.int32) # cm = metrics.streaming_confusion_matrix(label_ids, predictions, num_labels-1, weights=mask) # return { # "confusion_matrix":cm # } # # #eval_metrics = (metric_fn, [label_ids, logits, self.num_labels, mask]) #output_spec = tf.contrib.tpu.TPUEstimatorSpec( # Metrics #weights = tf.sequence_mask(nwords) weights = mask #mask2len = tf.reduce_sum(mask,axis=1) #weights = tf.sequence_mask(mask2len) #pred_ids= tf.math.argmax(logits, axis=-1, output_type=tf.int32) pred_ids = predicts num_label_ids = self.num_labels metrics = { 'acc': tf.metrics.accuracy(label_ids, pred_ids, weights), #'precision': tf.metrics.precision(label_ids, pred_ids, weights), #'recall': tf.metrics.recall(label_ids, pred_ids, weights), ##'f1': f1(label_ids, pred_ids, weights), 'precision': precision(label_ids, pred_ids, self.num_labels, self.indices, weights), 'recall': recall(label_ids, pred_ids, self.num_labels, self.indices, weights), 'f1': f1(label_ids, pred_ids, self.num_labels, self.indices, weights), } output_spec = 
tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, eval_metric_ops=metrics #scaffold_fn=scaffold_fn ) else: #output_spec = tf.contrib.tpu.TPUEstimatorSpec( output_spec = tf.estimator.EstimatorSpec( mode=mode, predictions=predicts, #scaffold_fn=scaffold_fn ) return output_spec
def model_fn(features, labels, mode, params): # For serving features are a bit different if isinstance(features, dict): features = ((features['words'], features['nwords']), (features['chars'], features['nchars'])) # Read vocabs and inputs dropout = params['dropout'] (words, nwords), (chars, nchars) = features training = (mode == tf.estimator.ModeKeys.TRAIN) # num_oov_buckets是未出现在词汇表中的词下标[vocab_size, vocab_size+num_oov_buckets-1] # 如果num_oov_buckets<=0则未包含词返回参数default_value(默认-1) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) vocab_chars = tf.contrib.lookup.index_table_from_file( params['chars'], num_oov_buckets=params['num_oov_buckets']) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len( indices) + 1 # indices是正类标签索引,O被作为负类不包含在indices中,在evaluate帮助度量计算 with Path(params['chars']).open() as f: num_chars = sum(1 for _ in f) + params['num_oov_buckets'] # Char Embeddings,学习字符嵌入向量 char_ids = vocab_chars.lookup(chars) # 论文要求的char_embeddings初始化方法[-sqrt(3/dim),sqrt(3/dim)],使用后 # f1 = 0.91270673,相比使用前f1 = 0.91264033提高了,但属于随机性的正常浮动 variable = tf.get_variable('chars_embeddings', [num_chars, params['dim_chars']], dtype=tf.float32) # initializer=tf.random_uniform_initializer(-tf.sqrt(3/params['dim_chars']), tf.sqrt(3/params['dim_chars']))) char_embeddings = tf.nn.embedding_lookup(variable, char_ids) char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout, training=training) # Char 1d convolution, sequence_mask将int型单词字符个数转化为bool掩码 mask = tf.sequence_mask(nchars) char_embeddings = masked_conv1d_and_max(char_embeddings, mask, params['filters'], params['kernel_size']) # Word Embeddings,使用不训练词向量而是直接使用glove.840B.300d word_ids = vocab_words.lookup(words) glove = np.load(params['glove'])['embeddings'] # np.array variable = np.vstack([glove, [[0.] 
* params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=False) word_embeddings = tf.nn.embedding_lookup(variable, word_ids) # Concatenate Word and Char Embeddings embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # Bi-LSTM t = tf.transpose(embeddings, perm=[1, 0, 2]) # Need time-major lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) # CRF,线性链条件随机场输出变量的最大团为相邻2节点,故特征函数最多只与相邻2个输出变量有关 # logits代表crf中的一元状态特征,crf_params代表crf中的二元转移特征 logits = tf.layers.dense( output, num_tags) # 通过一个维度(output.shape[-1], num_tags)矩阵使得前面维度不变,最后一维变num_tags crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) if mode == tf.estimator.ModeKeys.PREDICT: # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = {'pred_ids': pred_ids, 'tags': pred_strings} return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) tags = vocab_tags.lookup(labels) log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) # Metrics mask = tf.sequence_mask(nwords) metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, mask), 'precision': precision(tags, pred_ids, num_tags, indices, mask), 'recall': recall(tags, pred_ids, num_tags, indices, mask), 'f1': f1(tags, pred_ids, num_tags, indices, mask), } # tf.metrics.acuracy会返回accuracy和update_op,前者直接计算当前未更新即上衣batch的accuracy,而 # 后者会根据当前batch结果更新total和count(正确数)并返回更新后的accuracy,所以必须执行update_op,如果把op[0] # 即accuracy加入到summary中则total和count没有更新,accuracy始终不变 for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize( loss, global_step=tf.train.get_or_create_global_step()) # 默认学习率1e-3 return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def __model_fn(self, features, labels, mode, params): '模型结构: Bi_LSTM + CRF' 'features: 特征列; labels: tag列; ' 'mode: tf.estimator.Estimator()自带的参数,用于判定TRAIN EVAL PREDICT三种类型' 'params: 参数词典' # 判断features是那种类型:类型1:((([None],()),([None,None],[None])),[None]),这是self.__input_fn()输出的类型 # 类型2: {'words':[word1,word2,..],'nwords':number,'chars':[['J','o',..],['l',..],..],'nchars':number}, # 这是我们在预测时输入的类型 if isinstance(features, dict): features = ((features['words'], features['nwords']), (features['chars'], features['nchars'])) with tf.name_scope('Read_data'): # 获取特征列各项 (words, nwords), (chars, nchars) = features # words是单词列表,nwords是其相应的数量 # 获取汉语单字或英文字母的词包,eg: {char1:int64} vocab_chars = tf.contrib.lookup.index_table_from_file( params['char_vocabulary'], num_oov_buckets=params['num_oov_buckets']) # 获取汉语词语或英文单词的词包,eg:{char2:int64} vocab_words = tf.contrib.lookup.index_table_from_file( params['word_vocabulary'], num_oov_buckets=params['num_oov_buckets']) # 获取标记对应的索引,不包括用于填充batch的padding_tag with Path(params['tags']).open('r', encoding='utf-8') as fi: # indices用于存储正类tag的索引,即不包含padding_tag indices = [ idx for idx, tag in enumerate(fi) if tag.strip() != params.get('padding_tag', 'pad') ] num_tags = len(indices) + 1 # 总体的tag数量还要加上padding_tag,用于构建转移矩阵 # 获取汉语单字或英文字母的数量 with Path(params['char_vocabulary']).open('r', encoding='utf-8') as fi: # # char的数量还得加上,不在词包中的字符我们给它们的索引数量 num_chars = sum(1 for _ in fi) + params['num_oov_buckets'] # 判断模式:训练,评估,预测 training = (mode == tf.estimator.ModeKeys.TRAIN) with tf.name_scope('Char_Embeddings_Layer'): char_ids = vocab_chars.lookup(chars) # 获取字母列表的id列表 # char2vec = tf.get_variable('char_embeddings',[num_chars,params['char2vec_dim']],tf.float32) # char_embeddings = tf.nn.embedding_lookup(char2vec,char_ids) # 是否加载外部的汉字单字或英文字母的向量 if params['if_load_char2vec']: char2vec = np.load( params['char2vec'])['embeddings'] # 加载词向量,可通过char_id查找获取 # 为padding_tag添加词向量,用全0向量表示,注意shape要保持一致 char2vec = np.vstack( [char2vec, [[0.] * params['char2vec_dim']]]) char2vec = tf.Variable(char2vec, dtype=tf.float32, trainable=False) # 词向量表转为tf.tensor,不可训练 # 获取字母列表中每个字母的词向量,由于是batch,故shape= (batch_size,time_len,input_size) # 这里batch是每条输入中的单词个数 char_embeddings = tf.nn.embedding_lookup(char2vec, char_ids) else: # 通过模型训练词向量 with Path(params['char_vocabulary']).open( 'r', encoding='utf-8') as fi: char_vocab = [ word for idx, word in enumerate(fi) if word.strip() != '' ] char2vec = tf.get_variable( 'char2vec', [len(char_vocab), params['char2vec_dim']]) # 为padding_tag添加词向量,用全0向量表示,注意shape要保持一致 padding_vec = tf.Variable([[0.] 
* params['char2vec_dim']], dtype=tf.float32) char2vec = tf.concat([char2vec, padding_vec], axis=0) char2vec = tf.Variable(char2vec, dtype=tf.float32, trainable=True) # 词向量表转为tf.tensor,可训练 # 这里需要注意,padding_tag的向量应该是全0,但是在训练词向量过程中,padding_tag难以保持为全0 # 因此需要特别处理一下,每次都需要将char2vec最后一个向量变为全0,我们用mask # 再构建一张lookup_table,形状与char2vec一致,其中除了最后一行元素全为0外,其余都是1 mask = [params['char2vec_dim'] ] * len(char_vocab) + [0] * params['char2vec_dim'] mask_lookup_table = tf.sequence_mask(mask, dtype=tf.float32) mask_vec = tf.nn.embedding_lookup(mask_lookup_table, char_ids) # 获取单词中每个字母的词向量,由于是batch,故shape= (batch_size,time_len,input_size) # 这里batch是每条输入中的单词个数 embeddings = tf.nn.embedding_lookup(char2vec, char_ids) # 将char_ids中的padding_tag的向量重置为0 char_embeddings = tf.multiply(embeddings, mask_vec) with tf.name_scope('Char_Embedding_Dropout_Layer'): # char_embeddings.shape = (None,None,None,params['char2vec_dim'] # 第一个None是batch_size,第二个是每条输入中的单词个数 # 第三个None是每条输入中每个单词包含的字母个数的列表 char_embeddings = tf.layers.dropout(char_embeddings, rate=params['dropout'], training=training) with tf.name_scope('Char_LSTM_Layer'): dim_words = tf.shape(char_embeddings)[1] # 当前输入中的单词个数 dim_chars = tf.shape(char_embeddings)[2] # 当前输入中的每个单词的字母个数 flat = tf.reshape(char_embeddings, [-1, dim_chars, params['char2vec_dim']]) t = tf.transpose(flat, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell( params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_fw) # 获取正向LSTM最后一时刻的输出 _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32, sequence_length=tf.reshape( nchars, [-1])) # 获取反向LSTM最后一时刻的输出 _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32, sequence_length=tf.reshape( nchars, [-1])) # 将这两个时刻的输出按最后一维度拼接 output = tf.concat([output_fw, output_bw], axis=-1) char_embeddings = tf.reshape( output, [-1, dim_words, params['char_lstm_size'] * 2]) with tf.name_scope('Word_Embeddings_Layer'): word_ids = vocab_words.lookup(words) # 获取单词列表的id列表 # 是否加载外部的词向量 if params['if_load_word2vec']: word2vec = np.load( params['word2vec'])['embeddings'] # 加载词向量,可通过word_id查找获取 # 为padding_tag添加词向量,用全0向量表示,注意shape要保持一致 word2vec = np.vstack( [word2vec, [[0.] * params['word2vec_dim']]]) word2vec = tf.Variable(word2vec, dtype=tf.float32, trainable=False) # 词向量表转为tf.tensor,不可训练 # 获取单词列表中每个单词的词向量,由于是batch,故shape= (batch_size,time_len,input_size) word_embeddings = tf.nn.embedding_lookup(word2vec, word_ids) else: # 通过模型训练词向量 with Path(params['word_vocabulary']).open( 'r', encoding='utf-8') as fi: vocab = [ word for idx, word in enumerate(fi) if word.strip() != '' ] word2vec = tf.get_variable( 'word2vec', [len(vocab), params['word2vec_dim']]) # 为padding_tag添加词向量,用全0向量表示,注意shape要保持一致 padding_vec = tf.Variable([[0.] 
* params['word2vec_dim']], dtype=tf.float32) word2vec = tf.concat([word2vec, padding_vec], axis=0) word2vec = tf.Variable(word2vec, dtype=tf.float32, trainable=True) # 词向量表转为tf.tensor,可训练 # 这里需要注意,padding_tag的向量应该是全0,但是在训练词向量过程中,padding_tag难以保持为全0 # 因此需要特别处理一下,每次都需要将word2vec最后一个向量变为全0,我们用mask # 再构建一张lookup_table,形状与word2vec一致,其中除了最后一行元素全为0外,其余都是1 mask = [params['word2vec_dim'] ] * len(vocab) + [0] * params['word2vec_dim'] mask_lookup_table = tf.sequence_mask(mask, dtype=tf.float32) mask_vec = tf.nn.embedding_lookup(mask_lookup_table, word_ids) # 获取单词列表中每个单词的词向量,由于是batch,故shape= (batch_size,time_len,input_size) embeddings = tf.nn.embedding_lookup(word2vec, word_ids) # 将word_ids中的padding_tag的向量重置为0 word_embeddings = tf.multiply(embeddings, mask_vec) with tf.name_scope('Concatenate_CharEmbedding_WordEmbedding'): embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1) with tf.name_scope('Dropout_Layer'): embeddings = tf.layers.dropout(embeddings, rate=params['dropout'], training=training) with tf.name_scope('Word_Bi_LSTM'): # 将输入形状转为shape=(time_len,batch_size,input_size),方便LSTM计算 inputs = tf.transpose(embeddings, perm=[1, 0, 2]) # 正向LSTM lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell( params['word_lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell( params['word_lstm_size']) # 反向LSTM lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) # 正向每时刻隐藏层状态 output_fw, _ = lstm_cell_fw(inputs, dtype=tf.float32, sequence_length=nwords) # 反向每时刻隐藏层状态 output_bw, _ = lstm_cell_bw(inputs, dtype=tf.float32, sequence_length=nwords) # 将两个方向的状态,按时刻前后拼接在一起,沿最后一轴拼接 output = tf.concat([output_fw, output_bw], axis=-1) # 将output形状再变回来shape = (batch_size,time_len,input_size) output = tf.transpose(output, perm=[1, 0, 2]) with tf.name_scope('LSTM_dropout'): output = tf.layers.dropout(output, rate=params['dropout'], training=training) with tf.name_scope('Fully_connected_layer'): # 全连接层计算每一时刻的得分值 logits = tf.layers.dense(output, num_tags) with tf.name_scope('CRF'): # CRF转移矩阵 crf_params = tf.get_variable('crf', [num_tags, num_tags], dtype=tf.float32) # crf解码,pred_ids是预测的标记列表 pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) # 判断是训练,评估,还是预测 if mode == tf.estimator.ModeKeys.PREDICT: # 预测 # 获取标记tag与其索引的字典,格式为{id:tag,..} reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) # 将tag的id映射到tag上,获取预测的标记tag pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) # 此字典存储,需要预测的内容 predictions = {'pred_ids': pred_ids, 'tags': pred_strings} return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss # 获取标记与其索引映射表,{tag:id},注意包含了填充标记pad vocab_tags = tf.contrib.lookup.index_table_from_file( params['tags']) # 将真实tag转为id序列 tags = vocab_tags.lookup(labels) # 计算损失函数,负的对数似然 log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) # 评估指标 weights = tf.sequence_mask(nwords) metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision(tags, pred_ids, num_tags, indices, weights), 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) # 评估 if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) # 训练 elif mode == tf.estimator.ModeKeys.TRAIN: # 优化器 train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step()) 
return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params): # For serving features are a bit different if isinstance(features, dict): features = ((features['words'], features['nwords']), (features['chars'], features['nchars'])) # Read vocabs and inputs dropout = params['dropout'] (words, nwords), (chars, nchars) = features training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) vocab_chars = tf.contrib.lookup.index_table_from_file( params['chars'], num_oov_buckets=params['num_oov_buckets']) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len(indices) + 1 with Path(params['chars']).open() as f: num_chars = sum(1 for _ in f) + params['num_oov_buckets'] # Char Embeddings char_ids = vocab_chars.lookup(chars) variable = tf.get_variable('chars_embeddings', [num_chars + 1, params['dim_chars']], tf.float32) char_embeddings = tf.nn.embedding_lookup(variable, char_ids) # char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout, # training=training) # Char 1d convolution weights = tf.sequence_mask(nchars) char_embeddings = masked_conv1d_and_max(char_embeddings, weights, params['char_filters'], params['char_kernel_size']) # Word Embeddings word_ids = vocab_words.lookup(words) glove = np.load(params['w2v'])['embeddings'] # np.array print("glove shape", glove.shape) variable = np.vstack([glove, [[0.] * params['dim']]]) # [vob_size, emb_size] variable = tf.Variable(variable, dtype=tf.float32, trainable=False) word_embeddings = tf.nn.embedding_lookup(variable, word_ids) # Concatenate Word and Char Embeddings embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # sess = tf.InteractiveSession() # emb_shape = sess.run(tf.shape(embeddings)) # print("-"*50,'emb_shape:',emb_shape) # block_unflat_scores shape: [batch_size, max_seq_len, class_num] block_unflat_scores, _, l2_loss = feature_layers(embeddings, reuse=False) pred_ids = tf.argmax(block_unflat_scores[-1], 2) if mode == tf.estimator.ModeKeys.PREDICT: # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = {'pred_ids': pred_ids, 'tags': pred_strings} return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss # input_mask = tf.ones(shape=[words.get_shape().as_list()[0], params["max_seq_len"]], dtype=tf.int32) # input_mask = tf.ones_like(words,dtype=tf.int32) # for i, real_seq_len in enumerate(nwords): # input_mask[i, real_seq_len:] = 0 # input_mask = np.zeros((params["batch_size"], params["max_seq_len"])).astype("int") # for i, real_seq_len in enumerate(nwords.eval()): # input_mask[i, real_seq_len:] = 0 vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) tags = vocab_tags.lookup(labels) # CalculateMean cross-entropy loss with tf.name_scope("loss"): loss = tf.constant(0.0) # labels = tf.cast(labels, 'int32') # block_unflat_scores = tf.Print(block_unflat_scores,[block_unflat_scores[-1].shape]) # print(block_unflat_scores[-1].shape) # tags = tf.Print(tags,[tags.shape]) losses = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=block_unflat_scores[-1], labels=tags) # masked_losses = tf.multiply(losses, input_mask) # loss += tf.div(tf.reduce_sum(masked_losses), tf.reduce_sum(input_mask)) loss += tf.reduce_sum(losses) loss += 
params["l2_penalty"] * l2_loss # Metrics weights = tf.sequence_mask(nwords) # tags_min = tf.reduce_min(tags) # tags_min=tf.Print(tags_min,[tags_min], message="debug mertics tags_min") # tags = tf.Print(tags,[tags,tags_min], message="debug mertics tags") metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision(tags, pred_ids, num_tags, indices, weights), 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(self, features, labels, mode, params): # For serving, features are a bit different if isinstance(features, dict): features = features['words'], features['nwords'] # Read vocabs and inputs dropout = params['dropout'] words, nwords = features #nwords = tf.shape(words)[0] #print('###########tf.shape nwords:{}#######'.format(nwords)) training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) if mode == tf.estimator.ModeKeys.PREDICT: # Word Embeddings word_ids = vocab_words.lookup(words) if self.embeding == 'glove': glove = np.load(params['glove'])['embeddings'] # np.array variable = np.vstack([glove, [[0.]*params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=True) embeddings = tf.nn.embedding_lookup(variable, word_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # add by panyc # with Path(params['words']).open() as f: # vocab_words = f.readlines() # vocab_length = len(vocab_words) # end else: embeddings = tf.Variable( # tf.random_uniform([vocab_length + 1, 300], -1.0, 1.0)) tf.random_normal([params['embeding_size'], 300], 0.0, 0.057735026918962574) ) embeddings = tf.nn.embedding_lookup(embeddings, word_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # LSTM # t = tf.transpose(embed, perm=[1, 0, 2]) t = tf.transpose(embeddings, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) # CRF logits = tf.layers.dense(output, params['num_tags']) crf_params = tf.get_variable("crf", [params['num_tags'], params['num_tags']], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = { 'pred_ids': pred_ids, 'tags': pred_strings } return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: print('##########nwords:{}###########'.format(nwords)) #words_shards = tf.split(words, self.num_gpus) #labels_shards = tf.split(labels, self.num_gpus) words = tf.cond(tf.less(tf.shape(words)[0], self.num_gpus), \ lambda:tf.concat([words]*self.num_gpus,0),lambda:words) nwords = tf.cond(tf.less(tf.shape(nwords)[0], self.num_gpus), \ lambda:tf.concat([nwords]*self.num_gpus,0),lambda:nwords) labels = tf.cond(tf.less(tf.shape(labels)[0], self.num_gpus), \ lambda:tf.concat([labels]*self.num_gpus,0),lambda:labels) n = (tf.shape(words)[0]//self.num_gpus ) * self.num_gpus words = words[:n] nwords = nwords[:n] labels = labels[:n] words_shards = tf.split(words, self.num_gpus) nwords_shards = tf.split(nwords, self.num_gpus) labels_shards = tf.split(labels, self.num_gpus) loss_shards = [] grad_shards = [] metric_accuracy = [] accuracy_op = None metric_precision = [] precision_op = None metric_recall = [] recall_op = None metric_f1 = [] f1_op = None #nwords = tf.div(nwords, self.num_gpus) #nwords=10 #nwords = tf.constant([nwords,], dtype=tf.int32) 
for i, device in enumerate(self.devices): with tf.variable_scope( tf.get_variable_scope(), reuse=True if i > 0 else None): with tf.device(device): words = words_shards[i] nwords = nwords_shards[i] labels = labels_shards[i] word_ids = vocab_words.lookup(words) if self.embeding == 'glove': glove = np.load(params['glove'])['embeddings'] # np.array variable = np.vstack([glove, [[0.]*params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=True) embeddings = tf.nn.embedding_lookup(variable, word_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # add by panyc # with Path(params['words']).open() as f: # vocab_words = f.readlines() # vocab_length = len(vocab_words) # end else: embeddings = tf.Variable( # tf.random_uniform([vocab_length + 1, 300], -1.0, 1.0)) tf.random_normal([params['embeding_size'], 300], 0.0, 0.057735026918962574) ) embeddings = tf.nn.embedding_lookup(embeddings, word_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # LSTM # t = tf.transpose(embed, perm=[1, 0, 2]) t = tf.transpose(embeddings, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) # CRF logits = tf.layers.dense(output, params['num_tags']) crf_params = tf.get_variable("crf", [params['num_tags'], params['num_tags']], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) # Loss vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) # vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'], num_oov_buckets=params['num_oov_buckets']) tags = vocab_tags.lookup(labels) log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) loss_shards.append(loss) weights = tf.sequence_mask(nwords,tf.shape(tags)[1]) ## add by panyc #weights = tf.expand_dims(weights,axis=0) ## end val,accuracy_op = tf.metrics.accuracy(tags, pred_ids, weights) metric_accuracy.append([val]) val,precision_op = precision(tags, pred_ids, params['num_tags'], self.indices, weights) metric_precision.append([val]) val,recall_op = recall(tags, pred_ids, params['num_tags'], self.indices, weights) metric_recall.append([val]) val,f1_op = f1(tags, pred_ids, params['num_tags'], self.indices, weights) metric_f1.append([val]) loss = tf.reduce_mean(loss_shards) metric_accuracy = tf.reduce_mean(metric_accuracy) metric_precision = tf.reduce_mean(metric_precision) metric_recall = tf.reduce_mean(metric_recall) metric_f1 = tf.reduce_mean(metric_f1) metrics = { 'acc': (metric_accuracy,accuracy_op), 'precision': (metric_precision,precision_op), 'recall': (metric_recall, recall_op), 'f1': (metric_f1, f1_op), } # Metrics #weights = tf.sequence_mask(nwords) for metric_name, op in metrics.items(): print('############op##########') print(op) tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec( mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: # train_op = tf.train.AdamOptimizer().minimize( # loss, 
global_step=tf.train.get_or_create_global_step()) train_op = tf.train.AdamOptimizer(learning_rate=self.params['learnning_rate']).minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec( mode, loss=loss, train_op=train_op)