            def metric_fn(label_ids, logits, trans):
                # First, Viterbi-decode the results (CRF decode). pred_ids is
                # not defined in this snippet; a plausible decode, assuming
                # fixed-length sequences:
                pred_ids, _ = tf.contrib.crf.crf_decode(
                    logits, trans,
                    tf.fill([tf.shape(logits)[0]], FLAGS.max_seq_length))

                weight = tf.sequence_mask(FLAGS.max_seq_length)  # NB: a mask over a scalar is all-True
                precision = tf_metrics.precision(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
                f = tf_metrics.f1(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }
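These snippets all appear to use the tf_metrics package (Guillaume Genthial's multi-class metrics for TF 1.x), whose functions return the same (value, update_op) pairs as tf.metrics.*. A minimal, self-contained sketch of driving one by hand, with invented toy tensors and the signature seen throughout these examples:

import tensorflow as tf
import tf_metrics

# Toy batch: two sequences of three token labels each, id 0 = padding.
labels = tf.constant([[1, 2, 0], [2, 1, 0]], tf.int32)
preds = tf.constant([[1, 2, 0], [1, 1, 0]], tf.int32)
weights = tf.sequence_mask([2, 2], maxlen=3)  # mask out the padding positions

# Returns (value, update_op), exactly like tf.metrics.accuracy & co.
f1, f1_op = tf_metrics.f1(labels, preds, num_classes=3,
                          pos_indices=[1, 2], weights=weights,
                          average='macro')

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric counters are local variables
    sess.run(f1_op)       # accumulate counts for this batch
    print(sess.run(f1))   # read the streaming macro-F1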
Example #2
 def metric_fn(per_example_loss, label_ids, logits):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     precision = tf_metrics.precision(
         label_ids,
         predictions,
         13, [1, 2, 3, 4, 5, 6, 7, 8],
         average="macro")  #13 是总label数,括号里是有用的
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                13, [1, 2, 3, 4, 5, 6, 7, 8],
                                average="macro")
     f = tf_metrics.f1(label_ids,
                       predictions,
                       13, [1, 2, 3, 4, 5, 6, 7, 8],
                       average="macro")
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
     }
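The average argument is not cosmetic: "macro" averages per-class scores with equal weight, while "micro" pools the raw counts first, so frequent classes dominate. A plain-Python check with invented per-class counts:

# Invented counts: class 1 has 8 TP / 2 FP, class 2 has 2 TP / 6 FP.
tp = {1: 8, 2: 2}
fp = {1: 2, 2: 6}

# macro: per-class precision, then an unweighted mean
macro_p = sum(tp[c] / (tp[c] + fp[c]) for c in tp) / len(tp)  # (0.8 + 0.25) / 2 = 0.525

# micro: pool the counts across classes, then one precision
micro_p = sum(tp.values()) / (sum(tp.values()) + sum(fp.values()))  # 10 / 18 ≈ 0.556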
Example #3
 def metric_fn(per_example_loss, label_ids, logits):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     precision = tf_metrics.precision(label_ids,
                                      predictions,
                                      num_labels, [1, 2],
                                      average="macro")
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                num_labels, [1, 2],
                                average="macro")
     f = tf_metrics.f1(label_ids,
                       predictions,
                       num_labels, [1, 2],
                       average="macro")
     #
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
     }
Example #4
            def metric_fn(label_ids, pred_ids):
                # pred_ids comes from Viterbi (CRF) decoding of the model output

                indices = [2, 3, 4, 5, 6,
                           7]  # the indices argument tells the metric which label ids to evaluate; it matches label_list
                weight = tf.sequence_mask(FLAGS.max_seq_length)
                precision = tf_metrics.precision(label_ids, pred_ids,
                                                 num_labels, indices, weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                           indices, weight)
                f = tf_metrics.f1(label_ids, pred_ids, num_labels, indices,
                                  weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }
Example #5
 def metric_fn(seq_length, max_len, label_ids, pred_ids):
     indices = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                12]  # the indices argument tells the metric which label ids to evaluate
     # Metrics
     weights = tf.sequence_mask(seq_length, maxlen=max_len)
     metrics = {
         'acc':
         tf.metrics.accuracy(label_ids, pred_ids, weights),
         'precision':
         precision(label_ids, pred_ids, params['num_labels'],
                   indices, weights),
         'recall':
         recall(label_ids, pred_ids, params['num_labels'], indices,
                weights),
         'f1':
         f1(label_ids, pred_ids, params['num_labels'], indices,
            weights),
     }
     for metric_name, op in metrics.items():
         tf.summary.scalar(metric_name, op[1])
     return metrics
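Whatever metric_fn returns here ends up as eval_metric_ops on the EstimatorSpec, which is why every value is a (tensor, update_op) pair rather than a bare tensor. Roughly, as a sketch (the surrounding wiring is not shown in this example; names are illustrative):

# Hypothetical wiring: the dict of (value, update_op) pairs returned above
# is exactly what EstimatorSpec expects during evaluation.
if mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = metric_fn(seq_length, max_len, label_ids, pred_ids)
    return tf.estimator.EstimatorSpec(mode, loss=loss,
                                      eval_metric_ops=eval_metric_ops)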
Example #6
 def metric_fn(per_example_loss, label_ids, logits):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     precision = tf_metrics.precision(label_ids,
                                      predictions,
                                      11, [1, 2, 4, 5, 6, 7, 8, 9],
                                      average="macro")
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                11, [1, 2, 4, 5, 6, 7, 8, 9],
                                average="macro")
     f = tf_metrics.f1(label_ids,
                       predictions,
                       11, [1, 2, 4, 5, 6, 7, 8, 9],
                       average="macro")
     loss = tf.metrics.mean(per_example_loss)
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
         "eval_loss": loss,
     }
Example #7
 def metric_fn(label_ids, pred_ids, per_example_loss,
               input_mask):
     # ['<pad>'] + ["O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-MISC", "I-MISC", "X"]
     indices = [2, 3]
     precision = tf_metrics.precision(label_ids, pred_ids,
                                      num_labels, indices,
                                      input_mask)
     recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                indices, input_mask)
     f = tf_metrics.f1(label_ids, pred_ids, num_labels, indices,
                       input_mask)
     accuracy = tf.metrics.accuracy(label_ids, pred_ids,
                                    input_mask)
     loss = tf.metrics.mean(per_example_loss)
     return {
         'eval_precision': precision,
         'eval_recall': recall,
         'eval_f': f,
         'eval_accuracy': accuracy,
         'eval_loss': loss,
     }
Example #8
    def compute_metrics(self, tags, pred_ids, num_tags, indices, nwords):
        weights = tf.sequence_mask(nwords)

        # metrics_correct_rate, golden, predict = correct_rate(tags, pred_ids)
        # metrics_correct_rate = correct_rate(tags, pred_ids, weights)

        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices,
                                   weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
            'correct_rate': correct_rate(tags, pred_ids, weights),
            # 'golden': (golden, tf.zeros([], tf.int32)),
            # 'predict': (predict, tf.zeros([], tf.int32))
        }

        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        return metrics
Example #9
 def metric_fn(label_ids, pred_ids, num_labels):
     # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     pos_indices = [id for id in range(2, num_labels - 3)]
     # pos_indices = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
     #                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
     # pos_indices = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
     precision = tf_metrics.precision(
         label_ids, pred_ids, num_labels, pos_indices, average="micro")
     recall = tf_metrics.recall(
         label_ids, pred_ids, num_labels, pos_indices, average="micro")
     f = tf_metrics.f1(label_ids, pred_ids,
                       num_labels, pos_indices, average="micro")
     # hook_dict['precision'] = precision
     # hook_dict['recall'] = recall
     # hook_dict['f'] = f
     # tf.summary.scalar('precision', precision)
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
         # "eval_loss": loss,
     }
Example #10
        def metric_fn(per_example_loss, label_ids, logits):
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            if task_name == "cola":
                FN, FN_op = tf.metrics.false_negatives(labels=label_ids,
                                                       predictions=predictions)
                FP, FP_op = tf.metrics.false_positives(labels=label_ids,
                                                       predictions=predictions)
                TP, TP_op = tf.metrics.true_positives(labels=label_ids,
                                                      predictions=predictions)
                TN, TN_op = tf.metrics.true_negatives(labels=label_ids,
                                                      predictions=predictions)

                MCC = (TP * TN - FP * FN) / ((TP + FP) * (TP + FN) *
                                             (TN + FP) * (TN + FN))**0.5
                MCC_op = tf.group(FN_op, TN_op, TP_op, FP_op,
                                  tf.identity(MCC, name="MCC"))
                return {"MCC": (MCC, MCC_op)}
            elif task_name == "mrpc":
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions)
                loss = tf.metrics.mean(values=per_example_loss)
                f1 = tf_metrics.f1(labels=label_ids,
                                   predictions=predictions,
                                   num_classes=2,
                                   pos_indices=[1])
                return {
                    "eval_accuracy": accuracy,
                    "eval_f1": f1,
                    "eval_loss": loss,
                }
            else:
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions)
                loss = tf.metrics.mean(values=per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }
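A quick sanity check of the MCC expression with invented confusion counts (plain Python); note the formula divides by zero whenever any of the four marginals is empty:

TP, FP, FN, TN = 6.0, 2.0, 1.0, 11.0  # invented counts
mcc = (TP * TN - FP * FN) / ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)) ** 0.5
print(round(mcc, 4))  # 0.6847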
Example #11
            def metric_fn(per_example_loss, label_ids, logits):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # labels = []
                # for i, x in enumerate()
                predict_labels = []
                # for i in range(1, num_labels - 4):
                #     predict_labels.append(i)
                # precision = tf_metrics.precision(label_ids, predictions, num_labels, predict_labels, average="macro")
                # recall = tf_metrics.recall(label_ids, predictions, num_labels, predict_labels, average="macro")
                # f = tf_metrics.f1(label_ids, predictions, num_labels, predict_labels, average="macro")

                precision = tf_metrics.precision(label_ids, predictions, num_labels, average="macro")
                recall = tf_metrics.recall(label_ids, predictions, num_labels, average="macro")
                f = tf_metrics.f1(label_ids, predictions, num_labels, average="macro")

                #
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }
Example #12
 def metric_fn(per_example_loss, label_ids, logits,
               is_real_example):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                num_classes=num_labels,
                                average='micro')
     precision = tf_metrics.precision(label_ids,
                                      predictions,
                                      num_classes=num_labels,
                                      average='micro')
     f1 = tf_metrics.f1(label_ids,
                        predictions,
                        num_classes=num_labels,
                        average='micro')
     loss = tf.metrics.mean(values=per_example_loss,
                            weights=is_real_example)
     return {
         "eval_recall": recall,
         'eval_precision': precision,
         'eval_f1': f1,
         "eval_loss": loss,
     }
Example #13
            def metric_fn(label_ids, logits, trans):
                # First, Viterbi-decode the results (CRF decode). The decode is
                # not shown in this snippet; a plausible version, assuming
                # fixed-length sequences:
                pred_ids, _ = tf.contrib.crf.crf_decode(
                    logits, trans,
                    tf.fill([tf.shape(logits)[0]], FLAGS.max_seq_length))

                weight = tf.sequence_mask(FLAGS.max_seq_length)
                # shapes: label_ids [?, 128], pred_ids [64, 128],
                # num_labels 17, weight [128,]
                precision = tf_metrics.precision(label_ids, pred_ids,
                                                 num_labels,
                                                 [2, 3, 4, 5, 6, 7], weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                           [2, 3, 4, 5, 6, 7], weight)
                f = tf_metrics.f1(label_ids, pred_ids, num_labels,
                                  [2, 3, 4, 5, 6, 7], weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }
Example #14
def eval_phase(label_ids, pred_ids, num_labels):
    # pred_ids should already be the Viterbi (CRF) decode of the model output
    eval_list = []
    eval_ids_path = os.path.join(FLAGS.output_dir, "eval_ids_list.txt")
    assert os.path.exists(eval_ids_path)
    with open(eval_ids_path, 'r') as list_file:
        contents = list_file.readlines()
    for item in contents:
        # remember to convert the strings back to int -- the evaluation below
        # works on a list of ints
        eval_list.append(int(item.strip()))
    assert len(eval_list) > 0
    print("eval_list:", eval_list)
    weight = tf.sequence_mask(FLAGS.max_seq_length)
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                     eval_list, weight)
    tf.summary.scalar("precision", precision[1])
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels, eval_list,
                               weight)
    tf.summary.scalar("recall", recall[1])
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, eval_list, weight)
    tf.summary.scalar("f1", f[1])
    return (precision, recall, f)
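eval_phase expects eval_ids_list.txt to hold one label id per line. A hypothetical companion snippet (not from the original code; the label list and its order are assumptions) that writes such a file, skipping padding and 'O':

import os

label_list = ["<pad>", "O", "B-PER", "I-PER", "B-LOC", "I-LOC"]  # assumed order
with open(os.path.join(FLAGS.output_dir, "eval_ids_list.txt"), "w") as fo:
    for idx, tag in enumerate(label_list):
        if tag not in ("<pad>", "O"):
            fo.write("%d\n" % idx)  # one int id per line, read back by eval_phase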
Example #15
 def metric_fn(per_example_loss, label_ids, logits):
     # def metric_fn(label_ids, logits):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     n_class = [i for i in range(len(LABELS) - 2)]
     precision = tf_metrics.precision(label_ids,
                                      predictions,
                                      len(LABELS) + 1, [1, 2, 3],
                                      average="macro")
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                len(LABELS) + 1, [1, 2, 3],
                                average="macro")
     f = tf_metrics.f1(label_ids,
                       predictions,
                       len(LABELS) + 1, [1, 2, 3],
                       average="macro")
     #
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
         # "eval_loss": loss,
     }
Example #16
 def metric_fn(per_example_loss, label_ids, logits):
     # def metric_fn(label_ids, logits):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     precision = tf_metrics.precision(
         label_ids,
         predictions,
         len(NerProcessor.get_labels()) + 1,
         average="macro")
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                len(NerProcessor.get_labels()) + 1,
                                average="macro")
     f = tf_metrics.f1(label_ids,
                       predictions,
                       len(NerProcessor.get_labels()) + 1,
                       average="macro")
     #
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
         #"eval_loss": loss,
     }
Example #17
            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # print("predictions shape: " + str(predictions.get_shape().as_list()))
                # print("label_ids shape: " + str(label_ids.get_shape().as_list()))
                # print("is_real_example shape: " + str(is_real_example.get_shape().as_list()))
                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 num_labels, [2, 3, 4, 5],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           num_labels, [2, 3, 4, 5],
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  num_labels, [2, 3, 4, 5],
                                  average="macro")

                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions,
                                               weights=is_real_example)
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)

                # precision = tf_metrics.precision(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
                # recall = tf_metrics.recall(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
                # f = tf_metrics.f1(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
                #
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }
Example #18
            def metric_fn(per_example_loss, label_ids, logits):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                print(logits)
                print(predictions)
                print(label_ids)

                #label_ids_array = label_ids.eval()
                #predictions_array = predictions.eval()
                #predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                #print(predictions_array)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions)
                precision = tf_metrics.precision(
                    label_ids,
                    predictions,
                    19, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                    average="micro")
                recall = tf_metrics.recall(
                    label_ids,
                    predictions,
                    19, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                    average="micro")
                f = tf_metrics.f1(
                    label_ids,
                    predictions,
                    19, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                    average="micro")
                #class_repo = classification_report(label_ids_array, predictions_array )
                return {
                    "eval_accuracy": accuracy,
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    #    "eval_class_repo":class_repo,
                    #"eval_loss": loss,
                }
Example #19
            def metric_fn(label_ids, predicted_labels, input_mask, num_labels):

                label_ids = tf.boolean_mask(label_ids, input_mask)
                predicted_labels = tf.boolean_mask(predicted_labels,
                                                   input_mask)

                precision = tf_metrics.precision(label_ids,
                                                 predicted_labels,
                                                 num_labels, [1, 2, 3],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predicted_labels,
                                           num_labels, [1, 2, 3],
                                           average="macro")
                f1 = tf_metrics.f1(label_ids,
                                   predicted_labels,
                                   num_labels, [1, 2, 3],
                                   average="macro")

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f1
                }
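Unlike the weight-based examples, this one removes padding before the metrics ever see it: tf.boolean_mask with a 2-D mask flattens both tensors to 1-D vectors of only the kept positions. A toy check (TF 1.x, invented tensors):

import tensorflow as tf

label_ids = tf.constant([[1, 2, 0], [3, 0, 0]])
input_mask = tf.constant([[True, True, False], [True, False, False]])

with tf.Session() as sess:
    # Padding positions vanish entirely rather than being zero-weighted.
    print(sess.run(tf.boolean_mask(label_ids, input_mask)))  # [1 2 3]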
Example #20
 def metric_fn(seq_length, max_len, label_ids, pred_ids):
     indices = [1, 2]  # the indices argument tells the metric which labels to evaluate
     # Metrics (NB: weights is built here but never passed to the metric calls below)
     weights = tf.sequence_mask(seq_length, maxlen=max_len)
     tf.logging.info("****shape in metrics***")
     label_ids_metric = tf.argmax(label_ids, 1)
     tf.logging.info(label_ids_metric.shape)
     tf.logging.info(pred_ids.shape)
     metrics = {
         'acc':
         tf.metrics.accuracy(label_ids_metric, pred_ids),
         'precision':
         precision(label_ids_metric, pred_ids, params['num_labels'],
                   indices),
         'recall':
         recall(label_ids_metric, pred_ids, params['num_labels'],
                indices),
         'f1':
         f1(label_ids_metric, pred_ids, params['num_labels'],
            indices),
     }
     for metric_name, op in metrics.items():
         tf.summary.scalar(metric_name, op[1])
     return metrics
Example #21
            def metric_fn(per_example_loss, label_ids, logits):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # Evaluation: computes precision, recall and F1. If the label set
                # changes, the numbers below must change too: 10 is the total class
                # count and 1-6 are the useful classes (B, I, E).
                # See the functions in tf.metrics for details.
                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 10, [1, 2, 3, 4, 5, 6],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           10, [1, 2, 3, 4, 5, 6],
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  10, [1, 2, 3, 4, 5, 6],
                                  average="macro")

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }
Example #22
            def metric_fn(label_ids, logits, trans):
                # First, Viterbi-decode the results (CRF decode). pred_ids is
                # not defined in this snippet; a plausible decode, assuming
                # fixed-length sequences:
                pred_ids, _ = tf.contrib.crf.crf_decode(
                    logits, trans,
                    tf.fill([tf.shape(logits)[0]], max_seq_length))

                weight = tf.sequence_mask(max_seq_length)
                # id 0 is padding and 1 is [SEP]; the entity label ids listed
                # below (matching label_ids) are the positives for these
                # multi-class metrics
                # precision = TP / (TP + FP)   # share of predicted positives that are truly positive
                # recall = TP / (TP + FN)      # share of true positives that are predicted positive
                # accuracy = (TP + TN) / (P + N)
                # F1-score = 2 / [(1 / precision) + (1 / recall)]
                precision = tf_metrics.precision(label_ids, pred_ids,
                                                 num_labels,
                                                 [2, 3, 4, 5, 6, 7], weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                           [2, 3, 4, 5, 6, 7], weight)
                f1 = tf_metrics.f1(label_ids, pred_ids, num_labels,
                                   [2, 3, 4, 5, 6, 7], weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f1": f1,
                    # "eval_loss": loss,
                }
Example #23
def model_fn(features, labels, mode, params):
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    input_ids = features["text"]
    author_id = features["author"]
    category_ids = features["categories"]
    label_id = features["label"]
    cnn = CnnModel(params, input_ids, author_id, category_ids, training)

    squeeze_label_ids = tf.squeeze(label_id, axis=1)
    logits, predict_label_ids, loss = cnn.build_network(squeeze_label_ids)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        #words = tf.contrib.lookup.index_to_string_table_from_file(params['vocab'])
        #input_words = words.lookup(tf.to_int64(input_ids))
        predictions = {
            'true_label_ids': squeeze_label_ids,
            'predict_label_ids': predict_label_ids,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss

        train_op = optimization.create_optimizer(loss, params['learning_rate'], params['train_steps'], params['num_warmup_steps'])
        if mode == tf.estimator.ModeKeys.EVAL:
            # Metrics
            metrics = {
                'acc': tf.metrics.accuracy(squeeze_label_ids, predict_label_ids),
                # compute per-class precision/recall, then macro-average over classes
                'precision': precision(squeeze_label_ids, predict_label_ids, params['label_size'], average='macro'),
                'recall': recall(squeeze_label_ids, predict_label_ids, params['label_size'], average='macro'),
                'f1': f1(squeeze_label_ids, predict_label_ids, params['label_size'], average='macro'),
            }
            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
Example #24
def model_fn(features, labels, mode, params):
    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars), add_features = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable(
        'chars', [num_chars + 1, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char LSTM
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(
        char_embeddings, weights, params['filters'], params['kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings,tf.cast(add_features, tf.float32)], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)


    # Attention
    # attention_output, alphas = attention(output, ATTENTION_SIZE, return_alphas=True)
    num_units = 200
    # W1 = tf.get_variable("W1", [num_units, num_units], dtype=tf.float32)
    W1 = tf.get_variable("W1", [num_units, num_units],
                         initializer=tf.glorot_uniform_initializer(),
                         dtype=tf.float32)
    b1 = tf.get_variable("b1", [num_units], dtype=tf.float32)
    q = tf.tensordot(output, W1, axes=[[2], [0]])
    out_shape = tf.shape(output)
    #b1_shuffled = self.b1.dimshuffle('x', 'x', 0)
    b1_shuffled = tf.expand_dims(b1, 0)
    b1_shuffled = tf.expand_dims(b1_shuffled, 0)
    #print("b shape",tf.shape(b1_shuffled))
    q += b1_shuffled
    q = tf.tanh(q)
    q_trans = tf.transpose(q, perm=[0, 2, 1])
    #out = tf.batched_dot(q, q.dimshuffle(0, 2, 1))
    out = tf.matmul(q, q_trans)
    #print("out dimension",out.shape)
    out *= (1 - tf.eye(out_shape[1], out_shape[1]))
    matrix = tf.nn.softmax(tf.reshape(out,(out_shape[0] * out_shape[1], out_shape[1])))
    matrix =  tf.reshape(matrix,(out_shape[0] , out_shape[1], out_shape[1]))
    #print("new dimension",matrix.shape)
    atten_out = tf.matmul(matrix,output)
    #print("atten dimension",atten_out.shape)
    #print("output dimension",output.shape)
    output = tf.concat([output, atten_out], axis=-1)
    output = tf.layers.dropout(output, rate=dropout, training=training)



    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
Example #25
def model_fn(features, labels, mode, params):
    if isinstance(features, dict):
        features = features['words'], features['nwords']

    dropout = params['dropout']
    words, nwords = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1

    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    embeddings = tf.nn.embedding_lookup(variable, word_ids)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    t = tf.transpose(embeddings, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    dense_layer = tf.layers.Dense(num_tags)
    logits = dense_layer(output)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        ##########
        epsilon = 5
        perturbed = _add_perturbation(embeddings, loss, epsilon)

        t = tf.transpose(perturbed, perm=[1, 0, 2])

        output_fw1, _1 = lstm_cell_fw(t,
                                      dtype=tf.float32,
                                      sequence_length=nwords)
        output_bw1, _1 = lstm_cell_bw(t,
                                      dtype=tf.float32,
                                      sequence_length=nwords)
        output1 = tf.concat([output_fw1, output_bw1], axis=-1)
        output1 = tf.transpose(output1, perm=[1, 0, 2])
        output1 = tf.layers.dropout(output1, rate=dropout, training=training)

        logits1 = dense_layer(output1)

        log_likelihood1, _1 = tf.contrib.crf.crf_log_likelihood(
            logits1, tags, nwords, crf_params)
        adv_loss = tf.reduce_mean(-log_likelihood1)

        loss += adv_loss
        ##########

        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
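_add_perturbation is called above but not shown anywhere in this listing. A plausible sketch, in the style of Miyato et al.'s adversarial training for embeddings (gradient of the loss w.r.t. the embeddings, normalized and scaled by epsilon); the actual helper in the source may differ:

def _add_perturbation(embeddings, loss, epsilon):
    """Shift embeddings along the normalized loss gradient -- a sketch."""
    grad = tf.gradients(loss, embeddings)[0]
    grad = tf.stop_gradient(grad)  # use the direction only; no second-order terms
    perturb = epsilon * grad / (tf.norm(grad) + 1e-12)
    return embeddings + perturb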
Example #26
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']),
                    (features['jasos'], features['njasos']))

    # Read vocabs and inputs
    (words, nwords), (chars, nchars), (jasos, njasos) = features
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    vocab_jasos = tf.contrib.lookup.index_table_from_file(
        params['jasos'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open(encoding="utf8") as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']
    with Path(params['jasos']).open(encoding="utf8") as f:
        num_jasos = sum(1 for _ in f) + params['num_oov_buckets']

    # jaso embedding
    jaso_ids = vocab_jasos.lookup(jasos)
    variable = tf.get_variable('jasos_embeddings',
                               [num_jasos, params['dim_chars']], tf.float32)
    jaso_embeddings = tf.nn.embedding_lookup(variable, jaso_ids)
    jaso_embeddings = tf.layers.dropout(jaso_embeddings,
                                        rate=dropout,
                                        training=training)

    # Char LSTM
    dim_words = tf.shape(jaso_embeddings)[1]
    dim_chars = tf.shape(jaso_embeddings)[2]
    flat = tf.reshape(jaso_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    output = tf.concat([output_fw, output_bw], axis=-1)
    jaso_embeddings = tf.reshape(output, [-1, dim_words, 50])

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings,
                                        rate=dropout,
                                        training=training)

    # Char LSTM
    dim_words = tf.shape(char_embeddings)[1]
    dim_chars = tf.shape(char_embeddings)[2]
    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    output = tf.concat([output_fw, output_bw], axis=-1)
    char_embeddings = tf.reshape(output, [-1, dim_words, 50])
    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    fasttext = np.load(params['fasttext'])['embeddings']  # np.array
    variable = np.vstack([fasttext, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32)  #, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings, jaso_embeddings],
                           axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
Example #27
        def model_fn(features, labels, mode, params):
            # For serving, features are a bit different
            #if isinstance(features, dict):
            #    features = features['words'], features['nwords']

            # Read vocabs and inputs
            #import ipdb
            #ipdb.set_trace()
            dropout = args.dropout
            #input_ids = features["input_ids"]
            #mask = features["mask"]
            #segment_ids = features["segment_ids"]
            #label_ids = features["label_ids"]
            ##words, nwords = features
            #tf.print(' '.join(words[4]), output_stream=sys.stderr)
            training = (mode == tf.estimator.ModeKeys.TRAIN)
            #vocab_words = tf.contrib.lookup.index_table_from_file(
            #    #args.vocab_words)
            #    args.vocab_words, num_oov_buckets=args.num_oov_buckets)
            #with Path(args.vocab_tags).open() as f:
            #    indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
            #    num_tags = len(indices) + 1

            ##word_ids = vocab_words.lookup(words)
            if args.embedding == 'word2id':
                # word2id
                with Path(args.vocab_words).open(encoding='utf-8') as f:
                    vocab_words_1 = f.readlines()
                    vocab_length = len(vocab_words_1)
                input_ids = features["input_ids"]
                label_ids = features["label_ids"]
                mask = features["mask"]
                embeddings = embedding(input_ids, vocab_length, args)
                embeddings = tf.layers.dropout(embeddings,
                                               rate=dropout,
                                               training=training)
                pass

            elif args.embedding == 'bert':
                from my_model.embeddings.embedding import get_bert_embedding
                input_ids = features["input_ids"]
                mask = features["mask"]
                segment_ids = features["segment_ids"]
                label_ids = features["label_ids"]
                embeddings = get_bert_embedding(args.bert_config_file,
                                                training,
                                                input_ids,
                                                mask,
                                                segment_ids,
                                                use_one_hot_embeddings=False)

            else:
                # Word Embeddings
                # default
                input_ids = features["input_ids"]
                label_ids = features["label_ids"]
                mask = features["mask"]
                glove = np.load(args.glove)['embeddings']  # np.array
                variable = np.vstack([glove, [[0.] * args.dim]])
                variable = tf.Variable(variable,
                                       dtype=tf.float32,
                                       trainable=False)
                embeddings = tf.nn.embedding_lookup(variable, input_ids)
                embeddings = tf.layers.dropout(embeddings,
                                               rate=dropout,
                                               training=training)
                pass

            (total_loss, logits,
             predicts) = create_model(embeddings,
                                      label_ids,
                                      mask,
                                      training,
                                      self.num_labels,
                                      use_one_hot_embeddings=False)
            tvars = tf.trainable_variables()
            initialized_variable_names = None
            scaffold_fn = None
            if args.init_checkpoint:
                (assignment_map, initialized_variable_names
                 ) = modeling.get_assignment_map_from_checkpoint(
                     tvars, args.init_checkpoint)
                tf.train.init_from_checkpoint(args.init_checkpoint,
                                              assignment_map)
                self.logging.debug("**** Trainable Variables ****")
                for var in tvars:
                    init_string = ""
                    if var.name in initialized_variable_names:
                        init_string = ", *INIT_FROM_CKPT*"
                    self.logging.debug("  name = %s, shape = %s%s", var.name,
                                       var.shape, init_string)
            if mode == tf.estimator.ModeKeys.TRAIN:
                warmup_steps = args.warmup_steps
                step = tf.to_float(tf.train.get_global_step())
                if args.learning_rate_decay == 'sqrt':
                    lr_warmup = args.learning_rate_peak * tf.minimum(
                        1.0, step / warmup_steps)
                    lr_decay = args.learning_rate_peak * tf.minimum(
                        1.0, tf.sqrt(warmup_steps / step))
                    lr = tf.where(step < warmup_steps, lr_warmup, lr_decay)
                elif args.learning_rate_decay == 'exp':
                    lr = tf.train.exponential_decay(
                        args.learning_rate_peak,
                        global_step=step,
                        decay_steps=args.decay_steps,
                        decay_rate=args.decay_rate)
                elif args.learning_rate_decay == 'bert':
                    num_train_steps = int(self.len_train_examples /
                                          args.batch_size * args.epochs)
                    #num_warmup_steps = int(num_train_steps * args.warmup_steps)
                    num_warmup_steps = int(num_train_steps * 0.1)
                    train_op = optimization.create_optimizer(
                        total_loss,
                        args.learning_rate,
                        num_train_steps,
                        num_warmup_steps,
                        use_tpu=False)
                    output_spec = tf.estimator.EstimatorSpec(
                        mode=mode,
                        loss=total_loss,
                        train_op=train_op,
                        #scaffold_fn=scaffold_fn
                    )
                    return output_spec
                else:
                    self.logging.info(
                        'learning rate decay strategy not supported')
                    sys.exit()
                tf.print(lr)
                train_op = tf.train.AdamOptimizer(lr).minimize(
                    total_loss,
                    global_step=tf.train.get_or_create_global_step())
                #return tf.estimator.EstimatorSpec(
                #    mode, loss=loss, train_op=train_op)

                #output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    #scaffold_fn=scaffold_fn
                )

            elif mode == tf.estimator.ModeKeys.EVAL:
                #def metric_fn(label_ids, logits,num_labels,mask):
                #    predictions = tf.math.argmax(logits, axis=-1, output_type=tf.int32)
                #    cm = metrics.streaming_confusion_matrix(label_ids, predictions, num_labels-1, weights=mask)
                #    return {
                #        "confusion_matrix":cm
                #    }
                #    #
                #eval_metrics = (metric_fn, [label_ids, logits, self.num_labels, mask])
                #output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                # Metrics
                #weights = tf.sequence_mask(nwords)
                weights = mask
                #mask2len = tf.reduce_sum(mask,axis=1)
                #weights = tf.sequence_mask(mask2len)
                #pred_ids= tf.math.argmax(logits, axis=-1, output_type=tf.int32)
                pred_ids = predicts
                num_label_ids = self.num_labels
                metrics = {
                    'acc':
                    tf.metrics.accuracy(label_ids, pred_ids, weights),
                    #'precision': tf.metrics.precision(label_ids, pred_ids, weights),
                    #'recall': tf.metrics.recall(label_ids, pred_ids, weights),
                    ##'f1': f1(label_ids, pred_ids, weights),
                    'precision':
                    precision(label_ids, pred_ids, self.num_labels,
                              self.indices, weights),
                    'recall':
                    recall(label_ids, pred_ids, self.num_labels, self.indices,
                           weights),
                    'f1':
                    f1(label_ids, pred_ids, self.num_labels, self.indices,
                       weights),
                }
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metrics
                    #scaffold_fn=scaffold_fn
                )
            else:
                #output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    predictions=predicts,
                    #scaffold_fn=scaffold_fn
                )
            return output_spec
Example #28
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    # num_oov_buckets maps words missing from the vocabulary to indices in
    # [vocab_size, vocab_size + num_oov_buckets - 1]; if num_oov_buckets <= 0,
    # missing words get default_value instead (-1 by default)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        # indices holds the positive-class tag ids; 'O' is the negative class and
        # is excluded, so that evaluation scores only the entity tags
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings: learned character embedding vectors
    char_ids = vocab_chars.lookup(chars)
    # The paper calls for initializing char_embeddings uniformly in
    # [-sqrt(3/dim), sqrt(3/dim)]; with it f1 = 0.91270673 vs. 0.91264033
    # without -- an improvement, but within normal run-to-run noise
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']],
                               dtype=tf.float32)
    # initializer=tf.random_uniform_initializer(-tf.sqrt(3/params['dim_chars']), tf.sqrt(3/params['dim_chars'])))
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings,
                                        rate=dropout,
                                        training=training)

    # Char 1-D convolution; sequence_mask turns the int char counts per word into a boolean mask
    mask = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, mask,
                                            params['filters'],
                                            params['kernel_size'])

    # Word Embeddings: not trained here, glove.840B.300d is used directly
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # Bi-LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF: in a linear-chain CRF the maximal cliques over the outputs are pairs of
    # adjacent nodes, so feature functions involve at most two adjacent output variables.
    # logits are the CRF's unary (state) features; crf_params is its binary transition matrix.
    logits = tf.layers.dense(
        output,
        num_tags)  # an (output.shape[-1], num_tags) matrix: leading dims unchanged, last dim becomes num_tags
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        mask = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, mask),
            'precision': precision(tags, pred_ids, num_tags, indices, mask),
            'recall': recall(tags, pred_ids, num_tags, indices, mask),
            'f1': f1(tags, pred_ids, num_tags, indices, mask),
        }
        # tf.metrics.accuracy returns (accuracy, update_op). The first tensor reports
        # the value accumulated so far, i.e. before the current batch; update_op
        # updates total and count with the current batch and returns the refreshed
        # accuracy. update_op must be run: if op[0] were added to the summary instead,
        # total and count would never update and the accuracy would stay constant.
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
                loss,
                global_step=tf.train.get_or_create_global_step())  # Adam's default learning rate is 1e-3
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
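For orientation, a model_fn like the one above plugs straight into tf.estimator. The sketch below is illustrative only: every path, params key, and the two input_fn callables are assumptions consistent with the code above, not part of the original project.

# Hypothetical driver for the model_fn above; paths, params values and the
# input functions are assumptions, shown only to make the contract concrete.
import tensorflow as tf

params = {
    'words': 'vocab.words.txt',   # one word per line
    'chars': 'vocab.chars.txt',   # one character per line
    'tags': 'vocab.tags.txt',     # one tag per line, 'O' included
    'glove': 'glove.npz',         # np.savez(..., embeddings=...) file
    'num_oov_buckets': 1, 'dim': 300, 'dim_chars': 100,
    'filters': 50, 'kernel_size': 3, 'lstm_size': 100, 'dropout': 0.5,
}
estimator = tf.estimator.Estimator(model_fn, model_dir='results/model',
                                   params=params)
# train_input_fn / eval_input_fn are assumed to yield
# (((words, nwords), (chars, nchars)), labels) batches
tf.estimator.train_and_evaluate(
    estimator,
    tf.estimator.TrainSpec(input_fn=train_input_fn),
    tf.estimator.EvalSpec(input_fn=eval_input_fn))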
Example #29
    def __model_fn(self, features, labels, mode, params):
        '''Model structure: Bi_LSTM + CRF.
        features: feature columns; labels: tag column;
        mode: passed in by tf.estimator.Estimator() to distinguish TRAIN / EVAL / PREDICT;
        params: parameter dict.
        '''
        # features arrives in one of two forms. Form 1, produced by self.__input_fn():
        # ((([None], ()), ([None, None], [None])), [None]).
        # Form 2, what we feed at prediction time:
        # {'words': [word1, word2, ...], 'nwords': number,
        #  'chars': [['J', 'o', ...], ['l', ...], ...], 'nchars': number}
        if isinstance(features, dict):
            features = ((features['words'], features['nwords']),
                        (features['chars'], features['nchars']))

        with tf.name_scope('Read_data'):
            # Unpack the feature columns
            (words, nwords), (chars,
                              nchars) = features  # words is the word list, nwords the word counts
            # vocabulary of single Chinese characters or English letters, e.g. {char1: int64}
            vocab_chars = tf.contrib.lookup.index_table_from_file(
                params['char_vocabulary'],
                num_oov_buckets=params['num_oov_buckets'])
            # vocabulary of Chinese or English words, e.g. {word: int64}
            vocab_words = tf.contrib.lookup.index_table_from_file(
                params['word_vocabulary'],
                num_oov_buckets=params['num_oov_buckets'])
            # tag-to-index mapping, excluding the padding_tag used to pad out batches
            with Path(params['tags']).open('r', encoding='utf-8') as fi:
                # indices stores the positive-class tag indices, i.e. everything except padding_tag
                indices = [
                    idx for idx, tag in enumerate(fi)
                    if tag.strip() != params.get('padding_tag', 'pad')
                ]
                num_tags = len(indices) + 1  # the total tag count adds padding_tag back in, needed for the transition matrix
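                # e.g. a tags file with the lines B-PER, I-PER, O, pad (the
                # padding_tag) gives indices = [0, 1, 2] and num_tags = 4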
            # count the single Chinese characters or English letters
            with Path(params['char_vocabulary']).open('r',
                                                      encoding='utf-8') as fi:
                # plus the OOV-bucket indices handed out to characters outside the vocabulary
                num_chars = sum(1 for _ in fi) + params['num_oov_buckets']
            # determine the mode: train, eval, or predict
            training = (mode == tf.estimator.ModeKeys.TRAIN)

        with tf.name_scope('Char_Embeddings_Layer'):
            char_ids = vocab_chars.lookup(chars)  # ids for the lists of characters
            # char2vec = tf.get_variable('char_embeddings',[num_chars,params['char2vec_dim']],tf.float32)
            # char_embeddings = tf.nn.embedding_lookup(char2vec,char_ids)
            # whether to load external vectors for single Chinese characters or English letters
            if params['if_load_char2vec']:
                char2vec = np.load(
                    params['char2vec'])['embeddings']  # load the vectors, indexable by char_id
                # add an all-zero vector for padding_tag; the shapes must stay consistent
                char2vec = np.vstack(
                    [char2vec, [[0.] * params['char2vec_dim']]])
                char2vec = tf.Variable(char2vec,
                                       dtype=tf.float32,
                                       trainable=False)  # embedding table as a tf tensor, not trainable
                # look up each character's vector; batched, so shape = (batch_size, time_len, input_size),
                # where the batch axis here is the number of words in each input
                char_embeddings = tf.nn.embedding_lookup(char2vec, char_ids)
            else:
                # train the character vectors as part of the model
                with Path(params['char_vocabulary']).open(
                        'r', encoding='utf-8') as fi:
                    char_vocab = [
                        word for idx, word in enumerate(fi)
                        if word.strip() != ''
                    ]
                char2vec = tf.get_variable(
                    'char2vec', [len(char_vocab), params['char2vec_dim']])
                # add an all-zero vector for padding_tag; the shapes must stay consistent
                padding_vec = tf.Variable([[0.] * params['char2vec_dim']],
                                          dtype=tf.float32)
                # char2vec is already a trainable variable, so the concat can be used
                # directly; wrapping it in another tf.Variable would disconnect it
                char2vec = tf.concat([char2vec, padding_vec], axis=0)
                # Note: the padding_tag vector should stay all-zero, but training would
                # drift it away from zero, so the last row of char2vec is re-zeroed on
                # every lookup with a mask: a lookup table shaped like char2vec whose
                # last row is all zeros and whose other rows are all ones
                mask = [params['char2vec_dim']] * len(char_vocab) + [0]
                mask_lookup_table = tf.sequence_mask(mask, dtype=tf.float32)
                mask_vec = tf.nn.embedding_lookup(mask_lookup_table, char_ids)
                # look up each character's vector; batched, so shape = (batch_size, time_len, input_size),
                # where the batch axis here is the number of words in each input
                embeddings = tf.nn.embedding_lookup(char2vec, char_ids)
                # reset the padding_tag vectors among char_ids to zero
                char_embeddings = tf.multiply(embeddings, mask_vec)
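                # e.g. with len(char_vocab) = 3 and char2vec_dim = 2, mask is
                # [2, 2, 2, 0] and mask_lookup_table is
                # [[1,1],[1,1],[1,1],[0,0]], so lookups at the padding index
                # come back all-zero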

        with tf.name_scope('Char_Embedding_Dropout_Layer'):
            # char_embeddings.shape = (None, None, None, params['char2vec_dim']):
            # the first None is batch_size, the second the number of words per input,
            # the third the number of characters in each of those words
            char_embeddings = tf.layers.dropout(char_embeddings,
                                                rate=params['dropout'],
                                                training=training)

        with tf.name_scope('Char_LSTM_Layer'):
            dim_words = tf.shape(char_embeddings)[1]  # words in the current input
            dim_chars = tf.shape(char_embeddings)[2]  # characters per word in the current input
            flat = tf.reshape(char_embeddings,
                              [-1, dim_chars, params['char2vec_dim']])
            t = tf.transpose(flat, perm=[1, 0, 2])
            lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(
                params['char_lstm_size'])
            # use a separate cell for the backward direction rather than
            # wrapping lstm_cell_fw, so the two directions don't share weights
            lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(
                params['char_lstm_size'])
            lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
            # final-step hidden state of the forward LSTM
            _, (_, output_fw) = lstm_cell_fw(t,
                                             dtype=tf.float32,
                                             sequence_length=tf.reshape(
                                                 nchars, [-1]))
            # final-step hidden state of the backward LSTM
            _, (_, output_bw) = lstm_cell_bw(t,
                                             dtype=tf.float32,
                                             sequence_length=tf.reshape(
                                                 nchars, [-1]))
            # concatenate the two final states along the last axis
            output = tf.concat([output_fw, output_bw], axis=-1)
            char_embeddings = tf.reshape(
                output, [-1, dim_words, params['char_lstm_size'] * 2])
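            # shape round-trip: (batch, n_words, n_chars, char2vec_dim)
            # -> flat (batch*n_words, n_chars, char2vec_dim) for the fused LSTM
            # -> final states (batch*n_words, 2*char_lstm_size)
            # -> back to (batch, n_words, 2*char_lstm_size) as per-word char features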

        with tf.name_scope('Word_Embeddings_Layer'):
            word_ids = vocab_words.lookup(words)  # ids for the list of words
            # whether to load external word vectors
            if params['if_load_word2vec']:
                word2vec = np.load(
                    params['word2vec'])['embeddings']  # load the vectors, indexable by word_id
                # add an all-zero vector for padding_tag; the shapes must stay consistent
                word2vec = np.vstack(
                    [word2vec, [[0.] * params['word2vec_dim']]])
                word2vec = tf.Variable(word2vec,
                                       dtype=tf.float32,
                                       trainable=False)  # embedding table as a tf tensor, not trainable
                # look up each word's vector; batched, so shape = (batch_size, time_len, input_size)
                word_embeddings = tf.nn.embedding_lookup(word2vec, word_ids)
            else:
                # train the word vectors as part of the model
                with Path(params['word_vocabulary']).open(
                        'r', encoding='utf-8') as fi:
                    vocab = [
                        word for idx, word in enumerate(fi)
                        if word.strip() != ''
                    ]
                word2vec = tf.get_variable(
                    'word2vec', [len(vocab), params['word2vec_dim']])
                # add an all-zero vector for padding_tag; the shapes must stay consistent
                padding_vec = tf.Variable([[0.] * params['word2vec_dim']],
                                          dtype=tf.float32)
                # word2vec is already trainable; no extra tf.Variable wrap is needed
                word2vec = tf.concat([word2vec, padding_vec], axis=0)
                # As with char2vec, keep the padding_tag row at zero by re-zeroing it
                # on every lookup: build a lookup table shaped like word2vec whose
                # last row is all zeros and whose other rows are all ones
                mask = [params['word2vec_dim']] * len(vocab) + [0]
                mask_lookup_table = tf.sequence_mask(mask, dtype=tf.float32)
                mask_vec = tf.nn.embedding_lookup(mask_lookup_table, word_ids)
                # look up each word's vector; batched, so shape = (batch_size, time_len, input_size)
                embeddings = tf.nn.embedding_lookup(word2vec, word_ids)
                # reset the padding_tag vectors among word_ids to zero
                word_embeddings = tf.multiply(embeddings, mask_vec)

        with tf.name_scope('Concatenate_CharEmbedding_WordEmbedding'):
            embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)

        with tf.name_scope('Dropout_Layer'):
            embeddings = tf.layers.dropout(embeddings,
                                           rate=params['dropout'],
                                           training=training)

        with tf.name_scope('Word_Bi_LSTM'):
            # reshape the input to shape=(time_len, batch_size, input_size) for the LSTM
            inputs = tf.transpose(embeddings, perm=[1, 0, 2])
            # forward LSTM
            lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(
                params['word_lstm_size'])
            # backward LSTM
            lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(
                params['word_lstm_size'])
            lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
            # forward hidden states at every step
            output_fw, _ = lstm_cell_fw(inputs,
                                        dtype=tf.float32,
                                        sequence_length=nwords)
            # backward hidden states at every step
            output_bw, _ = lstm_cell_bw(inputs,
                                        dtype=tf.float32,
                                        sequence_length=nwords)
            # concatenate the two directions, step by step, along the last axis
            output = tf.concat([output_fw, output_bw], axis=-1)
            # reshape output back to shape = (batch_size, time_len, input_size)
            output = tf.transpose(output, perm=[1, 0, 2])

        with tf.name_scope('LSTM_dropout'):
            output = tf.layers.dropout(output,
                                       rate=params['dropout'],
                                       training=training)

        with tf.name_scope('Fully_connected_layer'):
            # a dense layer computes per-step tag scores
            logits = tf.layers.dense(output, num_tags)

        with tf.name_scope('CRF'):
            # CRF transition matrix
            crf_params = tf.get_variable('crf', [num_tags, num_tags],
                                         dtype=tf.float32)
            # CRF decoding; pred_ids is the predicted tag sequence
            pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

        # decide between predict / eval / train
        if mode == tf.estimator.ModeKeys.PREDICT:
            # Prediction
            # table mapping tag index back to tag string, i.e. {id: tag, ...}
            reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
                params['tags'])
            # map the predicted ids back to tags
            pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
            # dict holding everything we want predicted
            predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)
        else:
            # Loss
            # table mapping tag to index, {tag: id}; note that it includes the padding tag
            vocab_tags = tf.contrib.lookup.index_table_from_file(
                params['tags'])
            # convert the gold tags to an id sequence
            tags = vocab_tags.lookup(labels)
            # the loss is the negative log-likelihood
            log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                logits, tags, nwords, crf_params)
            loss = tf.reduce_mean(-log_likelihood)

            # Evaluation metrics
            weights = tf.sequence_mask(nwords)
            metrics = {
                'acc': tf.metrics.accuracy(tags, pred_ids, weights),
                'precision': precision(tags, pred_ids, num_tags, indices,
                                       weights),
                'recall': recall(tags, pred_ids, num_tags, indices, weights),
                'f1': f1(tags, pred_ids, num_tags, indices, weights),
            }

            for metric_name, op in metrics.items():
                tf.summary.scalar(metric_name, op[1])

            # Evaluation
            if mode == tf.estimator.ModeKeys.EVAL:
                return tf.estimator.EstimatorSpec(mode,
                                                  loss=loss,
                                                  eval_metric_ops=metrics)
            # Training
            elif mode == tf.estimator.ModeKeys.TRAIN:
                # optimizer (Adam with its default learning rate)
                train_op = tf.train.AdamOptimizer().minimize(
                    loss, global_step=tf.train.get_or_create_global_step())
                return tf.estimator.EstimatorSpec(mode,
                                                  loss=loss,
                                                  train_op=train_op)
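The dict form of features that __model_fn repacks at its top is what a predict/serving call supplies. A hedged illustration for one English sentence; the concrete values are invented for the example, and in practice chars would be padded to a rectangular tensor:

    features = {
        'words': [['John', 'lives', 'in', 'Paris']],  # one sentence per batch row
        'nwords': [4],                                # words per sentence
        'chars': [[['J', 'o', 'h', 'n', ''], ['l', 'i', 'v', 'e', 's'],
                   ['i', 'n', '', '', ''], ['P', 'a', 'r', 'i', 's']]],
        'nchars': [[4, 5, 2, 5]],                     # characters per word
    }
    # __model_fn turns this into ((words, nwords), (chars, nchars)); the lookup
    # tables then map the strings to ids ('' here is an illustrative pad token)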
Example #30
def model_fn(features, labels, mode, params):
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features

    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars + 1, params['dim_chars']],
                               tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    # char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
    #                                     training=training)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, weights,
                                            params['char_filters'],
                                            params['char_kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['w2v'])['embeddings']  # np.array
    print("glove shape", glove.shape)
    variable = np.vstack([glove,
                          [[0.] * params['dim']]])  # [vob_size, emb_size]
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # block_unflat_scores shape: [batch_size, max_seq_len, class_num]
    block_unflat_scores, _, l2_loss = feature_layers(embeddings, reuse=False)
    pred_ids = tf.argmax(block_unflat_scores[-1], 2)
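    # block_unflat_scores[-1] holds the last block's scores with shape
    # [batch_size, max_seq_len, class_num]; argmax over axis 2 picks the
    # highest-scoring tag per token (greedy decoding, no CRF here)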
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        # Mean cross-entropy loss over the real (non-padding) tokens
        with tf.name_scope("loss"):
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=block_unflat_scores[-1], labels=tags)
            # mask out the padded positions before reducing
            input_mask = tf.sequence_mask(nwords,
                                          maxlen=tf.shape(tags)[1],
                                          dtype=tf.float32)
            masked_losses = tf.multiply(losses, input_mask)
            loss = tf.div(tf.reduce_sum(masked_losses),
                          tf.reduce_sum(input_mask))
            loss += params["l2_penalty"] * l2_loss

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
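feature_layers is not defined in this example; judging from its outputs (a list of per-block score tensors plus an l2 term) it resembles an iterated dilated CNN tagger in the style of Strubell et al. The sketch below is purely an assumption about its shape — the name feature_layers_sketch, the filter sizes, and the dilation schedule are all invented:

def feature_layers_sketch(embeddings, num_classes, reuse=False):
    """Hypothetical stand-in for the undefined feature_layers above: a small
    iterated-dilated-CNN block returning per-block scores and an l2 term."""
    block_unflat_scores = []
    h = embeddings
    with tf.variable_scope('idcnn', reuse=reuse):
        # stack of dilated 1d convolutions widens the receptive field per token
        for i, dilation in enumerate([1, 2, 4]):
            h = tf.layers.conv1d(h, filters=128, kernel_size=3,
                                 dilation_rate=dilation, padding='same',
                                 activation=tf.nn.relu, name='conv_%d' % i)
        scores = tf.layers.dense(h, num_classes, name='scores')
        block_unflat_scores.append(scores)
        # l2 term over this scope's weights, consumed by the caller's penalty
        l2_loss = tf.add_n([tf.nn.l2_loss(v)
                            for v in tf.trainable_variables('idcnn')])
    return block_unflat_scores, h, l2_loss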
Example #31
    def model_fn(self, features, labels, mode, params):
        # For serving, features are a bit different

        if isinstance(features, dict):
            features = features['words'], features['nwords']

        # Read vocabs and inputs
        dropout = params['dropout']
        words, nwords = features
        
        training = (mode == tf.estimator.ModeKeys.TRAIN)
        vocab_words = tf.contrib.lookup.index_table_from_file(
            params['words'], num_oov_buckets=params['num_oov_buckets'])
        if mode == tf.estimator.ModeKeys.PREDICT:
            # Word Embeddings
            word_ids = vocab_words.lookup(words)
            if self.embeding == 'glove':
                glove = np.load(params['glove'])['embeddings']  # np.array
                variable = np.vstack([glove, [[0.]*params['dim']]])
                variable = tf.Variable(variable, dtype=tf.float32, trainable=True)
                embeddings = tf.nn.embedding_lookup(variable, word_ids)
                embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

            else:
                # train embeddings from scratch; stddev 0.057735026918962574 = 1/sqrt(300)
                embeddings = tf.Variable(
                    # tf.random_uniform([vocab_length + 1, 300], -1.0, 1.0))
                    tf.random_normal([params['embeding_size'], 300], 0.0, 0.057735026918962574)
                )
                embeddings = tf.nn.embedding_lookup(embeddings, word_ids)
                embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

            # LSTM
            t = tf.transpose(embeddings, perm=[1, 0, 2])
            lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
            lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
            lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
            output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
            output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
            output = tf.concat([output_fw, output_bw], axis=-1)
            output = tf.transpose(output, perm=[1, 0, 2])
            output = tf.layers.dropout(output, rate=dropout, training=training)

            # CRF
            logits = tf.layers.dense(output, params['num_tags'])
            crf_params = tf.get_variable("crf", [params['num_tags'], params['num_tags']], dtype=tf.float32)
            pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

            # Predictions
            reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
                params['tags'])
            pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
            predictions = {
                'pred_ids': pred_ids,
                'tags': pred_strings
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)
        else:
            # If the batch has fewer examples than GPUs, tile it so every device
            # gets a shard, then truncate to a multiple of num_gpus so tf.split works
            words = tf.cond(tf.less(tf.shape(words)[0], self.num_gpus), \
                lambda:tf.concat([words]*self.num_gpus,0),lambda:words)
            nwords = tf.cond(tf.less(tf.shape(nwords)[0], self.num_gpus), \
                lambda:tf.concat([nwords]*self.num_gpus,0),lambda:nwords)
            labels = tf.cond(tf.less(tf.shape(labels)[0], self.num_gpus), \
                lambda:tf.concat([labels]*self.num_gpus,0),lambda:labels)
            n = (tf.shape(words)[0] // self.num_gpus) * self.num_gpus
            words = words[:n]
            nwords = nwords[:n]
            labels = labels[:n]
            words_shards = tf.split(words, self.num_gpus)
            nwords_shards = tf.split(nwords, self.num_gpus)
            labels_shards = tf.split(labels, self.num_gpus)
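            # e.g. with num_gpus = 4 and a batch of 5, n = (5 // 4) * 4 = 4,
            # so one example is dropped and each device gets a shard of 1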
            loss_shards = []
            grad_shards = []
            metric_accuracy = []
            accuracy_op = None
            metric_precision = []
            precision_op = None
            metric_recall = []
            recall_op = None
            metric_f1 = []
            f1_op = None
            for i, device in enumerate(self.devices):
                with tf.variable_scope( tf.get_variable_scope(), reuse=True if i > 0 else None):
                    with tf.device(device):
                        words = words_shards[i]
                        nwords = nwords_shards[i]
                        labels = labels_shards[i]
                        word_ids = vocab_words.lookup(words)
                        if self.embeding == 'glove':
                            glove = np.load(params['glove'])['embeddings']  # np.array
                            variable = np.vstack([glove, [[0.]*params['dim']]])
                            variable = tf.Variable(variable, dtype=tf.float32, trainable=True)
                            embeddings = tf.nn.embedding_lookup(variable, word_ids)
                            embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

                        else:
                            # train embeddings from scratch; stddev 0.057735026918962574 = 1/sqrt(300)
                            embeddings = tf.Variable(
                                # tf.random_uniform([vocab_length + 1, 300], -1.0, 1.0))
                                tf.random_normal([params['embeding_size'], 300], 0.0, 0.057735026918962574)
                            )
                            embeddings = tf.nn.embedding_lookup(embeddings, word_ids)
                            embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

                        # LSTM
                        t = tf.transpose(embeddings, perm=[1, 0, 2])
                        lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
                        lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
                        lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
                        output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
                        output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
                        output = tf.concat([output_fw, output_bw], axis=-1)
                        output = tf.transpose(output, perm=[1, 0, 2])
                        output = tf.layers.dropout(output, rate=dropout, training=training)

                        # CRF
                        logits = tf.layers.dense(output, params['num_tags'])
                        crf_params = tf.get_variable("crf", [params['num_tags'], params['num_tags']], dtype=tf.float32)
                        pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

                        # Loss

                        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
                        # vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'], num_oov_buckets=params['num_oov_buckets'])
                        tags = vocab_tags.lookup(labels)
                        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                            logits, tags, nwords, crf_params)
                        loss = tf.reduce_mean(-log_likelihood)
                        loss_shards.append(loss)
                        weights = tf.sequence_mask(nwords,tf.shape(tags)[1])
                        val,accuracy_op = tf.metrics.accuracy(tags, pred_ids, weights)
                        metric_accuracy.append([val])
                        val,precision_op = precision(tags, pred_ids, params['num_tags'], self.indices, weights)
                        metric_precision.append([val])
                        val,recall_op = recall(tags, pred_ids, params['num_tags'], self.indices, weights)
                        metric_recall.append([val])
                        val,f1_op = f1(tags, pred_ids, params['num_tags'], self.indices, weights)
                        metric_f1.append([val])

            loss = tf.reduce_mean(loss_shards)
            metric_accuracy = tf.reduce_mean(metric_accuracy)
            metric_precision = tf.reduce_mean(metric_precision)
            metric_recall = tf.reduce_mean(metric_recall)
            metric_f1 = tf.reduce_mean(metric_f1)
            metrics = {
                'acc': (metric_accuracy,accuracy_op),
                'precision': (metric_precision,precision_op),
                'recall': (metric_recall, recall_op),
                'f1': (metric_f1, f1_op),
            }
            # Metrics: add each metric's update_op to the summaries so the
            # running values keep updating
            for metric_name, op in metrics.items():
                tf.summary.scalar(metric_name, op[1])

            if mode == tf.estimator.ModeKeys.EVAL:
                return tf.estimator.EstimatorSpec(
                    mode, loss=loss, eval_metric_ops=metrics)

            elif mode == tf.estimator.ModeKeys.TRAIN:
                train_op = tf.train.AdamOptimizer(learning_rate=self.params['learnning_rate']).minimize(
                    loss, global_step=tf.train.get_or_create_global_step())
                return tf.estimator.EstimatorSpec(
                    mode, loss=loss, train_op=train_op)
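Note that grad_shards above is declared but never filled: the example simply averages the per-device losses and calls minimize() once. The more common multi-tower pattern averages gradients explicitly; a hedged sketch, assuming the same per-device loss_shards and variables shared across towers, would be:

    # Hypothetical explicit gradient averaging across towers; dense gradients
    # are assumed (IndexedSlices from embedding lookups would need extra care)
    optimizer = tf.train.AdamOptimizer()
    tower_grads = [optimizer.compute_gradients(l) for l in loss_shards]
    avg_grads = []
    for grads_and_vars in zip(*tower_grads):
        # grads_and_vars pairs the same shared variable across all towers
        grads = [g for g, _ in grads_and_vars if g is not None]
        var = grads_and_vars[0][1]
        avg_grads.append((tf.reduce_mean(tf.stack(grads), axis=0), var))
    train_op = optimizer.apply_gradients(
        avg_grads, global_step=tf.train.get_or_create_global_step())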